Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
vihgo
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gad-public
vihgo
Commits
a04b957e
Commit
a04b957e
authored
4 years ago
by
Yannis Duffourd
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of gitlab.gad-bioinfo.org:gad-public/vihgo into master
parents
a9fa4b58
b011f58c
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
756 additions
and
908 deletions
+756
-908
Vihgo.cpp
cpp/Vihgo.cpp
+754
-906
main.cpp
cpp/main.cpp
+2
-2
No files found.
cpp/Vihgo.cpp
View file @
a04b957e
// C++ std libs
#include <iostream>
// #include <iomanip>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <fstream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <
algorithm
>
#include <
string
>
#include <sys/time.h>
#include <sys/types.h>
#include <vector>
// C++ Boost
#include <boost/program_options.hpp>
// htslib
#include <htslib/sam.h>
...
...
@@ -26,14 +25,13 @@
using
namespace
std
;
Vihgo
::
Vihgo
()
{
Vihgo
::
Vihgo
()
{
//
}
Vihgo
::
Vihgo
(
string
iBF
,
string
gF
,
string
rtF
,
string
intF
,
string
protF
,
string
refF
,
string
outF
,
string
cResF
,
string
csF
,
string
iRCB
,
int
dT
,
int
nT
,
int
lL
,
double
rT
,
double
pvT
,
bool
countStop
)
{
Vihgo
::
Vihgo
(
string
iBF
,
string
gF
,
string
rtF
,
string
intF
,
string
protF
,
string
refF
,
string
outF
,
string
cResF
,
string
csF
,
string
iRCB
,
int
dT
,
int
nT
,
int
lL
,
double
rT
,
double
pvT
,
bool
countStop
)
{
this
->
inputBamFile
=
iBF
;
this
->
geneFile
=
gF
;
this
->
integraseFile
=
intF
;
...
...
@@ -51,76 +49,73 @@ Vihgo::Vihgo( string iBF , string gF, string rtF , string intF, string protF, st
this
->
pvalueThreshold
=
pvT
;
this
->
countStop
=
countStop
;
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTT"
,
"F"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTC"
,
"F"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTA"
,
"L"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTG"
,
"L"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTT"
,
"L"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTC"
,
"L"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTA"
,
"L"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTG"
,
"L"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATT"
,
"I"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATC"
,
"I"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATA"
,
"I"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATG"
,
"M"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTT"
,
"V"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTC"
,
"V"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTA"
,
"V"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTG"
,
"V"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCT"
,
"S"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCC"
,
"S"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCA"
,
"S"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCG"
,
"S"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCT"
,
"P"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCC"
,
"P"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCA"
,
"P"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCG"
,
"P"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACT"
,
"T"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACC"
,
"T"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACA"
,
"T"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACG"
,
"T"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCT"
,
"A"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCC"
,
"A"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCA"
,
"A"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCG"
,
"A"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAT"
,
"Y"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAC"
,
"Y"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAA"
,
"*"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAG"
,
"*"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAT"
,
"H"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAC"
,
"H"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAA"
,
"Q"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAG"
,
"Q"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAT"
,
"N"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAC"
,
"N"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAA"
,
"K"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAG"
,
"K"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAT"
,
"D"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAC"
,
"D"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAA"
,
"E"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAG"
,
"E"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGT"
,
"C"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGC"
,
"C"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGA"
,
"*"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGG"
,
"W"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGT"
,
"R"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGC"
,
"R"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGA"
,
"R"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGG"
,
"R"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGT"
,
"S"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGC"
,
"S"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGA"
,
"R"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGG"
,
"R"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGT"
,
"G"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGC"
,
"G"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGA"
,
"G"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGG"
,
"G"
)
);
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTT"
,
"F"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTC"
,
"F"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTA"
,
"L"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TTG"
,
"L"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTT"
,
"L"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTC"
,
"L"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTA"
,
"L"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CTG"
,
"L"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATT"
,
"I"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATC"
,
"I"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATA"
,
"I"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ATG"
,
"M"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTT"
,
"V"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTC"
,
"V"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTA"
,
"V"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GTG"
,
"V"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCT"
,
"S"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCC"
,
"S"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCA"
,
"S"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TCG"
,
"S"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCT"
,
"P"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCC"
,
"P"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCA"
,
"P"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CCG"
,
"P"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACT"
,
"T"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACC"
,
"T"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACA"
,
"T"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"ACG"
,
"T"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCT"
,
"A"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCC"
,
"A"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCA"
,
"A"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GCG"
,
"A"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAT"
,
"Y"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAC"
,
"Y"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAA"
,
"*"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TAG"
,
"*"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAT"
,
"H"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAC"
,
"H"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAA"
,
"Q"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CAG"
,
"Q"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAT"
,
"N"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAC"
,
"N"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAA"
,
"K"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AAG"
,
"K"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAT"
,
"D"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAC"
,
"D"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAA"
,
"E"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GAG"
,
"E"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGT"
,
"C"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGC"
,
"C"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGA"
,
"*"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"TGG"
,
"W"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGT"
,
"R"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGC"
,
"R"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGA"
,
"R"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"CGG"
,
"R"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGT"
,
"S"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGC"
,
"S"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGA"
,
"R"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"AGG"
,
"R"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGT"
,
"G"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGC"
,
"G"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGA"
,
"G"
));
this
->
codonTable
.
insert
(
pair
<
string
,
string
>
(
"GGG"
,
"G"
));
}
int
Vihgo
::
mainLoop
()
{
int
Vihgo
::
mainLoop
()
{
this
->
getGenome
();
this
->
displayGenome
();
this
->
getGenes
();
...
...
@@ -128,22 +123,20 @@ int Vihgo::mainLoop()
// this->getAnnotation();
this
->
displayCodons
();
this
->
displayReferenceCodons
();
if
(
this
->
isThereAControl
()
)
{
if
(
this
->
isThereAControl
())
{
this
->
readRunControl
();
}
this
->
readSample
();
this
->
displayData
(
"control"
);
this
->
displayData
(
"sample"
);
this
->
writeCodonFile
(
"sample"
);
this
->
displayData
(
"control"
);
this
->
displayData
(
"sample"
);
this
->
writeCodonFile
(
"sample"
);
this
->
writeConsensus
();
this
->
variantCalling
();
return
0
;
}
int
Vihgo
::
getGenome
()
{
int
Vihgo
::
getGenome
()
{
struct
timeval
tbegin
,
tend
;
string
ligne
;
char
mot
;
...
...
@@ -152,34 +145,27 @@ int Vihgo::getGenome()
string
currentChr
;
// fasta parsing
ifstream
fastaStream
(
this
->
referenceFile
.
c_str
()
);
if
(
fastaStream
)
{
while
(
getline
(
fastaStream
,
ligne
)
)
{
if
(
ligne
.
find
(
header
)
==
0
)
{
ligne
.
replace
(
0
,
1
,
""
);
ifstream
fastaStream
(
this
->
referenceFile
.
c_str
());
if
(
fastaStream
)
{
while
(
getline
(
fastaStream
,
ligne
))
{
if
(
ligne
.
find
(
header
)
==
0
)
{
ligne
.
replace
(
0
,
1
,
""
);
// Deal with headers including spaces, etc...
if
(
ligne
.
find
(
" "
)
!=
string
::
npos
)
{
ligne
=
parseOnSep
(
ligne
,
" "
)[
0
];
if
(
ligne
.
find
(
" "
)
!=
string
::
npos
)
{
ligne
=
parseOnSep
(
ligne
,
" "
)[
0
];
}
currentChr
=
ligne
;
map
<
int
,
char
>
tmpMap
;
map
<
int
,
char
>
tmpMap
;
this
->
referenceSequence
[
currentChr
]
=
tmpMap
;
}
else
{
for
(
unsigned
int
k
=
0
;
k
<
ligne
.
length
()
;
k
++
)
{
}
else
{
for
(
unsigned
int
k
=
0
;
k
<
ligne
.
length
();
k
++
)
{
mot
=
ligne
[
k
];
mot
=
checkBase
(
mot
);
mot
=
checkBase
(
mot
);
this
->
referenceSequence
[
currentChr
][
i
]
=
mot
;
i
++
;
i
++
;
}
}
}
...
...
@@ -187,27 +173,23 @@ int Vihgo::getGenome()
return
0
;
}
void
Vihgo
::
displayGenome
()
{
map
<
string
,
map
<
int
,
char
>
>::
iterator
myIterA
;
void
Vihgo
::
displayGenome
()
{
map
<
string
,
map
<
int
,
char
>>::
iterator
myIterA
;
for
(
myIterA
=
this
->
referenceSequence
.
begin
()
;
myIterA
!=
this
->
referenceSequence
.
end
()
;
myIterA
++
)
{
for
(
myIterA
=
this
->
referenceSequence
.
begin
();
myIterA
!=
this
->
referenceSequence
.
end
();
myIterA
++
)
{
cerr
<<
"Chr "
<<
myIterA
->
first
<<
" : "
<<
endl
;
map
<
int
,
char
>
tmpMap
;
tmpMap
=
myIterA
->
second
;
map
<
int
,
char
>::
iterator
myIterB
;
for
(
myIterB
=
tmpMap
.
begin
()
;
myIterB
!=
tmpMap
.
end
()
;
myIterB
++
)
{
for
(
myIterB
=
tmpMap
.
begin
();
myIterB
!=
tmpMap
.
end
();
myIterB
++
)
{
cerr
<<
myIterB
->
first
<<
" : "
<<
myIterB
->
second
<<
" ; "
;
}
cerr
<<
endl
;
}
}
int
Vihgo
::
getGenes
()
{
int
Vihgo
::
getGenes
()
{
struct
timeval
tbegin
,
tend
;
string
ligne
;
string
mot
;
...
...
@@ -223,77 +205,66 @@ int Vihgo::getGenes()
string
strand
;
// gff parsing
ifstream
geneStream
(
this
->
geneFile
.
c_str
()
);
if
(
geneStream
)
{
while
(
getline
(
geneStream
,
ligne
)
)
{
ifstream
geneStream
(
this
->
geneFile
.
c_str
());
if
(
geneStream
)
{
while
(
getline
(
geneStream
,
ligne
))
{
// need to deal with header of the gff "#"
if
(
ligne
.
find
(
headerGff
)
==
0
)
{
if
(
ligne
.
find
(
headerGff
)
==
0
)
{
continue
;
}
i
=
0
;
// get information from the line
istringstream
issLigne
(
ligne
);
while
(
getline
(
issLigne
,
mot
,
'\t'
)
)
{
switch
(
i
)
{
case
0
:
istringstream
issLigne
(
ligne
);
while
(
getline
(
issLigne
,
mot
,
'\t'
))
{
switch
(
i
)
{
case
0
:
chromosome
=
mot
;
break
;
case
2
:
case
2
:
cds
=
mot
;
break
;
case
3
:
start
=
mot
;
case
3
:
start
=
mot
;
break
;
case
4
:
case
4
:
end
=
mot
;
break
;
case
6
:
case
6
:
strand
=
mot
;
break
;
case
8
:
case
8
:
annot
=
mot
;
break
;
default
:
default
:
break
;
}
i
++
;
i
++
;
}
if
(
cds
==
"CDS"
)
{
istringstream
issAnnot
(
annot
);
while
(
getline
(
issAnnot
,
mot
,
';'
)
)
{
if
(
mot
.
find
(
"protein_id"
)
==
0
)
{
istringstream
issMot
(
mot
);
if
(
cds
==
"CDS"
)
{
istringstream
issAnnot
(
annot
);
while
(
getline
(
issAnnot
,
mot
,
';'
))
{
if
(
mot
.
find
(
"protein_id"
)
==
0
)
{
istringstream
issMot
(
mot
);
string
subMot
;
string
g
;
i
=
0
;
while
(
getline
(
issMot
,
subMot
,
'='
)
)
{
if
(
i
==
1
)
{
while
(
getline
(
issMot
,
subMot
,
'='
))
{
if
(
i
==
1
)
{
g
=
subMot
;
}
i
++
;
i
++
;
}
// modify gene string : deleting special chars
// cerr << "Gene before : " << subMot << endl;
subMot
=
pyReplace
(
subMot
,
"
\"
"
,
""
);
subMot
=
pyReplace
(
subMot
,
"
\"
"
,
""
);
// cerr << "Gene after : " << subMot << endl;
// test if gene is already known
if
(
this
->
genes
.
count
(
subMot
)
==
0
)
{
if
(
this
->
genes
.
count
(
subMot
)
==
0
)
{
// cerr << "Creating new gene : " << subMot << endl;
map
<
string
,
string
>
tmpMap
;
map
<
string
,
string
>
tmpMap
;
map
<
int
,
string
>
tmpMapT
;
this
->
genes
[
subMot
]
=
tmpMap
;
// this->referenceCodon[subMot] = tmpMapT;
...
...
@@ -303,23 +274,20 @@ int Vihgo::getGenes()
this
->
genes
[
subMot
][
"chrom"
]
=
chromosome
;
}
// add informations
if
(
(
this
->
genes
[
subMot
][
"start"
]).
length
()
>
0
)
{
(
this
->
genes
[
subMot
][
"start"
]).
append
(
","
);
if
((
this
->
genes
[
subMot
][
"start"
]).
length
()
>
0
)
{
(
this
->
genes
[
subMot
][
"start"
]).
append
(
","
);
}
(
this
->
genes
[
subMot
][
"start"
]).
append
(
start
);
(
this
->
genes
[
subMot
][
"start"
]).
append
(
start
);
if
(
(
this
->
genes
[
subMot
][
"end"
]).
length
()
>
0
)
{
(
this
->
genes
[
subMot
][
"end"
]).
append
(
","
);
if
((
this
->
genes
[
subMot
][
"end"
]).
length
()
>
0
)
{
(
this
->
genes
[
subMot
][
"end"
]).
append
(
","
);
}
(
this
->
genes
[
subMot
][
"end"
]).
append
(
end
);
(
this
->
genes
[
subMot
][
"end"
]).
append
(
end
);
if
(
(
this
->
genes
[
subMot
][
"strand"
]).
length
()
>
0
)
{
if
((
this
->
genes
[
subMot
][
"strand"
]).
length
()
>
0
)
{
(
this
->
genes
[
subMot
][
"strand"
]).
append
(
","
);
}
(
this
->
genes
[
subMot
][
"strand"
]).
append
(
strand
);
(
this
->
genes
[
subMot
][
"strand"
]).
append
(
strand
);
}
}
}
...
...
@@ -328,93 +296,82 @@ int Vihgo::getGenes()
// this->displayGenes();
map
<
string
,
map
<
string
,
string
>
>::
iterator
myIterC
;
for
(
myIterC
=
genes
.
begin
()
;
myIterC
!=
genes
.
end
()
;
myIterC
++
)
{
map
<
string
,
map
<
string
,
string
>>::
iterator
myIterC
;
for
(
myIterC
=
genes
.
begin
();
myIterC
!=
genes
.
end
();
myIterC
++
)
{
string
currentGene
=
myIterC
->
first
;
// cerr << "getting codons for gene " << currentGene << endl;
chromosome
=
myIterC
->
second
[
"chrom"
];
vector
<
string
>
starts
=
parseOnSep
(
myIterC
->
second
[
"start"
]
,
","
);
vector
<
string
>
ends
=
parseOnSep
(
myIterC
->
second
[
"end"
]
,
","
);
vector
<
string
>
strands
=
parseOnSep
(
myIterC
->
second
[
"strand"
]
,
","
);
vector
<
string
>
starts
=
parseOnSep
(
myIterC
->
second
[
"start"
],
","
);
vector
<
string
>
ends
=
parseOnSep
(
myIterC
->
second
[
"end"
],
","
);
vector
<
string
>
strands
=
parseOnSep
(
myIterC
->
second
[
"strand"
],
","
);
// create gene references in prot relative tables
map
<
int
,
string
>
tmpMap
;
map
<
int
,
int
>
tmpMapSecond
;
map
<
int
,
string
>
tmpMap
;
map
<
int
,
int
>
tmpMapSecond
;
this
->
protCodon
[
currentGene
]
=
tmpMap
;
this
->
refProtCorresp
[
currentGene
]
=
tmpMapSecond
;
int
protPos
=
1
;
int
geneSubPart
=
0
;
for
(
geneSubPart
=
0
;
geneSubPart
<
starts
.
size
()
;
geneSubPart
++
)
{
map
<
int
,
char
>
posList
;
for
(
geneSubPart
=
0
;
geneSubPart
<
starts
.
size
();
geneSubPart
++
)
{
map
<
int
,
char
>
posList
;
posList
=
this
->
referenceSequence
[
chromosome
];
cerr
<<
"getting position on the related chromosome "
<<
chromosome
<<
" : "
<<
posList
.
size
()
<<
endl
;
cerr
<<
"getting position on the related chromosome "
<<
chromosome
<<
" : "
<<
posList
.
size
()
<<
endl
;
// sort position in the map
map
<
int
,
char
>::
iterator
myIterE
;
map
<
int
,
char
>::
iterator
myIterE
;
vector
<
int
>
pos
;
for
(
myIterE
=
posList
.
begin
()
;
myIterE
!=
posList
.
end
()
;
myIterE
++
)
{
pos
.
push_back
(
myIterE
->
first
);
for
(
myIterE
=
posList
.
begin
();
myIterE
!=
posList
.
end
();
myIterE
++
)
{
pos
.
push_back
(
myIterE
->
first
);
}
sort
(
pos
.
begin
(),
pos
.
begin
()
+
pos
.
size
()
);
sort
(
pos
.
begin
(),
pos
.
begin
()
+
pos
.
size
()
);
if
(
strands
[
geneSubPart
]
==
"+"
)
{
if
(
strands
[
geneSubPart
]
==
"+"
)
{
cerr
<<
"
\t
Strand is + "
<<
endl
;
}
else
{
}
else
{
cerr
<<
"
\t
Strand is - "
<<
endl
;
reverse
(
pos
.
begin
(),
pos
.
end
());
reverse
(
pos
.
begin
(),
pos
.
end
());
}
string
codon
=
""
;
vector
<
int
>::
iterator
myIterF
;
string
s_start
=
starts
[
geneSubPart
];
string
s_end
=
ends
[
geneSubPart
];
int
i_start
=
string_to_int
(
s_start
);
int
i_end
=
string_to_int
(
s_end
);
int
i_start
=
string_to_int
(
s_start
);
int
i_end
=
string_to_int
(
s_end
);
cerr
<<
"Analyzing gene subpart : "
<<
currentGene
<<
" : "
<<
chromosome
<<
":"
<<
i_start
<<
"-"
<<
i_end
<<
" : "
<<
strands
[
geneSubPart
]
<<
endl
;
cerr
<<
"Analyzing gene subpart : "
<<
currentGene
<<
" : "
<<
chromosome
<<
":"
<<
i_start
<<
"-"
<<
i_end
<<
" : "
<<
strands
[
geneSubPart
]
<<
endl
;
for
(
myIterF
=
pos
.
begin
();
myIterF
!=
pos
.
end
()
;
myIterF
++
)
{
if
(
(
*
myIterF
>=
i_start
)
&&
(
*
myIterF
<=
i_end
)
)
{
for
(
myIterF
=
pos
.
begin
();
myIterF
!=
pos
.
end
();
myIterF
++
)
{
if
((
*
myIterF
>=
i_start
)
&&
(
*
myIterF
<=
i_end
))
{
codon
+=
this
->
referenceSequence
[
chromosome
][
*
myIterF
];
// store the codon if terminated
if
(
(
codon
.
length
()
%
3
==
0
)
&&
(
codon
.
length
()
!=
0
))
{
if
(
this
->
referenceCodon
.
count
(
chromosome
)
==
0
)
{
map
<
int
,
string
>
tmpMap
;
if
((
codon
.
length
()
%
3
==
0
)
&&
(
codon
.
length
()
!=
0
))
{
if
(
this
->
referenceCodon
.
count
(
chromosome
)
==
0
)
{
map
<
int
,
string
>
tmpMap
;
this
->
referenceCodon
[
chromosome
]
=
tmpMap
;
}
// fill objects
// reference genomic codon
this
->
referenceCodon
[
chromosome
][
*
myIterF
-
2
]
=
codon
;
// protein aa
this
->
protCodon
[
currentGene
][
protPos
]
=
this
->
codonTable
[
codon
];
this
->
protCodon
[
currentGene
][
protPos
]
=
this
->
codonTable
[
codon
];
// corresp between genomic and proteic numerotation
this
->
refProtCorresp
[
currentGene
][
*
myIterF
-
2
]
=
protPos
;
// logs for test purposes
// cerr << "\tAdded " << codon << " : " << this->codonTable[ codon ] << " at g. pos " << *myIterF - 2 << " ; p. pos " << protPos << " in gene " << currentGene << endl;
// cerr << "\tAdded " << codon << " : " << this->codonTable[ codon ]
// << " at g. pos " << *myIterF - 2 << " ; p. pos " << protPos << "
// in gene " << currentGene << endl;
codon
=
""
;
protPos
++
;
protPos
++
;
}
}
}
...
...
@@ -424,89 +381,82 @@ int Vihgo::getGenes()
return
0
;
}
void
Vihgo
::
displayGenes
()
{
void
Vihgo
::
displayGenes
()
{
cerr
<<
"Displaying gene list : "
<<
endl
;
map
<
string
,
map
<
string
,
string
>
>::
iterator
myIterB
;
for
(
myIterB
=
this
->
genes
.
begin
()
;
myIterB
!=
this
->
genes
.
end
()
;
myIterB
++
)
{
map
<
string
,
map
<
string
,
string
>>::
iterator
myIterB
;
for
(
myIterB
=
this
->
genes
.
begin
();
myIterB
!=
this
->
genes
.
end
();
myIterB
++
)
{
cerr
<<
"Gene : "
<<
myIterB
->
first
<<
" : "
<<
endl
;
map
<
string
,
string
>
tmpMap
;
map
<
string
,
string
>
tmpMap
;
tmpMap
=
myIterB
->
second
;
cerr
<<
"
\t
chr : "
<<
tmpMap
[
"chrom"
]
<<
" ; start : "
<<
tmpMap
[
"start"
]
<<
" ; end : "
<<
tmpMap
[
"end"
]
<<
" ; strand : "
<<
tmpMap
[
"strand"
]
<<
endl
;
cerr
<<
"
\t
chr : "
<<
tmpMap
[
"chrom"
]
<<
" ; start : "
<<
tmpMap
[
"start"
]
<<
" ; end : "
<<
tmpMap
[
"end"
]
<<
" ; strand : "
<<
tmpMap
[
"strand"
]
<<
endl
;
}
}
void
Vihgo
::
displayCodons
()
{
void
Vihgo
::
displayCodons
()
{
cerr
<<
"Displaying prot codon list : "
<<
endl
;
//
std::map<std::string, std::map< int , std::string> > protCodon;
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterA
;
for
(
myIterA
=
this
->
protCodon
.
begin
()
;
myIterA
!=
this
->
protCodon
.
end
()
;
myIterA
++
)
{
//
std::map<std::string, std::map< int , std::string> > protCodon;
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterA
;
for
(
myIterA
=
this
->
protCodon
.
begin
();
myIterA
!=
this
->
protCodon
.
end
();
myIterA
++
)
{
cerr
<<
"Gene : "
<<
myIterA
->
first
<<
" : "
<<
endl
;
map
<
int
,
string
>
tmpMap
;
map
<
int
,
string
>
tmpMap
;
tmpMap
=
myIterA
->
second
;
map
<
int
,
string
>::
iterator
myIterB
;
for
(
myIterB
=
tmpMap
.
begin
()
;
myIterB
!=
tmpMap
.
end
()
;
myIterB
++
)
{
cerr
<<
"
\t
Pos : "
<<
myIterB
->
first
<<
" ; AA : "
<<
myIterB
->
second
<<
endl
;
map
<
int
,
string
>::
iterator
myIterB
;
for
(
myIterB
=
tmpMap
.
begin
();
myIterB
!=
tmpMap
.
end
();
myIterB
++
)
{
cerr
<<
"
\t
Pos : "
<<
myIterB
->
first
<<
" ; AA : "
<<
myIterB
->
second
<<
endl
;
}
}
}
void
Vihgo
::
displayReferenceCodons
()
{
void
Vihgo
::
displayReferenceCodons
()
{
// std::map<std::string, std::map< int , std::string> > referenceCodon;
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterA
;
map
<
int
,
string
>::
iterator
myIterB
;
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterA
;
map
<
int
,
string
>::
iterator
myIterB
;
cerr
<<
"Reference codons : "
<<
endl
;
for
(
myIterA
=
this
->
referenceCodon
.
begin
()
;
myIterA
!=
this
->
referenceCodon
.
end
()
;
myIterA
++
)
{
for
(
myIterA
=
this
->
referenceCodon
.
begin
();
myIterA
!=
this
->
referenceCodon
.
end
();
myIterA
++
)
{
cerr
<<
"
\t
"
<<
myIterA
->
first
<<
" : "
<<
endl
;
for
(
myIterB
=
(
myIterA
->
second
).
begin
()
;
myIterB
!=
(
myIterA
->
second
).
end
()
;
myIterB
++
)
{
for
(
myIterB
=
(
myIterA
->
second
).
begin
();
myIterB
!=
(
myIterA
->
second
).
end
();
myIterB
++
)
{
cerr
<<
"
\t\t
"
<<
myIterB
->
first
<<
" : "
<<
myIterB
->
second
<<
endl
;
}
}
}
void
Vihgo
::
displayData
(
string
incType
)
{
map
<
string
,
map
<
int
,
map
<
string
,
int
>
>
>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>
>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
void
Vihgo
::
displayData
(
string
incType
)
{
map
<
string
,
map
<
int
,
map
<
string
,
int
>>>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
if
(
incType
==
"control"
)
{
for
(
myIterA
=
this
->
controlData
.
begin
()
;
myIterA
!=
this
->
controlData
.
end
()
;
myIterA
++
)
{
if
(
incType
==
"control"
)
{
for
(
myIterA
=
this
->
controlData
.
begin
();
myIterA
!=
this
->
controlData
.
end
();
myIterA
++
)
{
cerr
<<
myIterA
->
first
<<
" : "
<<
endl
;
for
(
myIterB
=
(
myIterA
->
second
).
begin
()
;
myIterB
!=
(
myIterA
->
second
).
end
()
;
myIterB
++
)
{
for
(
myIterB
=
(
myIterA
->
second
).
begin
();
myIterB
!=
(
myIterA
->
second
).
end
();
myIterB
++
)
{
cerr
<<
"
\t
"
<<
myIterB
->
first
<<
" : "
<<
endl
;
for
(
myIterC
=
(
myIterB
->
second
).
begin
()
;
myIterC
!=
(
myIterB
->
second
).
end
()
;
myIterC
++
)
{
for
(
myIterC
=
(
myIterB
->
second
).
begin
();
myIterC
!=
(
myIterB
->
second
).
end
();
myIterC
++
)
{
cerr
<<
"
\t\t
"
<<
myIterC
->
first
<<
" : "
<<
myIterC
->
second
<<
endl
;
}
}
}
}
if
(
incType
==
"sample"
)
{
for
(
myIterA
=
this
->
sampleData
.
begin
()
;
myIterA
!=
this
->
sampleData
.
end
()
;
myIterA
++
)
{
if
(
incType
==
"sample"
)
{
for
(
myIterA
=
this
->
sampleData
.
begin
();
myIterA
!=
this
->
sampleData
.
end
();
myIterA
++
)
{
cerr
<<
myIterA
->
first
<<
" : "
<<
endl
;
for
(
myIterB
=
(
myIterA
->
second
).
begin
()
;
myIterB
!=
(
myIterA
->
second
).
end
()
;
myIterB
++
)
{
for
(
myIterB
=
(
myIterA
->
second
).
begin
();
myIterB
!=
(
myIterA
->
second
).
end
();
myIterB
++
)
{
cerr
<<
"
\t
"
<<
myIterB
->
first
<<
" : "
<<
endl
;
for
(
myIterC
=
(
myIterB
->
second
).
begin
()
;
myIterC
!=
(
myIterB
->
second
).
end
()
;
myIterC
++
)
{
for
(
myIterC
=
(
myIterB
->
second
).
begin
();
myIterC
!=
(
myIterB
->
second
).
end
();
myIterC
++
)
{
cerr
<<
"
\t\t
"
<<
myIterC
->
first
<<
" : "
<<
myIterC
->
second
<<
endl
;
}
}
...
...
@@ -516,90 +466,96 @@ void Vihgo::displayData( string incType )
// Method to write down the codons
// NEED A REWAMP FOR ADJUSTMENT TO NEW DATA STORAGE
void
Vihgo
::
writeCodonFile
(
string
incType
)
{
map
<
string
,
map
<
int
,
map
<
string
,
int
>
>
>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>
>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
void
Vihgo
::
writeCodonFile
(
string
incType
)
{
map
<
string
,
map
<
int
,
map
<
string
,
int
>>>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
cerr
<<
"Writing codons ... "
<<
endl
;
ofstream
codonStream
;
codonStream
.
open
(
(
this
->
codonResultFile
).
c_str
()
,
ios
::
out
);
codonStream
.
open
((
this
->
codonResultFile
).
c_str
(),
ios
::
out
);
// I/O optimisation
ios_base
::
sync_with_stdio
(
false
);
cin
.
tie
(
nullptr
);
// header
codonStream
<<
"#Chrom
\t
GenomicPosition
\t
refProt
\t
ReferenceCodon
\t
ReferenceAA
\t
aaPosition
\t
AlternativeCodon
\t
AlternativeAA/Depth
\t
AlternativeAlleleRatio
\t
crtlCounts
\t
ctrlRatio"
<<
endl
;
if
(
incType
==
"sample"
)
{
for
(
myIterA
=
this
->
sampleData
.
begin
()
;
myIterA
!=
this
->
sampleData
.
end
()
;
myIterA
++
)
{
codonStream
<<
"#Chrom
\t
GenomicPosition
\t
refProt
\t
ReferenceCodon
\t
ReferenceAA"
"
\t
aaPosition
\t
AlternativeCodon
\t
AlternativeAA/"
"Depth
\t
AlternativeAlleleRatio
\t
crtlCounts
\t
ctrlRatio"
<<
endl
;
if
(
incType
==
"sample"
)
{
for
(
myIterA
=
this
->
sampleData
.
begin
();
myIterA
!=
this
->
sampleData
.
end
();
myIterA
++
)
{
// codonStream << myIterA->first << " : " << endl;
for
(
myIterB
=
(
myIterA
->
second
).
begin
()
;
myIterB
!=
(
myIterA
->
second
).
end
()
;
myIterB
++
)
{
//
get prot name for this position
string
genes
=
isInGene
(
myIterA
->
first
,
myIterB
->
first
);
vector
<
string
>
g
;
g
=
parseOnSep
(
genes
,
","
);
for
(
myIterB
=
(
myIterA
->
second
).
begin
();
myIterB
!=
(
myIterA
->
second
).
end
();
myIterB
++
)
{
//
get prot name for this position
string
genes
=
isInGene
(
myIterA
->
first
,
myIterB
->
first
);
vector
<
string
>
g
;
g
=
parseOnSep
(
genes
,
","
);
vector
<
string
>::
iterator
myIterD
;
for
(
myIterD
=
g
.
begin
()
;
myIterD
!=
g
.
end
()
;
myIterD
++
)
{
for
(
myIterD
=
g
.
begin
();
myIterD
!=
g
.
end
();
myIterD
++
)
{
string
ref
=
*
myIterD
;
string
refCodon
=
this
->
referenceCodon
[
myIterA
->
first
][
myIterB
->
first
];
string
refCodon
=
this
->
referenceCodon
[
myIterA
->
first
][
myIterB
->
first
];
string
refAA
=
this
->
getAAfromCodon
(
refCodon
);
int
position
=
this
->
refProtCorresp
[
ref
][
myIterB
->
first
];
double
depth
=
0.0
;
//
compute total depth at this pos
for
(
myIterC
=
(
myIterB
->
second
).
begin
()
;
myIterC
!=
(
myIterB
->
second
).
end
()
;
myIterC
++
)
{
//
compute total depth at this pos
for
(
myIterC
=
(
myIterB
->
second
).
begin
();
myIterC
!=
(
myIterB
->
second
).
end
();
myIterC
++
)
{
depth
+=
myIterC
->
second
;
}
for
(
myIterC
=
(
myIterB
->
second
).
begin
()
;
myIterC
!=
(
myIterB
->
second
).
end
()
;
myIterC
++
)
{
for
(
myIterC
=
(
myIterB
->
second
).
begin
();
myIterC
!=
(
myIterB
->
second
).
end
();
myIterC
++
)
{
string
altCodon
=
myIterC
->
first
;
string
altAA
=
this
->
getAAfromCodon
(
altCodon
);
double
altCount
=
myIterC
->
second
;
double
allelicRatio
=
(
altCount
/
depth
)
*
100
;
double
allelicRatio
=
(
altCount
/
depth
)
*
100
;
double
ctrlCounts
=
0.0
;
double
ctrlRatio
=
0.0
;
double
ctrlDepth
=
0.0
;
if
(
this
->
controlData
.
count
(
myIterA
->
first
)
>
0
)
{
if
(
this
->
controlData
.
count
(
myIterA
->
first
)
>
0
)
{
// cerr << "\tfound ref in ctrl : " << myIterA->first << endl;
if
(
this
->
controlData
[
myIterA
->
first
].
count
(
myIterB
->
first
)
>
0
)
{
// cerr << "\tfound position in ctrl : " << myIterB->first <<
endl;
if
(
this
->
controlData
[
myIterA
->
first
].
count
(
myIterB
->
first
)
>
0
)
{
// cerr << "\tfound position in ctrl : " << myIterB->first <<
//
endl;
// compute depth for control
map
<
string
,
int
>::
iterator
myIterE
;
map
<
string
,
int
>::
iterator
myIterE
;
for
(
myIterE
=
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
].
begin
()
;
myIterE
!=
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
].
end
()
;
myIterE
++
)
{
for
(
myIterE
=
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
]
.
begin
();
myIterE
!=
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
].
end
();
myIterE
++
)
{
ctrlDepth
+=
myIterE
->
second
;
}
if
(
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
][
altCodon
]
>
0
)
{
ctrlCounts
=
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
][
altCodon
];
// cerr << "\tfound codon in ctrl : " << altCodon << " : " << this->controlData[myIterA->first][myIterB->first][altCodon] << endl;
if
(
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
]
[
altCodon
]
>
0
)
{
ctrlCounts
=
this
->
controlData
[
myIterA
->
first
][
myIterB
->
first
]
[
altCodon
];
// cerr << "\tfound codon in ctrl : " << altCodon << " : " <<
// this->controlData[myIterA->first][myIterB->first][altCodon]
// << endl;
}
ctrlRatio
=
(
ctrlCounts
/
ctrlDepth
)
*
100
;
ctrlRatio
=
(
ctrlCounts
/
ctrlDepth
)
*
100
;
}
}
// write data
codonStream
<<
myIterA
->
first
<<
"
\t
"
<<
myIterB
->
first
<<
"
\t
"
<<
ref
<<
"
\t
"
<<
refCodon
<<
"
\t
"
<<
refAA
<<
"
\t
"
<<
position
<<
"
\t
"
<<
altCodon
<<
"
\t
"
<<
altAA
<<
"
\t
"
<<
altCount
<<
"/"
<<
(
int
)
depth
<<
"
\t
"
<<
allelicRatio
<<
"
\t
"
<<
ctrlCounts
<<
"/"
<<
ctrlDepth
<<
"
\t
"
<<
ctrlRatio
<<
endl
;
codonStream
<<
myIterA
->
first
<<
"
\t
"
<<
myIterB
->
first
<<
"
\t
"
<<
ref
<<
"
\t
"
<<
refCodon
<<
"
\t
"
<<
refAA
<<
"
\t
"
<<
position
<<
"
\t
"
<<
altCodon
<<
"
\t
"
<<
altAA
<<
"
\t
"
<<
altCount
<<
"/"
<<
(
int
)
depth
<<
"
\t
"
<<
allelicRatio
<<
"
\t
"
<<
ctrlCounts
<<
"/"
<<
ctrlDepth
<<
"
\t
"
<<
ctrlRatio
<<
endl
;
}
}
}
...
...
@@ -609,13 +565,10 @@ void Vihgo::writeCodonFile( string incType )
codonStream
.
close
();
}
// Method to get annotation for variation
// Actually has been deactivated yet
// NEED A REWAMP FOR ADAPTATION TO ALL VIRUS; VCF FORMAT
int
Vihgo
::
getAnnotation
()
{
int
Vihgo
::
getAnnotation
()
{
// vector<string> fileToParse;
// cerr << "Starting annotation parsing " << endl;
// if( !( this->isAnnotationDefined() ) )
...
...
@@ -631,7 +584,8 @@ int Vihgo::getAnnotation()
// string ligne;
// string chromosome;
//
// for( myIterA = fileToParse.begin() ; myIterA != fileToParse.end() ; myIterA ++)
// for( myIterA = fileToParse.begin() ; myIterA != fileToParse.end() ; myIterA
// ++)
// {
//
// // rt parsing
...
...
@@ -665,21 +619,21 @@ int Vihgo::getAnnotation()
//
// chromosome = pyReplace( ligne , "#" , "");
// chromosome = strip(chromosome);
// cerr << "Found a # with line : " << ligne << endl ;
// cerr << "\t\tto " << chromosome << "." << endl;
// if(
this->referenceCodon.count( chromosome ) == 0 )
// cerr << "Found a # with line : " << ligne << endl
// ; cerr << "\t\tto " << chromosome << "." << endl; if(
//
this->referenceCodon.count( chromosome ) == 0 )
// {
// cerr << chromosome << " not found in reference : passing " << endl;
// this->annotationIsOk = false;
// break;
// cerr << chromosome << " not found in reference : passing "
// << endl; this->annotationIsOk = false; break;
// }
//
//
// if (this->knownMutationTable.count( chromosome ) == 0 )
// if (this->knownMutationTable.count( chromosome ) == 0
// )
// {
// map< string , string > tmpMap;
// this->knownMutationTable[chromosome] = tmpMap;
//
cerr << chromosome << " newly created as an annotation " << endl;
// this->knownMutationTable[chromosome] =
// tmpMap;
cerr << chromosome << " newly created as an annotation " << endl;
// }
// }
//
...
...
@@ -717,8 +671,8 @@ int Vihgo::getAnnotation()
// {
// continue;
// }
// this->knownMutationTable[chromosome][index] = conclusion;
//
cerr << "\tAdding " << index << " aka " << conclusion << endl;
// this->knownMutationTable[chromosome][index] =
// conclusion;
cerr << "\tAdding " << index << " aka " << conclusion << endl;
// }
// }
// annotStream.close();
...
...
@@ -726,18 +680,17 @@ int Vihgo::getAnnotation()
return
0
;
}
bool
Vihgo
::
isAnnotationDefined
()
{
if
(
((
this
->
proteaseFile
).
length
()
>
0
)
&&
(
this
->
integraseFile
.
length
()
>
0
)
&&
(
this
->
retrotranscriptaseFile
.
length
()
>
0
)
)
{
bool
Vihgo
::
isAnnotationDefined
()
{
if
(((
this
->
proteaseFile
).
length
()
>
0
)
&&
(
this
->
integraseFile
.
length
()
>
0
)
&&
(
this
->
retrotranscriptaseFile
.
length
()
>
0
))
{
return
true
;
}
return
false
;
}
// parse the control bam file to get informations on background of the virus
int
Vihgo
::
readRunControl
()
{
int
Vihgo
::
readRunControl
()
{
long
int
nbLineRead
=
0
;
int
readPassedAln
=
0
;
int
readPassedSecondary
=
0
;
...
...
@@ -747,42 +700,41 @@ int Vihgo::readRunControl()
cerr
<<
"Reading control BAM file..."
<<
endl
;
bam1_t
*
aln
=
bam_init1
();
samFile
*
fp_in
=
hts_open
(
(
this
->
getControlBamFile
()).
c_str
()
,
"r"
);
bam_hdr_t
*
bamHdr
=
sam_hdr_read
(
fp_in
);
samFile
*
fp_in
=
hts_open
((
this
->
getControlBamFile
()).
c_str
(),
"r"
);
bam_hdr_t
*
bamHdr
=
sam_hdr_read
(
fp_in
);
while
(
sam_read1
(
fp_in
,
bamHdr
,
aln
)
>
0
)
{
nbLineRead
++
;
if
((
nbLineRead
!=
0
)
&&
((
nbLineRead
%
200000
)
==
0
)
)
{
cerr
<<
"
\t
"
<<
nbLineRead
<<
" aln read ;
\n\t\t
Skipped : "
<<
readPassedAln
<<
" unaligned ; "
<<
readPassedSecondary
<<
" secondary alignements ; "
<<
readPassedSupplementary
<<
" supplementary alignements ; "
<<
readPassedMQ
<<
" bad aln quality ; "
<<
readPassedCIGAROp
<<
" special CIGAR operation "
<<
endl
;
while
(
sam_read1
(
fp_in
,
bamHdr
,
aln
)
>
0
)
{
nbLineRead
++
;
if
((
nbLineRead
!=
0
)
&&
((
nbLineRead
%
200000
)
==
0
))
{
cerr
<<
"
\t
"
<<
nbLineRead
<<
" aln read ;
\n\t\t
Skipped : "
<<
readPassedAln
<<
" unaligned ; "
<<
readPassedSecondary
<<
" secondary alignements ; "
<<
readPassedSupplementary
<<
" supplementary alignements ; "
<<
readPassedMQ
<<
" bad aln quality ; "
<<
readPassedCIGAROp
<<
" special CIGAR operation "
<<
endl
;
}
// skip alignement if is unmmapped
if
(
(
aln
->
core
.
flag
&
BAM_FUNMAP
)
==
BAM_FUNMAP
)
{
readPassedAln
++
;
if
((
aln
->
core
.
flag
&
BAM_FUNMAP
)
==
BAM_FUNMAP
)
{
readPassedAln
++
;
continue
;
}
// skip alignement if secondary alignement
if
(
(
aln
->
core
.
flag
&
BAM_FSECONDARY
)
==
BAM_FSECONDARY
)
{
readPassedSecondary
++
;
if
((
aln
->
core
.
flag
&
BAM_FSECONDARY
)
==
BAM_FSECONDARY
)
{
readPassedSecondary
++
;
continue
;
}
// skip alignement if supplmentary alignement
if
(
(
aln
->
core
.
flag
&
BAM_FSUPPLEMENTARY
)
==
BAM_FSUPPLEMENTARY
)
{
readPassedSupplementary
++
;
if
((
aln
->
core
.
flag
&
BAM_FSUPPLEMENTARY
)
==
BAM_FSUPPLEMENTARY
)
{
readPassedSupplementary
++
;
continue
;
}
// Skip alignment if quality score is less than 30
if
(
aln
->
core
.
qual
<
30
)
{
readPassedMQ
++
;
if
(
aln
->
core
.
qual
<
30
)
{
readPassedMQ
++
;
continue
;
}
...
...
@@ -792,9 +744,10 @@ int Vihgo::readRunControl()
string
chr
=
bamHdr
->
target_name
[
aln
->
core
.
tid
];
// Deal with unknown chromosome
if
(
!
(
this
->
isInGeneList
(
chr
)
))
{
cerr
<<
"Read is mapped on a reference not known. Please verify the reference provided."
<<
endl
;
if
(
!
(
this
->
isInGeneList
(
chr
)))
{
cerr
<<
"Read is mapped on a reference not known. Please verify the "
"reference provided."
<<
endl
;
continue
;
}
...
...
@@ -806,21 +759,19 @@ int Vihgo::readRunControl()
// cerr << "Read is in a known gene : " << *myIterA << endl;
// }
// cigar string event detection ; need to be improved to deal with insertion / deletion
// pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD, BACK
// cigar string event detection ; need to be improved to deal with insertion
// / deletion pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD,
// BACK
bool
readNeedToBeSkipped
=
false
;
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
if
(
op
==
BAM_CREF_SKIP
||
op
==
BAM_CPAD
||
op
==
BAM_CBACK
)
{
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
if
(
op
==
BAM_CREF_SKIP
||
op
==
BAM_CPAD
||
op
==
BAM_CBACK
)
{
readNeedToBeSkipped
=
true
;
}
}
if
(
readNeedToBeSkipped
)
{
readPassedCIGAROp
++
;
if
(
readNeedToBeSkipped
)
{
readPassedCIGAROp
++
;
continue
;
}
...
...
@@ -829,89 +780,82 @@ int Vihgo::readRunControl()
// string quality = bam1_qual( aln );
// deal with stop option
if
(
this
->
getCountStop
()
)
{
if
(
this
->
detectStopInRead
(
aln
,
pos
,
chr
))
{
if
(
this
->
getCountStop
())
{
if
(
this
->
detectStopInRead
(
aln
,
pos
,
chr
))
{
continue
;
}
}
int
cumulOffset
=
0
;
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
// deal with the cigar special ops
// HARD CLIPPING : No offset increment , no analysis of bases
if
(
op
==
BAM_CHARD_CLIP
)
{
if
(
op
==
BAM_CHARD_CLIP
)
{
continue
;
}
// SOFT CLIPPING : offset increment ; no analysis of bases
if
(
op
==
BAM_CSOFT_CLIP
)
{
if
(
op
==
BAM_CSOFT_CLIP
)
{
cumulOffset
+=
ol
;
continue
;
}
// insertion
if
(
op
==
BAM_CINS
)
{
if
(
op
==
BAM_CINS
)
{
// progress on query to get the inserted sequence.
string
insertedSequence
=
"+"
;
// need to determine on which codon we are
int
currentGenomicPosition
=
pos
;
int
codonStartPos
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
;
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
1
;
}
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
2
;
}
}
// get the bases of the inserted sequence
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
cumulOffset
+
x
);
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
cumulOffset
+
x
);
char
base
;
base
=
this
->
getBase
(
a
);
base
=
this
->
getBase
(
a
);
insertedSequence
+=
base
;
}
// insert the codon in the data
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
)
)
{
map
<
int
,
map
<
string
,
int
>
>
tmpMap
;
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
))
{
map
<
int
,
map
<
string
,
int
>>
tmpMap
;
this
->
sampleData
[
chr
]
=
tmpMap
;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
)
)
{
map
<
string
,
int
>
tmpMap
;
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
))
{
map
<
string
,
int
>
tmpMap
;
this
->
sampleData
[
chr
][
codonStartPos
]
=
tmpMap
;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
insertedSequence
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
insertedSequence
)
>
0
))
{
this
->
sampleData
[
chr
][
codonStartPos
][
insertedSequence
]
=
0
;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this
->
sampleData
[
chr
][
codonStartPos
][
insertedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this
->
sampleData
[
chr
][
codonStartPos
][
insertedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : progress length of insertion
cumulOffset
+=
ol
;
...
...
@@ -920,57 +864,56 @@ int Vihgo::readRunControl()
}
// insertion
if
(
op
==
BAM_CDEL
)
{
if
(
op
==
BAM_CDEL
)
{
// progress on query to get the inserted sequence.
string
deletedSequence
=
"-"
;
// need to determine on which codon we are
int
currentGenomicPosition
=
pos
;
int
codonStartPos
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
;
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
1
;
}
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
2
;
}
}
// get the bases of the reference sequence
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
char
base
;
base
=
this
->
getReferenceBase
(
chr
,
x
+
currentGenomicPosition
);
base
=
this
->
getReferenceBase
(
chr
,
x
+
currentGenomicPosition
);
deletedSequence
+=
base
;
}
// insert the codon in the data
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
)
)
{
map
<
int
,
map
<
string
,
int
>
>
tmpMap
;
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
))
{
map
<
int
,
map
<
string
,
int
>>
tmpMap
;
this
->
sampleData
[
chr
]
=
tmpMap
;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
)
)
{
map
<
string
,
int
>
tmpMap
;
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
))
{
map
<
string
,
int
>
tmpMap
;
this
->
sampleData
[
chr
][
codonStartPos
]
=
tmpMap
;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
deletedSequence
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
deletedSequence
)
>
0
))
{
this
->
sampleData
[
chr
][
codonStartPos
][
deletedSequence
]
=
0
;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this
->
sampleData
[
chr
][
codonStartPos
][
deletedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this
->
sampleData
[
chr
][
codonStartPos
][
deletedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : no progress
// on reference : progress the length of the op
...
...
@@ -978,51 +921,48 @@ int Vihgo::readRunControl()
continue
;
}
for
(
int
i
=
0
;
i
<
aln
->
core
.
l_qseq
;
i
++
)
{
for
(
int
i
=
0
;
i
<
aln
->
core
.
l_qseq
;
i
++
)
{
// check if we are at a codon position
int
currentGenomicPosition
=
pos
+
i
;
string
codon
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
for
(
int
x
=
0
;
x
<
3
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
i
+
x
);
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
for
(
int
x
=
0
;
x
<
3
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
i
+
x
);
char
base
;
base
=
this
->
getBase
(
a
);
base
=
this
->
getBase
(
a
);
codon
+=
base
;
}
i
+=
2
;
if
(
codon
.
find
(
"!"
)
!=
string
::
npos
)
{
if
(
codon
.
find
(
"!"
)
!=
string
::
npos
)
{
continue
;
}
// insert the codon in the data
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
)
)
{
map
<
int
,
map
<
string
,
int
>
>
tmpMap
;
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
))
{
map
<
int
,
map
<
string
,
int
>>
tmpMap
;
this
->
sampleData
[
chr
]
=
tmpMap
;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if
(
!
(
this
->
sampleData
[
chr
].
count
(
currentGenomicPosition
)
>
0
)
)
{
map
<
string
,
int
>
tmpMap
;
if
(
!
(
this
->
sampleData
[
chr
].
count
(
currentGenomicPosition
)
>
0
))
{
map
<
string
,
int
>
tmpMap
;
this
->
sampleData
[
chr
][
currentGenomicPosition
]
=
tmpMap
;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if
(
!
(
this
->
sampleData
[
chr
][
currentGenomicPosition
].
count
(
codon
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
[
chr
][
currentGenomicPosition
].
count
(
codon
)
>
0
))
{
this
->
sampleData
[
chr
][
currentGenomicPosition
][
codon
]
=
0
;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
this
->
sampleData
[
chr
][
currentGenomicPosition
][
codon
]
++
;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
else
{
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in
// control data " << endl;
}
this
->
sampleData
[
chr
][
currentGenomicPosition
][
codon
]
++
;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
else
{
continue
;
}
}
...
...
@@ -1031,34 +971,33 @@ int Vihgo::readRunControl()
}
// destruct objects
bam_hdr_destroy
(
bamHdr
);
sam_close
(
fp_in
);
bam_destroy1
(
aln
);
bam_hdr_destroy
(
bamHdr
);
sam_close
(
fp_in
);
bam_destroy1
(
aln
);
return
0
;
}
int
Vihgo
::
variantCalling
()
{
int
Vihgo
::
variantCalling
()
{
// estimate BG noise on control data
// we need to eliminate "real" variants by testing their vaf to all the sample VAF
// If a variant is detected, a new turn begin while excluding detected variation.
// we need to eliminate "real" variants by testing their vaf to all the sample
// VAF If a variant is detected, a new turn begin while excluding detected
// variation.
// First compute all VAF
map
<
string
,
map
<
int
,
map
<
string
,
int
>
>
>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>
>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
map
<
string
,
map
<
int
,
vector
<
double
>
>
>
VAFControlTable
;
map
<
string
,
map
<
int
,
vector
<
double
>
>
>
VAFTable
;
if
(
this
->
isThereAControl
()
)
{
map
<
string
,
map
<
int
,
map
<
string
,
int
>>>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
map
<
string
,
map
<
int
,
vector
<
double
>>>
VAFControlTable
;
map
<
string
,
map
<
int
,
vector
<
double
>>>
VAFTable
;
if
(
this
->
isThereAControl
())
{
// VAF for control
for
(
myIterA
=
this
->
controlData
.
begin
()
;
myIterA
!=
this
->
controlData
.
end
()
;
myIterA
++
)
{
for
(
myIterA
=
this
->
controlData
.
begin
();
myIterA
!=
this
->
controlData
.
end
();
myIterA
++
)
{
// cerr << myIterA->first << " : " << endl;
for
(
myIterB
=
(
myIterA
->
second
).
begin
()
;
myIterB
!=
(
myIterA
->
second
).
end
()
;
myIterB
++
)
{
for
(
myIterB
=
(
myIterA
->
second
).
begin
();
myIterB
!=
(
myIterA
->
second
).
end
();
myIterB
++
)
{
// cerr << "\t" << myIterB->first << " : " << endl;
double
totalDepth
=
0.0
;
double
altNumber
=
0.0
;
...
...
@@ -1066,48 +1005,46 @@ int Vihgo::variantCalling()
string
refCodon
=
this
->
referenceCodon
[
myIterA
->
first
][
myIterB
->
first
];
string
aa
=
this
->
getAAfromCodon
(
refCodon
);
// cerr << "Analysing codon " << myIterA->first << ":" <<
myIterB->first << " ; Ref is : " << refCodon << " : " << aa << endl;
for
(
myIterC
=
(
myIterB
->
second
).
begin
()
;
myIterC
!=
(
myIterB
->
second
).
end
()
;
myIterC
++
)
{
// cerr << "Analysing codon " << myIterA->first << ":" <<
//
myIterB->first << " ; Ref is : " << refCodon << " : " << aa << endl;
for
(
myIterC
=
(
myIterB
->
second
).
begin
();
myIterC
!=
(
myIterB
->
second
).
end
();
myIterC
++
)
{
string
currentCodon
=
myIterC
->
first
;
string
currentAa
=
this
->
getAAfromCodon
(
currentCodon
);
// cerr << "\tin control : " << currentCodon << " : " << currentAa << " : " << myIterC->second << endl;
// cerr << "\tin control : " << currentCodon << " : " << currentAa <<
// " : " << myIterC->second << endl;
if
(
!
(
currentAa
==
aa
))
{
if
(
!
(
currentAa
==
aa
))
{
altNumber
+=
myIterC
->
second
;
}
totalDepth
+=
myIterC
->
second
;
}
// cerr << "\t\tRepresenting in total : " << altNumber << "/" << totalDepth << endl;
// cerr << "\t\tRepresenting in total : " << altNumber << "/" <<
// totalDepth << endl;
if
(
!
(
VAFControlTable
.
count
(
myIterA
->
first
)
>
0
)
)
{
map
<
int
,
vector
<
double
>
>
tmpMap
;
if
(
!
(
VAFControlTable
.
count
(
myIterA
->
first
)
>
0
))
{
map
<
int
,
vector
<
double
>>
tmpMap
;
VAFControlTable
[
myIterA
->
first
]
=
tmpMap
;
}
double
x
=
(
altNumber
/
totalDepth
);
vector
<
double
>
tmpVect
;
tmpVect
.
push_back
(
altNumber
);
tmpVect
.
push_back
(
totalDepth
);
tmpVect
.
push_back
(
altNumber
);
tmpVect
.
push_back
(
totalDepth
);
VAFControlTable
[
myIterA
->
first
][
myIterB
->
first
]
=
tmpVect
;
// cerr << myIterA->first << " : " << myIterB->first << " : " << VAFControlTable[myIterA->first][myIterB->first] << endl;
// cerr << myIterA->first << " : " << myIterB->first << " : " <<
// VAFControlTable[myIterA->first][myIterB->first] << endl;
}
}
}
// VAF for sample
for
(
myIterA
=
this
->
sampleData
.
begin
()
;
myIterA
!=
this
->
sampleData
.
end
()
;
myIterA
++
)
{
for
(
myIterA
=
this
->
sampleData
.
begin
();
myIterA
!=
this
->
sampleData
.
end
();
myIterA
++
)
{
// cerr << myIterA->first << " : " << endl;
for
(
myIterB
=
(
myIterA
->
second
).
begin
()
;
myIterB
!=
(
myIterA
->
second
).
end
()
;
myIterB
++
)
{
for
(
myIterB
=
(
myIterA
->
second
).
begin
();
myIterB
!=
(
myIterA
->
second
).
end
();
myIterB
++
)
{
// cerr << "\t" << myIterB->first << " : " << endl;
double
totalDepth
=
0.0
;
double
altNumber
=
0.0
;
...
...
@@ -1115,82 +1052,76 @@ int Vihgo::variantCalling()
string
refCodon
=
this
->
referenceCodon
[
myIterA
->
first
][
myIterB
->
first
];
string
aa
=
this
->
getAAfromCodon
(
refCodon
);
// cerr << "Analysing codon " << myIterA->first << ":" << myIterB->first
<< " ; Ref is : " << refCodon << " : " << aa << endl;
for
(
myIterC
=
(
myIterB
->
second
).
begin
()
;
myIterC
!=
(
myIterB
->
second
).
end
()
;
myIterC
++
)
{
// cerr << "Analysing codon " << myIterA->first << ":" << myIterB->first
//
<< " ; Ref is : " << refCodon << " : " << aa << endl;
for
(
myIterC
=
(
myIterB
->
second
).
begin
();
myIterC
!=
(
myIterB
->
second
).
end
();
myIterC
++
)
{
string
currentCodon
=
myIterC
->
first
;
string
currentAa
=
this
->
getAAfromCodon
(
currentCodon
);
// cerr << "\tin sample : " << currentCodon << " : " << currentAa << " : " << myIterC->second << endl;
// cerr << "\tin sample : " << currentCodon << " : " << currentAa << " :
// " << myIterC->second << endl;
if
(
!
(
currentAa
==
aa
))
{
if
(
!
(
currentAa
==
aa
))
{
altNumber
+=
myIterC
->
second
;
}
totalDepth
+=
myIterC
->
second
;
}
// cerr << "\t\tRepresenting in total : " << altNumber << "/" << totalDepth << endl;
// cerr << "\t\tRepresenting in total : " << altNumber << "/" <<
// totalDepth << endl;
if
(
!
(
VAFTable
.
count
(
myIterA
->
first
)
>
0
)
)
{
map
<
int
,
vector
<
double
>
>
tmpMap
;
if
(
!
(
VAFTable
.
count
(
myIterA
->
first
)
>
0
))
{
map
<
int
,
vector
<
double
>>
tmpMap
;
VAFTable
[
myIterA
->
first
]
=
tmpMap
;
}
double
x
=
(
altNumber
/
totalDepth
);
vector
<
double
>
tmpVect
;
tmpVect
.
push_back
(
altNumber
);
tmpVect
.
push_back
(
totalDepth
);
tmpVect
.
push_back
(
altNumber
);
tmpVect
.
push_back
(
totalDepth
);
// VAFControlTable[myIterA->first][myIterB->first] = x * 100;
VAFTable
[
myIterA
->
first
][
myIterB
->
first
]
=
tmpVect
;
// cerr << myIterA->first << " : " << myIterB->first << " : " << VAFControlTable[myIterA->first][myIterB->first] << endl;
// cerr << myIterA->first << " : " << myIterB->first << " : " <<
// VAFControlTable[myIterA->first][myIterB->first] << endl;
}
}
double
totalAlt
=
0.0
;
double
totalDepth
=
0.0
;
// looping while we find a variation
// a variation is a position where the ratio is different from the whole BG noise from the sample
// a variation is a position where the ratio is different from the whole BG
// noise from the sample
int
numberOfVariation
=
1
;
// we'll store variation position in this map and exclude the position found in it at each turn
map
<
string
,
map
<
int
,
int
>
>
positionToExclude
;
// we'll store variation position in this map and exclude the position found
// in it at each turn
map
<
string
,
map
<
int
,
int
>>
positionToExclude
;
int
nTurn
=
1
;
while
(
numberOfVariation
>
0
)
{
while
(
numberOfVariation
>
0
)
{
cerr
<<
"Computing background noise : turn "
<<
nTurn
<<
endl
;
numberOfVariation
=
0
;
vector
<
double
>
t
;
t
.
push_back
(
0.0
);
t
.
push_back
(
0.0
);
t
.
push_back
(
0.0
);
t
.
push_back
(
0.0
);
vector
<
double
>
a
;
a
.
push_back
(
0.0
);
a
.
push_back
(
0.0
);
a
.
push_back
(
0.0
);
a
.
push_back
(
0.0
);
map
<
string
,
map
<
int
,
vector
<
double
>
>
>::
iterator
myIterD
;
map
<
int
,
vector
<
double
>
>::
iterator
myIterE
;
map
<
string
,
map
<
int
,
vector
<
double
>>
>::
iterator
myIterD
;
map
<
int
,
vector
<
double
>
>::
iterator
myIterE
;
// compute a
for
(
myIterD
=
VAFTable
.
begin
();
myIterD
!=
VAFTable
.
end
();
myIterD
++
)
{
for
(
myIterE
=
(
myIterD
->
second
).
begin
()
;
myIterE
!=
(
myIterD
->
second
).
end
()
;
myIterE
++
)
{
for
(
myIterD
=
VAFTable
.
begin
();
myIterD
!=
VAFTable
.
end
();
myIterD
++
)
{
for
(
myIterE
=
(
myIterD
->
second
).
begin
();
myIterE
!=
(
myIterD
->
second
).
end
();
myIterE
++
)
{
// check if position exist in the excluded position
if
(
positionToExclude
.
count
(
myIterD
->
first
)
>
0
)
{
if
(
positionToExclude
[
myIterD
->
first
].
count
(
myIterE
->
first
)
>
0
)
{
if
(
positionToExclude
.
count
(
myIterD
->
first
)
>
0
)
{
if
(
positionToExclude
[
myIterD
->
first
].
count
(
myIterE
->
first
)
>
0
)
{
continue
;
}
}
a
[
0
]
+=
VAFTable
[
myIterD
->
first
][
myIterE
->
first
][
0
];
a
[
1
]
+=
VAFTable
[
myIterD
->
first
][
myIterE
->
first
][
1
];
}
...
...
@@ -1198,18 +1129,14 @@ int Vihgo::variantCalling()
totalAlt
=
a
[
0
];
totalDepth
=
a
[
1
];
// test each position
for
(
myIterD
=
VAFTable
.
begin
();
myIterD
!=
VAFTable
.
end
();
myIterD
++
)
{
for
(
myIterE
=
(
myIterD
->
second
).
begin
()
;
myIterE
!=
(
myIterD
->
second
).
end
()
;
myIterE
++
)
{
for
(
myIterD
=
VAFTable
.
begin
();
myIterD
!=
VAFTable
.
end
();
myIterD
++
)
{
for
(
myIterE
=
(
myIterD
->
second
).
begin
();
myIterE
!=
(
myIterD
->
second
).
end
();
myIterE
++
)
{
// check if position exist in the excluded position
if
(
positionToExclude
.
count
(
myIterD
->
first
)
>
0
)
{
if
(
positionToExclude
[
myIterD
->
first
].
count
(
myIterE
->
first
)
>
0
)
{
if
(
positionToExclude
.
count
(
myIterD
->
first
)
>
0
)
{
if
(
positionToExclude
[
myIterD
->
first
].
count
(
myIterE
->
first
)
>
0
)
{
continue
;
}
}
...
...
@@ -1217,36 +1144,40 @@ int Vihgo::variantCalling()
t
[
0
]
=
VAFTable
[
myIterD
->
first
][
myIterE
->
first
][
0
];
t
[
1
]
=
VAFTable
[
myIterD
->
first
][
myIterE
->
first
][
1
];
double
p
=
chisquare
(
t
,
a
);
// cerr << "testing : " << t[0] << " : " << t[1] << " vs " << a[0] << " : " << a[1] << " ; Pval = " << p << endl;
double
p
=
chisquare
(
t
,
a
);
// cerr << "testing : " << t[0] << " : " << t[1] << " vs " << a[0] << "
// : " << a[1] << " ; Pval = " << p << endl;
if
(
p
<
0.05
)
{
numberOfVariation
++
;
if
(
p
<
0.05
)
{
numberOfVariation
++
;
positionToExclude
[
myIterD
->
first
][
myIterE
->
first
]
=
1
;
cerr
<<
"Excluding "
<<
myIterD
->
first
<<
":"
<<
myIterE
->
first
<<
" with "
<<
t
[
0
]
<<
" : "
<<
t
[
1
]
<<
" vs "
<<
a
[
0
]
<<
" : "
<<
a
[
1
]
<<
" ; Pval = "
<<
p
<<
endl
;
cerr
<<
"Excluding "
<<
myIterD
->
first
<<
":"
<<
myIterE
->
first
<<
" with "
<<
t
[
0
]
<<
" : "
<<
t
[
1
]
<<
" vs "
<<
a
[
0
]
<<
" : "
<<
a
[
1
]
<<
" ; Pval = "
<<
p
<<
endl
;
}
}
}
cerr
<<
"End of turn, with "
<<
numberOfVariation
<<
" to exclude"
<<
endl
;
nTurn
++
;
nTurn
++
;
}
// now counting for the sample.
vector
<
double
>
bg
;
bg
.
push_back
(
totalAlt
);
bg
.
push_back
(
totalDepth
);
bg
.
push_back
(
totalAlt
);
bg
.
push_back
(
totalDepth
);
// map<string , map<int , vector<double> > > VAFControlTable;
// variant calling itself
// open output files
// result file
ofstream
outresultStream
;
outresultStream
.
open
(
(
this
->
outputFile
).
c_str
()
,
ios
::
out
);
outresultStream
<<
"#chromosome
\t
gPosition
\t
gene
\t
AAPosition
\t
referenceCodon
\t
referenceAA
\t
variantCodon
\t
variantAA
\t
totalDepth
\t
AlternativeAlleleratio
\t
FETp-value
\t
FETbgp-value
\t
Annotation"
<<
endl
;
outresultStream
.
open
((
this
->
outputFile
).
c_str
(),
ios
::
out
);
outresultStream
<<
"#chromosome
\t
gPosition
\t
gene
\t
AAPosition
\t
referenceCodon"
"
\t
referenceAA
\t
variantCodon
\t
variantAA
\t
totalDepth
\t
Alter"
"nativeAlleleratio
\t
FETp-value
\t
FETbgp-value
\t
Annotation"
<<
endl
;
// test each position
cerr
<<
"Variant calling in progress ... "
<<
endl
;
...
...
@@ -1254,36 +1185,31 @@ int Vihgo::variantCalling()
int
nbVariant
=
0
;
int
nbCall
=
0
;
map
<
string
,
map
<
int
,
vector
<
double
>
>
>::
iterator
myIterD
;
map
<
int
,
vector
<
double
>
>::
iterator
myIterE
;
for
(
myIterD
=
VAFTable
.
begin
();
myIterD
!=
VAFTable
.
end
();
myIterD
++
)
{
for
(
myIterE
=
(
myIterD
->
second
).
begin
()
;
myIterE
!=
(
myIterD
->
second
).
end
()
;
myIterE
++
)
{
map
<
string
,
int
>
tmpMap
;
map
<
string
,
map
<
int
,
vector
<
double
>>>::
iterator
myIterD
;
map
<
int
,
vector
<
double
>>::
iterator
myIterE
;
for
(
myIterD
=
VAFTable
.
begin
();
myIterD
!=
VAFTable
.
end
();
myIterD
++
)
{
for
(
myIterE
=
(
myIterD
->
second
).
begin
();
myIterE
!=
(
myIterD
->
second
).
end
();
myIterE
++
)
{
map
<
string
,
int
>
tmpMap
;
tmpMap
=
sampleData
[
myIterD
->
first
][
myIterE
->
first
];
map
<
string
,
int
>::
iterator
myIterF
;
map
<
string
,
int
>::
iterator
myIterF
;
double
total
=
0
;
for
(
myIterF
=
tmpMap
.
begin
()
;
myIterF
!=
tmpMap
.
end
()
;
myIterF
++
)
{
for
(
myIterF
=
tmpMap
.
begin
();
myIterF
!=
tmpMap
.
end
();
myIterF
++
)
{
total
+=
myIterF
->
second
;
}
// pass if depth isn't enough
if
(
total
<
this
->
depthThreshold
)
{
nbCall
++
;
if
(
total
<
this
->
depthThreshold
)
{
nbCall
++
;
continue
;
}
for
(
myIterF
=
tmpMap
.
begin
()
;
myIterF
!=
tmpMap
.
end
()
;
myIterF
++
)
{
for
(
myIterF
=
tmpMap
.
begin
();
myIterF
!=
tmpMap
.
end
();
myIterF
++
)
{
cerr
<<
"
\t\t
"
<<
nbCall
<<
endl
;
if
((
nbCall
!=
0
)
&&
((
nbCall
%
1000
)
==
0
)
)
{
cerr
<<
"
\t
"
<<
nbCall
<<
" position tested ; with : "
<<
nbVariant
<<
" found"
<<
endl
;
if
((
nbCall
!=
0
)
&&
((
nbCall
%
1000
)
==
0
))
{
cerr
<<
"
\t
"
<<
nbCall
<<
" position tested ; with : "
<<
nbVariant
<<
" found"
<<
endl
;
}
// cerr << "\t\tgetting ref informations ..." << endl;
...
...
@@ -1295,118 +1221,112 @@ int Vihgo::variantCalling()
string
currentAa
=
this
->
getAAfromCodon
(
currentCodon
);
// disengage variant calling if ref aa = alt aa (synonymous)
if
(
aa
==
currentAa
)
{
if
(
aa
==
currentAa
)
{
continue
;
}
vector
<
double
>
s
;
s
.
push_back
(
0.0
);
s
.
push_back
(
0.0
);
s
.
push_back
(
0.0
);
s
.
push_back
(
0.0
);
vector
<
double
>
c
;
c
.
push_back
(
0.0
);
c
.
push_back
(
0.0
);
c
.
push_back
(
0.0
);
c
.
push_back
(
0.0
);
if
(
this
->
isThereAControl
()
)
{
if
(
VAFControlTable
.
count
(
myIterD
->
first
)
>
0
)
{
if
(
(
VAFControlTable
[
myIterD
->
first
]).
count
(
myIterE
->
first
)
>
0
)
{
if
(
this
->
isThereAControl
())
{
if
(
VAFControlTable
.
count
(
myIterD
->
first
)
>
0
)
{
if
((
VAFControlTable
[
myIterD
->
first
]).
count
(
myIterE
->
first
)
>
0
)
{
c
[
0
]
=
VAFControlTable
[
myIterD
->
first
][
myIterE
->
first
][
0
];
c
[
1
]
=
VAFControlTable
[
myIterD
->
first
][
myIterE
->
first
][
1
];
}
}
}
// cerr << "getting myIterF second : " << myIterF->first << " : " << myIterF->second << endl;
// cerr << "getting myIterF second : " << myIterF->first << " : " <<
// myIterF->second << endl;
s
[
0
]
=
myIterF
->
second
;
s
[
1
]
=
total
;
// control if ratio is enough
double
ratio
=
s
[
0
]
/
total
;
if
(
ratio
<
this
->
ratioThreshold
)
{
nbCall
++
;
if
(
ratio
<
this
->
ratioThreshold
)
{
nbCall
++
;
continue
;
}
if
(
(
(
s
[
0
]
/
s
[
1
])
<
(
c
[
0
]
/
c
[
1
])
)
and
(
this
->
isThereAControl
())
)
{
nbCall
++
;
if
(((
s
[
0
]
/
s
[
1
])
<
(
c
[
0
]
/
c
[
1
]))
and
(
this
->
isThereAControl
()))
{
nbCall
++
;
continue
;
}
double
p
;
double
pc
;
// FET test
if
(
this
->
isThereAControl
()
)
{
cerr
<<
"
\t
Testing : "
<<
s
[
0
]
<<
" : "
<<
s
[
1
]
<<
" vs "
<<
c
[
0
]
<<
" : "
<<
c
[
1
];
double
p
=
fisher_test
(
s
,
c
);
if
(
this
->
isThereAControl
())
{
cerr
<<
"
\t
Testing : "
<<
s
[
0
]
<<
" : "
<<
s
[
1
]
<<
" vs "
<<
c
[
0
]
<<
" : "
<<
c
[
1
];
double
p
=
fisher_test
(
s
,
c
);
cerr
<<
" ; Pval = "
<<
p
<<
endl
;
}
else
{
}
else
{
p
=
0
;
}
if
(
p
<
0.01
)
{
if
(
p
<
0.01
)
{
double
pc
;
string
annotation
=
"No annotation found"
;
// need to perform the chi2 test vs background noise
pc
=
fisher_test
(
s
,
bg
);
cerr
<<
"Performing test vs BG Noise : "
<<
s
[
0
]
<<
" : "
<<
s
[
1
]
<<
" vs "
<<
bg
[
0
]
<<
" : "
<<
bg
[
1
]
<<
" ; Pval = "
<<
pc
<<
endl
;
pc
=
fisher_test
(
s
,
bg
);
cerr
<<
"Performing test vs BG Noise : "
<<
s
[
0
]
<<
" : "
<<
s
[
1
]
<<
" vs "
<<
bg
[
0
]
<<
" : "
<<
bg
[
1
]
<<
" ; Pval = "
<<
pc
<<
endl
;
// outresultStream << "#gene\tAAPosition\treferenceCodon\treferenceAA\tvariantCodon\tvariantAA\ttotalDepth\tAlternativeAlleleratio\tFETp-value\tFETbgp-value\tAnnotation" << endl;
// outresultStream <<
// "#gene\tAAPosition\treferenceCodon\treferenceAA\tvariantCodon\tvariantAA\ttotalDepth\tAlternativeAlleleratio\tFETp-value\tFETbgp-value\tAnnotation"
// << endl;
if
(
pc
<
0.01
)
{
if
(
pc
<
0.01
)
{
// get the protein name
string
prot
;
map
<
string
,
map
<
int
,
int
>
>::
iterator
myIterG
;
for
(
myIterG
=
this
->
refProtCorresp
.
begin
()
;
myIterG
!=
this
->
refProtCorresp
.
end
()
;
myIterG
++
)
{
if
(
this
->
refProtCorresp
[
myIterG
->
first
].
count
(
myIterE
->
first
)
>
0
)
{
map
<
string
,
map
<
int
,
int
>
>::
iterator
myIterG
;
for
(
myIterG
=
this
->
refProtCorresp
.
begin
();
myIterG
!=
this
->
refProtCorresp
.
end
();
myIterG
++
)
{
if
(
this
->
refProtCorresp
[
myIterG
->
first
].
count
(
myIterE
->
first
)
>
0
)
{
prot
=
myIterG
->
first
;
break
;
}
}
// write results
cerr
<<
"Variation "
<<
myIterD
->
first
<<
":"
<<
myIterE
->
first
<<
" with "
<<
s
[
0
]
<<
" : "
<<
s
[
1
]
<<
" vs "
<<
c
[
0
]
<<
" : "
<<
c
[
1
]
<<
" ; ctrlPval = "
<<
p
<<
" ; bgPval = "
<<
pc
<<
endl
;
outresultStream
<<
myIterD
->
first
<<
"
\t
"
<<
myIterE
->
first
<<
"
\t
"
<<
prot
<<
"
\t
"
<<
this
->
refProtCorresp
[
prot
][
myIterE
->
first
]
<<
"
\t
"
<<
refCodon
<<
"
\t
"
<<
aa
<<
"
\t
"
<<
currentCodon
<<
"
\t
"
<<
currentAa
<<
"
\t
"
<<
s
[
1
]
<<
"
\t
"
<<
ratio
<<
"
\t
"
<<
p
<<
"
\t
"
<<
pc
<<
"
\t
"
<<
annotation
<<
endl
;
nbVariant
++
;
cerr
<<
"Variation "
<<
myIterD
->
first
<<
":"
<<
myIterE
->
first
<<
" with "
<<
s
[
0
]
<<
" : "
<<
s
[
1
]
<<
" vs "
<<
c
[
0
]
<<
" : "
<<
c
[
1
]
<<
" ; ctrlPval = "
<<
p
<<
" ; bgPval = "
<<
pc
<<
endl
;
outresultStream
<<
myIterD
->
first
<<
"
\t
"
<<
myIterE
->
first
<<
"
\t
"
<<
prot
<<
"
\t
"
<<
this
->
refProtCorresp
[
prot
][
myIterE
->
first
]
<<
"
\t
"
<<
refCodon
<<
"
\t
"
<<
aa
<<
"
\t
"
<<
currentCodon
<<
"
\t
"
<<
currentAa
<<
"
\t
"
<<
s
[
1
]
<<
"
\t
"
<<
ratio
<<
"
\t
"
<<
p
<<
"
\t
"
<<
pc
<<
"
\t
"
<<
annotation
<<
endl
;
nbVariant
++
;
}
}
nbCall
++
;
nbCall
++
;
}
}
}
outresultStream
.
close
();
return
0
;
}
// parse the bam file to get informations on data of the experiment
int
Vihgo
::
readSample
()
{
int
Vihgo
::
readSample
()
{
long
int
nbLineRead
=
0
;
int
readPassedAln
=
0
;
int
readPassedSecondary
=
0
;
...
...
@@ -1416,42 +1336,41 @@ int Vihgo::readSample()
cerr
<<
"Reading BAM file..."
<<
endl
;
bam1_t
*
aln
=
bam_init1
();
samFile
*
fp_in
=
hts_open
(
(
this
->
getBamFile
()).
c_str
()
,
"r"
);
bam_hdr_t
*
bamHdr
=
sam_hdr_read
(
fp_in
);
samFile
*
fp_in
=
hts_open
((
this
->
getBamFile
()).
c_str
(),
"r"
);
bam_hdr_t
*
bamHdr
=
sam_hdr_read
(
fp_in
);
while
(
sam_read1
(
fp_in
,
bamHdr
,
aln
)
>
0
)
{
nbLineRead
++
;
if
((
nbLineRead
!=
0
)
&&
((
nbLineRead
%
200000
)
==
0
)
)
{
cerr
<<
"
\t
"
<<
nbLineRead
<<
" aln read ;
\n\t\t
Skipped : "
<<
readPassedAln
<<
" unaligned ; "
<<
readPassedSecondary
<<
" secondary alignements ; "
<<
readPassedSupplementary
<<
" supplementary alignements ; "
<<
readPassedMQ
<<
" bad aln quality ; "
<<
readPassedCIGAROp
<<
" special CIGAR operation "
<<
endl
;
while
(
sam_read1
(
fp_in
,
bamHdr
,
aln
)
>
0
)
{
nbLineRead
++
;
if
((
nbLineRead
!=
0
)
&&
((
nbLineRead
%
200000
)
==
0
))
{
cerr
<<
"
\t
"
<<
nbLineRead
<<
" aln read ;
\n\t\t
Skipped : "
<<
readPassedAln
<<
" unaligned ; "
<<
readPassedSecondary
<<
" secondary alignements ; "
<<
readPassedSupplementary
<<
" supplementary alignements ; "
<<
readPassedMQ
<<
" bad aln quality ; "
<<
readPassedCIGAROp
<<
" special CIGAR operation "
<<
endl
;
}
// skip alignement if is unmmapped
if
(
(
aln
->
core
.
flag
&
BAM_FUNMAP
)
==
BAM_FUNMAP
)
{
readPassedAln
++
;
if
((
aln
->
core
.
flag
&
BAM_FUNMAP
)
==
BAM_FUNMAP
)
{
readPassedAln
++
;
continue
;
}
// skip alignement if secondary alignement
if
(
(
aln
->
core
.
flag
&
BAM_FSECONDARY
)
==
BAM_FSECONDARY
)
{
readPassedSecondary
++
;
if
((
aln
->
core
.
flag
&
BAM_FSECONDARY
)
==
BAM_FSECONDARY
)
{
readPassedSecondary
++
;
continue
;
}
// skip alignement if supplmentary alignement
if
(
(
aln
->
core
.
flag
&
BAM_FSUPPLEMENTARY
)
==
BAM_FSUPPLEMENTARY
)
{
readPassedSupplementary
++
;
if
((
aln
->
core
.
flag
&
BAM_FSUPPLEMENTARY
)
==
BAM_FSUPPLEMENTARY
)
{
readPassedSupplementary
++
;
continue
;
}
// Skip alignment if quality score is less than 30
if
(
aln
->
core
.
qual
<
30
)
{
readPassedMQ
++
;
if
(
aln
->
core
.
qual
<
30
)
{
readPassedMQ
++
;
continue
;
}
...
...
@@ -1461,9 +1380,10 @@ int Vihgo::readSample()
string
chr
=
bamHdr
->
target_name
[
aln
->
core
.
tid
];
// Deal with unknown chromosome
if
(
!
(
this
->
isInGeneList
(
chr
)
))
{
cerr
<<
"Read is mapped on a reference not known. Please verify the reference provided."
<<
endl
;
if
(
!
(
this
->
isInGeneList
(
chr
)))
{
cerr
<<
"Read is mapped on a reference not known. Please verify the "
"reference provided."
<<
endl
;
continue
;
}
...
...
@@ -1475,21 +1395,19 @@ int Vihgo::readSample()
// cerr << "Read is in a known gene : " << *myIterA << endl;
// }
// cigar string event detection ; need to be improved to deal with insertion / deletion
// pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD, BACK
// cigar string event detection ; need to be improved to deal with insertion
// / deletion pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD,
// BACK
bool
readNeedToBeSkipped
=
false
;
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
if
(
op
==
BAM_CREF_SKIP
||
op
==
BAM_CPAD
||
op
==
BAM_CBACK
)
{
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
if
(
op
==
BAM_CREF_SKIP
||
op
==
BAM_CPAD
||
op
==
BAM_CBACK
)
{
readNeedToBeSkipped
=
true
;
}
}
if
(
readNeedToBeSkipped
)
{
readPassedCIGAROp
++
;
if
(
readNeedToBeSkipped
)
{
readPassedCIGAROp
++
;
continue
;
}
...
...
@@ -1498,89 +1416,82 @@ int Vihgo::readSample()
// string quality = bam1_qual( aln );
// deal with stop option
if
(
this
->
getCountStop
()
)
{
if
(
this
->
detectStopInRead
(
aln
,
pos
,
chr
))
{
if
(
this
->
getCountStop
())
{
if
(
this
->
detectStopInRead
(
aln
,
pos
,
chr
))
{
continue
;
}
}
int
cumulOffset
=
0
;
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
for
(
int
k
=
0
;
k
<
aln
->
core
.
n_cigar
;
k
++
)
{
const
int
op
=
bam_cigar_op
(
bam_get_cigar
(
aln
)[
k
]);
const
int
ol
=
bam_cigar_oplen
(
bam_get_cigar
(
aln
)[
k
]);
// deal with the cigar special ops
// HARD CLIPPING : No offset increment , no analysis of bases
if
(
op
==
BAM_CHARD_CLIP
)
{
if
(
op
==
BAM_CHARD_CLIP
)
{
continue
;
}
// SOFT CLIPPING : offset increment ; no analysis of bases
if
(
op
==
BAM_CSOFT_CLIP
)
{
if
(
op
==
BAM_CSOFT_CLIP
)
{
cumulOffset
+=
ol
;
continue
;
}
// insertion
if
(
op
==
BAM_CINS
)
{
if
(
op
==
BAM_CINS
)
{
// progress on query to get the inserted sequence.
string
insertedSequence
=
"+"
;
// need to determine on which codon we are
int
currentGenomicPosition
=
pos
;
int
codonStartPos
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
;
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
1
;
}
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
2
;
}
}
// get the bases of the inserted sequence
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
cumulOffset
+
x
);
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
cumulOffset
+
x
);
char
base
;
base
=
this
->
getBase
(
a
);
base
=
this
->
getBase
(
a
);
insertedSequence
+=
base
;
}
// insert the codon in the data
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
)
)
{
map
<
int
,
map
<
string
,
int
>
>
tmpMap
;
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
))
{
map
<
int
,
map
<
string
,
int
>>
tmpMap
;
this
->
sampleData
[
chr
]
=
tmpMap
;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
)
)
{
map
<
string
,
int
>
tmpMap
;
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
))
{
map
<
string
,
int
>
tmpMap
;
this
->
sampleData
[
chr
][
codonStartPos
]
=
tmpMap
;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
insertedSequence
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
insertedSequence
)
>
0
))
{
this
->
sampleData
[
chr
][
codonStartPos
][
insertedSequence
]
=
0
;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this
->
sampleData
[
chr
][
codonStartPos
][
insertedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this
->
sampleData
[
chr
][
codonStartPos
][
insertedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : progress length of insertion
cumulOffset
+=
ol
;
...
...
@@ -1589,57 +1500,56 @@ int Vihgo::readSample()
}
// insertion
if
(
op
==
BAM_CDEL
)
{
if
(
op
==
BAM_CDEL
)
{
// progress on query to get the inserted sequence.
string
deletedSequence
=
"-"
;
// need to determine on which codon we are
int
currentGenomicPosition
=
pos
;
int
codonStartPos
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
;
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
}
else
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
1
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
1
;
}
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
-
2
)
>
0
)
{
codonStartPos
=
currentGenomicPosition
-
2
;
}
}
// get the bases of the reference sequence
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
for
(
int
x
=
0
;
x
<
ol
;
x
++
)
{
char
base
;
base
=
this
->
getReferenceBase
(
chr
,
x
+
currentGenomicPosition
);
base
=
this
->
getReferenceBase
(
chr
,
x
+
currentGenomicPosition
);
deletedSequence
+=
base
;
}
// insert the codon in the data
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
)
)
{
map
<
int
,
map
<
string
,
int
>
>
tmpMap
;
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
))
{
map
<
int
,
map
<
string
,
int
>>
tmpMap
;
this
->
sampleData
[
chr
]
=
tmpMap
;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
)
)
{
map
<
string
,
int
>
tmpMap
;
if
(
!
(
this
->
sampleData
[
chr
].
count
(
codonStartPos
)
>
0
))
{
map
<
string
,
int
>
tmpMap
;
this
->
sampleData
[
chr
][
codonStartPos
]
=
tmpMap
;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
deletedSequence
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
[
chr
][
codonStartPos
].
count
(
deletedSequence
)
>
0
))
{
this
->
sampleData
[
chr
][
codonStartPos
][
deletedSequence
]
=
0
;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this
->
sampleData
[
chr
][
codonStartPos
][
deletedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this
->
sampleData
[
chr
][
codonStartPos
][
deletedSequence
]
++
;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : no progress
// on reference : progress the length of the op
...
...
@@ -1647,52 +1557,49 @@ int Vihgo::readSample()
continue
;
}
for
(
int
i
=
0
;
i
<
aln
->
core
.
l_qseq
;
i
++
)
{
for
(
int
i
=
0
;
i
<
aln
->
core
.
l_qseq
;
i
++
)
{
// check if we are at a codon position
int
currentGenomicPosition
=
pos
+
i
;
string
codon
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
for
(
int
x
=
0
;
x
<
3
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
i
+
x
);
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
for
(
int
x
=
0
;
x
<
3
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
i
+
x
);
char
base
;
base
=
this
->
getBase
(
a
);
base
=
this
->
getBase
(
a
);
codon
+=
base
;
}
i
+=
2
;
if
(
codon
.
find
(
"!"
)
!=
string
::
npos
)
{
if
(
codon
.
find
(
"!"
)
!=
string
::
npos
)
{
continue
;
}
// insert the codon in the data
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
)
)
{
map
<
int
,
map
<
string
,
int
>
>
tmpMap
;
if
(
!
(
this
->
sampleData
.
count
(
chr
)
>
0
))
{
map
<
int
,
map
<
string
,
int
>>
tmpMap
;
this
->
sampleData
[
chr
]
=
tmpMap
;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if
(
!
(
this
->
sampleData
[
chr
].
count
(
currentGenomicPosition
)
>
0
)
)
{
map
<
string
,
int
>
tmpMap
;
if
(
!
(
this
->
sampleData
[
chr
].
count
(
currentGenomicPosition
)
>
0
))
{
map
<
string
,
int
>
tmpMap
;
this
->
sampleData
[
chr
][
currentGenomicPosition
]
=
tmpMap
;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if
(
!
(
this
->
sampleData
[
chr
][
currentGenomicPosition
].
count
(
codon
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
[
chr
][
currentGenomicPosition
].
count
(
codon
)
>
0
))
{
this
->
sampleData
[
chr
][
currentGenomicPosition
][
codon
]
=
0
;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
this
->
sampleData
[
chr
][
currentGenomicPosition
][
codon
]
++
;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
else
{
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in
// control data " << endl;
}
this
->
sampleData
[
chr
][
currentGenomicPosition
][
codon
]
++
;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
else
{
continue
;
}
}
...
...
@@ -1701,49 +1608,44 @@ int Vihgo::readSample()
}
// destruct objects
bam_hdr_destroy
(
bamHdr
);
sam_close
(
fp_in
);
bam_destroy1
(
aln
);
bam_hdr_destroy
(
bamHdr
);
sam_close
(
fp_in
);
bam_destroy1
(
aln
);
return
0
;
}
// Methode to detect a stop in a read
bool
Vihgo
::
detectStopInRead
(
bam1_t
*
aln
,
int32_t
pos
,
string
chr
)
{
bool
Vihgo
::
detectStopInRead
(
bam1_t
*
aln
,
int32_t
pos
,
string
chr
)
{
const
uint8_t
*
sequence
=
bam_get_seq
(
aln
);
for
(
int
i
=
0
;
i
<
aln
->
core
.
l_qseq
;
i
++
)
{
for
(
int
i
=
0
;
i
<
aln
->
core
.
l_qseq
;
i
++
)
{
// check if we are at a codon position
int
currentGenomicPosition
=
pos
+
i
;
string
codon
;
if
(
(
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
for
(
int
x
=
0
;
x
<
3
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
i
+
x
);
if
((
this
->
referenceCodon
[
chr
]).
count
(
currentGenomicPosition
)
>
0
)
{
for
(
int
x
=
0
;
x
<
3
;
x
++
)
{
int
a
=
bam_seqi
(
sequence
,
i
+
x
);
char
base
;
switch
(
a
)
{
case
1
:
switch
(
a
)
{
case
1
:
base
=
'A'
;
break
;
case
2
:
case
2
:
base
=
'C'
;
break
;
case
4
:
case
4
:
base
=
'G'
;
break
;
case
8
:
case
8
:
base
=
'T'
;
break
;
case
15
:
case
15
:
base
=
'N'
;
break
;
}
...
...
@@ -1751,15 +1653,11 @@ bool Vihgo::detectStopInRead( bam1_t * aln , int32_t pos , string chr )
}
i
+=
2
;
if
(
this
->
codonTable
[
codon
]
==
"*"
)
{
if
(
this
->
codonTable
[
codon
]
==
"*"
)
{
return
true
;
}
}
else
{
}
else
{
continue
;
}
}
...
...
@@ -1768,60 +1666,55 @@ bool Vihgo::detectStopInRead( bam1_t * aln , int32_t pos , string chr )
// Method to compute and write the consensus sequence
// need to modify for deletion and insertion
void
Vihgo
::
writeConsensus
()
{
void
Vihgo
::
writeConsensus
()
{
cerr
<<
"Writing consensus sequence ... "
<<
endl
;
ofstream
csStream
;
csStream
.
open
(
(
this
->
getConsensusFile
()).
c_str
()
,
ios
::
out
);
csStream
.
open
((
this
->
getConsensusFile
()).
c_str
(),
ios
::
out
);
map
<
string
,
map
<
int
,
map
<
string
,
int
>
>
>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>
>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
map
<
string
,
map
<
int
,
map
<
string
,
int
>>>::
iterator
myIterA
;
map
<
int
,
map
<
string
,
int
>
>::
iterator
myIterB
;
map
<
string
,
int
>::
iterator
myIterC
;
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterE
;
map
<
int
,
string
>::
iterator
myIterF
;
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterE
;
map
<
int
,
string
>::
iterator
myIterF
;
for
(
myIterE
=
this
->
protCodon
.
begin
()
;
myIterE
!=
this
->
protCodon
.
end
()
;
myIterE
++
)
{
for
(
myIterE
=
this
->
protCodon
.
begin
();
myIterE
!=
this
->
protCodon
.
end
();
myIterE
++
)
{
string
prot
=
myIterE
->
first
;
string
chrom
=
getChrFromProt
(
prot
);
string
chrom
=
getChrFromProt
(
prot
);
if
(
!
(
this
->
sampleData
.
count
(
chrom
)
>
0
)
)
{
if
(
!
(
this
->
sampleData
.
count
(
chrom
)
>
0
))
{
continue
;
}
csStream
<<
">"
<<
prot
<<
endl
;
for
(
myIterF
=
(
myIterE
->
second
).
begin
()
;
myIterF
!=
(
myIterE
->
second
).
end
()
;
myIterF
++
)
{
for
(
myIterF
=
(
myIterE
->
second
).
begin
();
myIterF
!=
(
myIterE
->
second
).
end
();
myIterF
++
)
{
int
aaPosition
=
myIterF
->
first
;
int
gPosition
;
map
<
int
,
int
>
tmpMap
=
this
->
refProtCorresp
[
prot
];
map
<
int
,
int
>::
iterator
myIterG
;
for
(
myIterG
=
tmpMap
.
begin
()
;
myIterG
!=
tmpMap
.
end
()
;
myIterG
++
)
{
if
(
myIterG
->
second
==
aaPosition
)
{
for
(
myIterG
=
tmpMap
.
begin
();
myIterG
!=
tmpMap
.
end
();
myIterG
++
)
{
if
(
myIterG
->
second
==
aaPosition
)
{
gPosition
=
myIterG
->
first
;
break
;
}
}
// cerr << "ppos : " << aaPosition << " ; genomic position is : " << gPosition << endl;
// cerr << "ppos : " << aaPosition << " ; genomic position is : " <<
// gPosition << endl;
// determine most frequent codon
string
codon
=
"NNN"
;
int
count
=
0
;
if
(
this
->
sampleData
[
chrom
].
count
(
gPosition
)
>
0
)
{
map
<
string
,
int
>
tmpMapCodon
;
if
(
this
->
sampleData
[
chrom
].
count
(
gPosition
)
>
0
)
{
map
<
string
,
int
>
tmpMapCodon
;
tmpMapCodon
=
this
->
sampleData
[
chrom
][
gPosition
];
map
<
string
,
int
>::
iterator
myIterH
;
for
(
myIterH
=
tmpMapCodon
.
begin
()
;
myIterH
!=
tmpMapCodon
.
end
()
;
myIterH
++
)
{
if
(
myIterH
->
second
>
count
)
{
map
<
string
,
int
>::
iterator
myIterH
;
for
(
myIterH
=
tmpMapCodon
.
begin
();
myIterH
!=
tmpMapCodon
.
end
();
myIterH
++
)
{
if
(
myIterH
->
second
>
count
)
{
codon
=
myIterH
->
first
;
count
=
myIterH
->
second
;
}
...
...
@@ -1832,89 +1725,71 @@ void Vihgo::writeConsensus()
csStream
<<
endl
;
}
csStream
.
close
();
}
// method to get the chromosome in which the prot is included
string
Vihgo
::
getChrFromProt
(
string
incProt
)
{
string
Vihgo
::
getChrFromProt
(
string
incProt
)
{
return
this
->
genes
[
incProt
][
"chrom"
];
}
// Method to verify the existence of a gene in the reference provided
bool
Vihgo
::
isInGeneList
(
string
incName
)
{
map
<
string
,
map
<
int
,
char
>
>::
iterator
myIterA
;
for
(
myIterA
=
this
->
referenceSequence
.
begin
()
;
myIterA
!=
this
->
referenceSequence
.
end
()
;
myIterA
++
)
{
if
(
myIterA
->
first
==
incName
)
{
bool
Vihgo
::
isInGeneList
(
string
incName
)
{
map
<
string
,
map
<
int
,
char
>>::
iterator
myIterA
;
for
(
myIterA
=
this
->
referenceSequence
.
begin
();
myIterA
!=
this
->
referenceSequence
.
end
();
myIterA
++
)
{
if
(
myIterA
->
first
==
incName
)
{
return
true
;
}
}
return
false
;
}
// Method to determine if a position is in a gene
// return a gene name if possible, if several genes are present : return the
genes separated with comma
// or return "none" if no gene is at the position
string
Vihgo
::
isInGene
(
string
incChrom
,
int
incPos
)
{
// cerr << "Entering isInGene method with incoming arguments : " << incChrom
<< " and " << incPos << endl;
// return a gene name if possible, if several genes are present : return the
//
genes separated with comma
or return "none" if no gene is at the position
string
Vihgo
::
isInGene
(
string
incChrom
,
int
incPos
)
{
// cerr << "Entering isInGene method with incoming arguments : " << incChrom
//
<< " and " << incPos << endl;
string
ret
=
""
;
bool
posFound
=
false
;
map
<
string
,
map
<
string
,
string
>
>::
iterator
myIterA
;
for
(
myIterA
=
genes
.
begin
()
;
myIterA
!=
genes
.
end
()
;
myIterA
++
)
{
map
<
string
,
map
<
string
,
string
>>::
iterator
myIterA
;
for
(
myIterA
=
genes
.
begin
();
myIterA
!=
genes
.
end
();
myIterA
++
)
{
string
allChr
=
genes
[
myIterA
->
first
][
"chrom"
];
vector
<
string
>
allChrVec
=
parseOnSep
(
allChr
,
","
);
vector
<
string
>
allChrVec
=
parseOnSep
(
allChr
,
","
);
vector
<
string
>::
iterator
myIterB
;
for
(
myIterB
=
allChrVec
.
begin
()
;
myIterB
!=
allChrVec
.
end
()
;
myIterB
++
)
{
if
(
*
myIterB
==
incChrom
)
{
for
(
myIterB
=
allChrVec
.
begin
();
myIterB
!=
allChrVec
.
end
();
myIterB
++
)
{
if
(
*
myIterB
==
incChrom
)
{
vector
<
string
>
allStart
;
allStart
=
parseOnSep
(
genes
[
myIterA
->
first
][
"start"
]
,
","
);
allStart
=
parseOnSep
(
genes
[
myIterA
->
first
][
"start"
],
","
);
vector
<
string
>
allEnd
;
allEnd
=
parseOnSep
(
genes
[
myIterA
->
first
][
"end"
]
,
","
);
allEnd
=
parseOnSep
(
genes
[
myIterA
->
first
][
"end"
],
","
);
vector
<
string
>::
iterator
myIterC
;
int
i
=
0
;
for
(
myIterC
=
allStart
.
begin
()
;
myIterC
!=
allStart
.
end
()
;
myIterC
++
)
{
int
s
=
string_to_int
(
*
myIterC
);
if
(
incPos
>=
s
)
{
if
(
incPos
<=
string_to_int
(
allEnd
[
i
]
)
)
{
if
(
ret
.
length
()
>
0
)
{
ret
.
append
(
","
);
for
(
myIterC
=
allStart
.
begin
();
myIterC
!=
allStart
.
end
();
myIterC
++
)
{
int
s
=
string_to_int
(
*
myIterC
);
if
(
incPos
>=
s
)
{
if
(
incPos
<=
string_to_int
(
allEnd
[
i
]))
{
if
(
ret
.
length
()
>
0
)
{
ret
.
append
(
","
);
}
ret
+=
myIterA
->
first
;
}
}
i
++
;
i
++
;
}
}
else
{
}
else
{
continue
;
}
}
}
if
(
ret
.
length
()
>
0
)
{
if
(
ret
.
length
()
>
0
)
{
// cerr << "\tReturning " << ret << endl;
return
ret
;
}
else
{
return
ret
;
}
else
{
// cerr << "\tReturning none" << endl;
return
"none"
;
}
...
...
@@ -1923,89 +1798,62 @@ string Vihgo::isInGene( string incChrom , int incPos )
// getters
// method to get the refernce base, given a chr and a position
char
Vihgo
::
getReferenceBase
(
string
incChr
,
int
incPos
)
{
if
(
(
this
->
referenceSequence
[
incChr
]).
count
(
incPos
)
>
0
)
{
char
Vihgo
::
getReferenceBase
(
string
incChr
,
int
incPos
)
{
if
((
this
->
referenceSequence
[
incChr
]).
count
(
incPos
)
>
0
)
{
return
this
->
referenceSequence
[
incChr
][
incPos
];
}
else
{
}
else
{
return
'N'
;
}
}
// method to get AA from a codon
string
Vihgo
::
getAAfromCodon
(
string
incCodon
)
{
if
(
this
->
codonTable
.
count
(
incCodon
)
>
0
)
{
string
Vihgo
::
getAAfromCodon
(
string
incCodon
)
{
if
(
this
->
codonTable
.
count
(
incCodon
)
>
0
)
{
return
this
->
codonTable
[
incCodon
];
}
else
{
}
else
{
return
"?"
;
}
}
vector
<
string
>
Vihgo
::
getAnnotationFiles
()
{
vector
<
string
>
Vihgo
::
getAnnotationFiles
()
{
vector
<
string
>
res
;
res
.
push_back
(
this
->
integraseFile
);
res
.
push_back
(
this
->
proteaseFile
);
res
.
push_back
(
this
->
retrotranscriptaseFile
);
res
.
push_back
(
this
->
integraseFile
);
res
.
push_back
(
this
->
proteaseFile
);
res
.
push_back
(
this
->
retrotranscriptaseFile
);
return
res
;
}
string
Vihgo
::
getControlBamFile
()
{
return
this
->
inRunControlBam
;
}
string
Vihgo
::
getBamFile
()
{
return
this
->
inputBamFile
;
}
string
Vihgo
::
getControlBamFile
()
{
return
this
->
inRunControlBam
;
}
bool
Vihgo
::
getCountStop
()
{
return
this
->
countStop
;
}
string
Vihgo
::
getBamFile
()
{
return
this
->
inputBamFile
;
}
string
Vihgo
::
getConsensusFile
()
{
return
this
->
csFile
;
}
bool
Vihgo
::
getCountStop
()
{
return
this
->
countStop
;
}
bool
Vihgo
::
isThereAControl
()
{
if
(
(
this
->
inRunControlBam
).
size
()
>
0
)
{
string
Vihgo
::
getConsensusFile
()
{
return
this
->
csFile
;
}
bool
Vihgo
::
isThereAControl
()
{
if
((
this
->
inRunControlBam
).
size
()
>
0
)
{
return
true
;
}
return
false
;
}
char
Vihgo
::
getBase
(
int
b
)
{
char
Vihgo
::
getBase
(
int
b
)
{
// cerr << "inc int base: " << b << " : " << endl;;
switch
(
b
)
{
case
1
:
switch
(
b
)
{
case
1
:
return
'A'
;
case
2
:
case
2
:
return
'C'
;
case
4
:
case
4
:
return
'G'
;
case
8
:
case
8
:
return
'T'
;
case
15
:
case
15
:
return
'N'
;
}
return
'!'
;
...
...
This diff is collapsed.
Click to expand it.
cpp/main.cpp
View file @
a04b957e
...
...
@@ -66,8 +66,8 @@ int main(int argc, char* argv[])
(
"gene,g"
,
po
::
value
<
string
>
(
&
geneFile
)
,
"Gene annotation file related to reference genome provided."
)
(
"depth,d"
,
po
::
value
<
int
>
(
&
depthThreshold
)
->
default_value
(
50
),
"Depth treshold to call a variant. (optional)"
)
(
"ratio,a"
,
po
::
value
<
double
>
(
&
ratioThreshold
)
->
default_value
(
0.1
)
,
"Minimum ratio of alt reads to
take in account
a mutation. (optional)"
)
(
"pvalue,p"
,
po
::
value
<
double
>
(
&
pvalueThreshold
)
->
default_value
(
0.05
)
,
"pvalue threshold from FET to
take in account
a mutation. (optional)"
)
(
"ratio,a"
,
po
::
value
<
double
>
(
&
ratioThreshold
)
->
default_value
(
0.1
)
,
"Minimum ratio of alt reads to
consider
a mutation. (optional)"
)
(
"pvalue,p"
,
po
::
value
<
double
>
(
&
pvalueThreshold
)
->
default_value
(
0.05
)
,
"pvalue threshold from FET to
consider
a mutation. (optional)"
)
(
"stop,s"
,
"Activate stop codon detction."
)
(
"thread,t"
,
po
::
value
<
int
>
(
&
nbThread
)
->
default_value
(
50
),
"Number of thread to use. (NYI)"
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment