Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
vihgo
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gad-public
vihgo
Commits
d31f85e5
Commit
d31f85e5
authored
Apr 23, 2020
by
Yannis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implementation of reference, genes and annotations reading
parent
82c20d8b
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
223 additions
and
7 deletions
+223
-7
Makefile
cpp/Makefile
+0
-0
Vihgo.cpp
cpp/Vihgo.cpp
+223
-7
No files found.
cpp/Makefile
100644 → 100755
View file @
d31f85e5
File mode changed from 100644 to 100755
cpp/Vihgo.cpp
100644 → 100755
View file @
d31f85e5
...
...
@@ -6,6 +6,7 @@
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <algorithm>
#include <sys/time.h>
#include <sys/types.h>
...
...
@@ -24,12 +25,13 @@ Vihgo::Vihgo()
{
//
}
Vihgo
::
Vihgo
(
string
iBF
,
string
gF
,
string
intF
,
string
protF
,
string
refF
,
string
outF
,
string
cResF
,
string
csF
,
string
iRCB
,
int
dT
,
int
nT
,
int
lL
,
double
rT
,
double
pvT
,
bool
countStop
)
Vihgo
::
Vihgo
(
string
iBF
,
string
gF
,
string
rtF
,
string
intF
,
string
protF
,
string
refF
,
string
outF
,
string
cResF
,
string
csF
,
string
iRCB
,
int
dT
,
int
nT
,
int
lL
,
double
rT
,
double
pvT
,
bool
countStop
)
{
this
->
inputBamFile
=
iBF
;
this
->
geneFile
=
gF
;
this
->
integraseFile
=
intF
;
this
->
proteaseFile
=
protF
;
this
->
retrotranscriptaseFile
=
rtF
;
this
->
referenceFile
=
refF
;
this
->
outputFile
=
outF
;
this
->
codonResultFile
=
cResF
;
...
...
@@ -50,6 +52,9 @@ int Vihgo::mainLoop()
//this->displayGenome();
this
->
getGenes
();
this
->
displayGenes
();
this
->
getAnnotation
();
this
->
readRunControl
();
return
0
;
}
...
...
@@ -94,7 +99,7 @@ int Vihgo::getGenome( )
void
Vihgo
::
displayGenome
()
{
map
<
string
,
map
<
int
,
char
>>::
iterator
myIterA
;
map
<
string
,
map
<
int
,
char
>
>::
iterator
myIterA
;
for
(
myIterA
=
this
->
referenceSequence
.
begin
()
;
myIterA
!=
this
->
referenceSequence
.
end
()
;
myIterA
++
)
{
...
...
@@ -126,7 +131,6 @@ int Vihgo::getGenes()
string
end
;
string
strand
;
// fasta parsing
ifstream
geneStream
(
this
->
geneFile
.
c_str
()
);
if
(
geneStream
)
...
...
@@ -188,7 +192,6 @@ int Vihgo::getGenes()
// cerr << "Gene before : " << subMot << endl;
subMot
=
pyReplace
(
subMot
,
"
\"
"
,
""
);
// cerr << "Gene after : " << subMot << endl;
// test if gene is already known
if
(
this
->
genes
.
count
(
subMot
)
==
0
)
{
...
...
@@ -202,7 +205,6 @@ int Vihgo::getGenes()
this
->
genes
[
subMot
][
"strand"
]
=
""
;
this
->
genes
[
subMot
][
"chrom"
]
=
chromosome
;
}
// add informations
if
(
(
this
->
genes
[
subMot
][
"start"
]).
length
()
>
0
)
{
...
...
@@ -226,14 +228,102 @@ int Vihgo::getGenes()
}
}
}
return
0
;
map
<
string
,
map
<
string
,
string
>
>::
iterator
myIterC
;
for
(
myIterC
=
genes
.
begin
()
;
myIterC
!=
genes
.
end
()
;
myIterC
++
)
{
string
currentGene
=
myIterC
->
first
;
cerr
<<
"getting codons for gene "
<<
currentGene
<<
endl
;
chromosome
=
myIterC
->
second
[
"chrom"
];
vector
<
string
>
starts
=
parseOnSep
(
myIterC
->
second
[
"start"
]
,
","
);
vector
<
string
>
ends
=
parseOnSep
(
myIterC
->
second
[
"end"
]
,
","
);
vector
<
string
>
strands
=
parseOnSep
(
myIterC
->
second
[
"strand"
]
,
","
);
int
geneSubPart
=
0
;
for
(
geneSubPart
=
0
;
geneSubPart
<
starts
.
size
()
;
geneSubPart
++
)
{
map
<
int
,
char
>
posList
;
posList
=
this
->
referenceSequence
[
chromosome
];
// sort position in the map
map
<
int
,
char
>::
iterator
myIterE
;
vector
<
int
>
pos
;
for
(
myIterE
=
posList
.
begin
()
;
myIterE
!=
posList
.
end
()
;
myIterE
++
)
{
pos
.
push_back
(
myIterE
->
first
);
}
sort
(
pos
.
begin
(),
pos
.
begin
()
+
pos
.
size
()
);
if
(
strands
[
geneSubPart
]
==
"+"
)
{
cerr
<<
"
\t
Strand is + "
<<
endl
;
}
else
{
cerr
<<
"
\t
Strand is _ "
<<
endl
;
reverse
(
pos
.
begin
(),
pos
.
end
());
}
string
codon
=
""
;
vector
<
int
>::
iterator
myIterF
;
string
s_start
=
starts
[
geneSubPart
];
string
s_end
=
ends
[
geneSubPart
];
int
i_start
=
string_to_int
(
s_start
);
int
i_end
=
string_to_int
(
s_end
);
for
(
myIterF
=
pos
.
begin
();
myIterF
!=
pos
.
end
()
;
myIterF
++
)
{
if
(
(
*
myIterF
>=
i_start
)
&&
(
*
myIterF
<=
i_end
)
)
{
codon
+=
this
->
referenceSequence
[
chromosome
][
*
myIterF
];
// store the codon if terminated
if
(
(
codon
.
length
()
%
3
==
0
)
&&
(
codon
.
length
()
!=
0
))
{
if
(
this
->
referenceCodon
.
count
(
chromosome
)
==
0
)
{
map
<
int
,
string
>
tmpMap
;
this
->
referenceCodon
[
chromosome
]
=
tmpMap
;
}
this
->
referenceCodon
[
chromosome
][
*
myIterF
-
2
]
=
codon
;
cerr
<<
"
\t
Added "
<<
codon
<<
" at position "
<<
*
myIterF
-
2
<<
endl
;
codon
=
""
;
}
}
}
}
}
// convertion to protein aa
map
<
string
,
map
<
int
,
string
>
>::
iterator
myIterG
;
for
(
myIterG
=
this
->
referenceCodon
.
begin
()
;
myIterG
!=
this
->
referenceCodon
.
end
()
;
myIterG
++
)
{
map
<
int
,
string
>
tmpMap
;
this
->
protCodon
[
myIterG
->
first
]
=
tmpMap
;
// sort position in the map
map
<
int
,
string
>::
iterator
myIterE
;
vector
<
int
>
pos
;
for
(
myIterE
=
(
myIterG
->
second
).
begin
()
;
myIterE
!=
(
myIterG
->
second
).
end
()
;
myIterE
++
)
{
pos
.
push_back
(
myIterE
->
first
);
}
sort
(
pos
.
begin
(),
pos
.
begin
()
+
pos
.
size
()
);
vector
<
int
>::
iterator
myIterH
;
int
i
=
1
;
for
(
myIterH
=
pos
.
begin
();
myIterH
!=
pos
.
end
()
;
myIterH
++
)
{
this
->
referenceCodon
[
myIterG
->
first
][
*
myIterH
]
=
i
;
i
++
;
}
}
return
0
;
}
void
Vihgo
::
displayGenes
()
{
cerr
<<
"Displaying gene list : "
<<
endl
;
map
<
string
,
map
<
string
,
string
>>::
iterator
myIterB
;
map
<
string
,
map
<
string
,
string
>
>::
iterator
myIterB
;
for
(
myIterB
=
this
->
genes
.
begin
()
;
myIterB
!=
this
->
genes
.
end
()
;
myIterB
++
)
{
cerr
<<
"Gene : "
<<
myIterB
->
first
<<
" : "
<<
endl
;
...
...
@@ -244,15 +334,133 @@ void Vihgo::displayGenes()
}
}
int
Vihgo
::
getAnnotation
()
{
vector
<
string
>
fileToParse
;
cerr
<<
"Starting annotation parsing "
<<
endl
;
if
(
!
(
this
->
isAnnotationDefined
()
)
)
{
cerr
<<
"Annotation files are not fully provided : passing"
<<
endl
;
return
-
1
;
}
fileToParse
=
this
->
getAnnotationFiles
();
vector
<
string
>::
iterator
myIterA
;
string
mot
;
string
ligne
;
string
chromosome
;
for
(
myIterA
=
fileToParse
.
begin
()
;
myIterA
!=
fileToParse
.
end
()
;
myIterA
++
)
{
// rt parsing
ifstream
annotStream
(
(
*
myIterA
).
c_str
()
);
if
(
annotStream
)
{
while
(
getline
(
annotStream
,
ligne
)
)
{
// pass headers
if
(
ligne
.
at
(
0
)
==
' '
)
{
continue
;
}
if
(
ligne
.
at
(
0
)
==
'P'
)
{
continue
;
}
if
(
ligne
.
at
(
0
)
==
'A'
)
{
continue
;
}
if
(
ligne
.
length
()
<
10
)
{
continue
;
}
// get name of the reference
if
(
ligne
.
at
(
0
)
==
'#'
)
{
chromosome
=
pyReplace
(
ligne
,
"#"
,
""
);
chromosome
=
strip
(
chromosome
);
cerr
<<
"Found a # with line : "
<<
ligne
<<
endl
;
cerr
<<
"
\t\t
to "
<<
chromosome
<<
"."
<<
endl
;
if
(
this
->
referenceCodon
.
count
(
chromosome
)
==
0
)
{
cerr
<<
chromosome
<<
" not found in reference : passing "
<<
endl
;
this
->
annotationIsOk
=
false
;
break
;
}
if
(
this
->
knownMutationTable
.
count
(
chromosome
)
==
0
)
{
map
<
string
,
string
>
tmpMap
;
this
->
knownMutationTable
[
chromosome
]
=
tmpMap
;
cerr
<<
chromosome
<<
" newly created as an annotation "
<<
endl
;
}
}
int
i
=
0
;
// get information from the line
string
s_aapos
;
string
aa
;
string
aamuted
;
string
conclusion
;
istringstream
issLigne
(
ligne
);
while
(
getline
(
issLigne
,
mot
,
'\t'
)
)
{
switch
(
i
)
{
case
0
:
s_aapos
=
mot
;
continue
;
case
2
:
aa
=
mot
;
continue
;
case
4
:
aamuted
=
mot
;
continue
;
case
5
:
conclusion
=
mot
;
continue
;
}
i
++
;
}
string
index
;
index
=
aa
+
s_aapos
+
aamuted
;
if
(
aa
==
aamuted
)
{
continue
;
}
this
->
knownMutationTable
[
chromosome
][
index
]
=
conclusion
;
cerr
<<
"
\t
Adding "
<<
index
<<
" aka "
<<
conclusion
<<
endl
;
}
}
annotStream
.
close
();
}
return
0
;
}
bool
Vihgo
::
isAnnotationDefined
()
{
if
(
((
this
->
proteaseFile
).
length
()
>
0
)
&&
(
this
->
integraseFile
.
length
()
>
0
)
&&
(
this
->
retrotranscriptaseFile
.
length
()
>
0
)
)
{
return
true
;
}
return
false
;
}
int
Vihgo
::
readRunControl
()
{
return
0
;
}
...
...
@@ -260,6 +468,14 @@ void Vihgo::displayGenes()
// getters
vector
<
string
>
Vihgo
::
getAnnotationFiles
()
{
vector
<
string
>
res
;
res
.
push_back
(
this
->
integraseFile
);
res
.
push_back
(
this
->
proteaseFile
);
res
.
push_back
(
this
->
retrotranscriptaseFile
);
return
res
;
}
// setters
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment