Commit d31f85e5 authored by Yannis's avatar Yannis

implementation of reference, genes and annotations reading

parent 82c20d8b
File mode changed from 100644 to 100755
......@@ -6,6 +6,7 @@
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <algorithm>
#include <sys/time.h>
#include <sys/types.h>
......@@ -24,12 +25,13 @@ Vihgo::Vihgo()
{
//
}
Vihgo::Vihgo( string iBF , string gF, string intF, string protF, string refF, string outF, string cResF, string csF, string iRCB, int dT, int nT, int lL, double rT ,double pvT, bool countStop )
Vihgo::Vihgo( string iBF , string gF, string rtF , string intF, string protF, string refF, string outF, string cResF, string csF, string iRCB, int dT, int nT, int lL, double rT ,double pvT, bool countStop )
{
this->inputBamFile = iBF;
this->geneFile = gF;
this->integraseFile = intF;
this->proteaseFile = protF;
this->retrotranscriptaseFile = rtF;
this->referenceFile = refF;
this->outputFile = outF;
this->codonResultFile = cResF;
......@@ -50,6 +52,9 @@ int Vihgo::mainLoop()
//this->displayGenome();
this->getGenes();
this->displayGenes();
this->getAnnotation();
this->readRunControl();
return 0;
}
......@@ -94,7 +99,7 @@ int Vihgo::getGenome( )
void Vihgo::displayGenome()
{
map<string, map<int, char>>::iterator myIterA ;
map<string, map<int, char> >::iterator myIterA ;
for( myIterA = this->referenceSequence.begin() ; myIterA != this->referenceSequence.end() ; myIterA ++ )
{
......@@ -126,7 +131,6 @@ int Vihgo::getGenes()
string end;
string strand;
// fasta parsing
ifstream geneStream ( this->geneFile.c_str() );
if ( geneStream )
......@@ -188,7 +192,6 @@ int Vihgo::getGenes()
// cerr << "Gene before : " << subMot << endl;
subMot = pyReplace(subMot , "\"" , "");
// cerr << "Gene after : " << subMot << endl;
// test if gene is already known
if (this->genes.count( subMot ) == 0 )
{
......@@ -202,7 +205,6 @@ int Vihgo::getGenes()
this->genes[subMot]["strand"] = "";
this->genes[subMot]["chrom"] = chromosome;
}
// add informations
if( (this->genes[subMot]["start"]).length() > 0 )
{
......@@ -226,14 +228,102 @@ int Vihgo::getGenes()
}
}
}
return 0;
map< string , map< string , string > >::iterator myIterC;
for( myIterC = genes.begin() ; myIterC != genes.end() ; myIterC ++ )
{
string currentGene = myIterC->first;
cerr << "getting codons for gene " << currentGene << endl;
chromosome =myIterC->second["chrom"];
vector<string> starts = parseOnSep( myIterC->second["start"] , "," );
vector<string> ends = parseOnSep( myIterC->second["end"] , "," );
vector<string> strands = parseOnSep( myIterC->second["strand"] , "," );
int geneSubPart = 0;
for( geneSubPart = 0 ; geneSubPart < starts.size() ; geneSubPart ++ )
{
map< int , char > posList;
posList = this->referenceSequence[chromosome];
// sort position in the map
map< int , char>::iterator myIterE;
vector<int> pos;
for( myIterE = posList.begin() ; myIterE != posList.end() ; myIterE ++ )
{
pos.push_back( myIterE->first );
}
sort( pos.begin(), pos.begin()+pos.size() );
if ( strands[geneSubPart] == "+" )
{
cerr << "\tStrand is + " << endl;
}
else
{
cerr << "\tStrand is _ " << endl;
reverse(pos.begin(),pos.end());
}
string codon = "";
vector<int>::iterator myIterF;
string s_start = starts[geneSubPart];
string s_end = ends[geneSubPart];
int i_start = string_to_int( s_start );
int i_end = string_to_int( s_end );
for( myIterF = pos.begin(); myIterF != pos.end() ; myIterF++ )
{
if( (*myIterF >= i_start) && (*myIterF <= i_end) )
{
codon += this->referenceSequence[chromosome][*myIterF];
// store the codon if terminated
if( (codon.length() % 3 == 0) && (codon.length() != 0 ))
{
if (this->referenceCodon.count( chromosome ) == 0 )
{
map <int, string > tmpMap;
this->referenceCodon[chromosome] = tmpMap;
}
this->referenceCodon[chromosome][*myIterF - 2] = codon;
cerr << "\tAdded " << codon << " at position " << *myIterF - 2 << endl;
codon = "";
}
}
}
}
}
// convertion to protein aa
map< string , map< int , string > >::iterator myIterG;
for( myIterG = this->referenceCodon.begin() ; myIterG != this->referenceCodon.end() ; myIterG ++)
{
map< int , string > tmpMap;
this->protCodon[myIterG->first] = tmpMap;
// sort position in the map
map< int , string>::iterator myIterE;
vector<int> pos;
for( myIterE = (myIterG->second).begin() ; myIterE != (myIterG->second).end() ; myIterE ++ )
{
pos.push_back( myIterE->first );
}
sort( pos.begin(), pos.begin()+pos.size() );
vector< int >::iterator myIterH;
int i = 1;
for( myIterH = pos.begin(); myIterH != pos.end() ; myIterH ++)
{
this->referenceCodon[myIterG->first][*myIterH] = i;
i++;
}
}
return 0;
}
void Vihgo::displayGenes()
{
cerr << "Displaying gene list : " << endl;
map<string, map< string , string>>::iterator myIterB;
map<string, map< string , string> >::iterator myIterB;
for( myIterB = this->genes.begin() ;myIterB != this->genes.end() ; myIterB ++ )
{
cerr << "Gene : " << myIterB->first << " : " << endl;
......@@ -244,15 +334,133 @@ void Vihgo::displayGenes()
}
}
int Vihgo::getAnnotation()
{
vector<string> fileToParse;
cerr << "Starting annotation parsing " << endl;
if( !( this->isAnnotationDefined() ) )
{
cerr << "Annotation files are not fully provided : passing" << endl;
return -1;
}
fileToParse = this->getAnnotationFiles();
vector< string >::iterator myIterA;
string mot;
string ligne;
string chromosome;
for( myIterA = fileToParse.begin() ; myIterA != fileToParse.end() ; myIterA ++)
{
// rt parsing
ifstream annotStream ( (*myIterA).c_str() );
if ( annotStream )
{
while ( getline ( annotStream, ligne) )
{
// pass headers
if ( ligne.at(0) == ' ' )
{
continue;
}
if ( ligne.at(0) == 'P' )
{
continue;
}
if ( ligne.at(0) == 'A' )
{
continue;
}
if( ligne.length() < 10)
{
continue;
}
// get name of the reference
if ( ligne.at(0) == '#' )
{
chromosome = pyReplace( ligne , "#" , "");
chromosome = strip(chromosome);
cerr << "Found a # with line : " << ligne << endl ;
cerr << "\t\tto " << chromosome << "." << endl;
if( this->referenceCodon.count( chromosome ) == 0 )
{
cerr << chromosome << " not found in reference : passing " << endl;
this->annotationIsOk = false;
break;
}
if (this->knownMutationTable.count( chromosome ) == 0 )
{
map< string , string > tmpMap;
this->knownMutationTable[chromosome] = tmpMap;
cerr << chromosome << " newly created as an annotation " << endl;
}
}
int i = 0;
// get information from the line
string s_aapos;
string aa;
string aamuted;
string conclusion;
istringstream issLigne (ligne);
while ( getline( issLigne, mot, '\t' ) )
{
switch( i )
{
case 0:
s_aapos = mot;
continue;
case 2 :
aa = mot;
continue;
case 4 :
aamuted = mot;
continue;
case 5 :
conclusion = mot;
continue;
}
i ++;
}
string index;
index = aa + s_aapos + aamuted;
if( aa == aamuted )
{
continue;
}
this->knownMutationTable[chromosome][index] = conclusion;
cerr << "\tAdding " << index << " aka " << conclusion << endl;
}
}
annotStream.close();
}
return 0;
}
bool Vihgo::isAnnotationDefined()
{
if( ((this->proteaseFile).length() > 0) && (this->integraseFile.length() > 0) && (this->retrotranscriptaseFile.length() > 0) )
{
return true;
}
return false;
}
int Vihgo::readRunControl()
{
return 0;
}
......@@ -260,6 +468,14 @@ void Vihgo::displayGenes()
// getters
vector<string> Vihgo::getAnnotationFiles()
{
vector<string> res;
res.push_back( this->integraseFile );
res.push_back( this->proteaseFile );
res.push_back( this->retrotranscriptaseFile );
return res;
}
// setters
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment