Commit a04b957e authored by Yannis Duffourd's avatar Yannis Duffourd

Merge branch 'master' of gitlab.gad-bioinfo.org:gad-public/vihgo into master

parents a9fa4b58 b011f58c
// C++ std libs
#include <iostream>
// #include <iomanip>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <fstream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <algorithm>
#include <string>
#include <sys/time.h>
#include <sys/types.h>
#include <vector>
// C++ Boost
#include <boost/program_options.hpp>
// htslib
#include <htslib/sam.h>
......@@ -26,14 +25,13 @@
using namespace std;
Vihgo::Vihgo()
{
Vihgo::Vihgo() {
//
}
Vihgo::Vihgo( string iBF , string gF, string rtF , string intF, string protF, string refF, string outF, string cResF, string csF, string iRCB, int dT, int nT, int lL, double rT ,double pvT, bool countStop )
{
Vihgo::Vihgo(string iBF, string gF, string rtF, string intF, string protF,
string refF, string outF, string cResF, string csF, string iRCB,
int dT, int nT, int lL, double rT, double pvT, bool countStop) {
this->inputBamFile = iBF;
this->geneFile = gF;
this->integraseFile = intF;
......@@ -51,76 +49,73 @@ Vihgo::Vihgo( string iBF , string gF, string rtF , string intF, string protF, st
this->pvalueThreshold = pvT;
this->countStop = countStop;
this->codonTable.insert( pair<string, string>("TTT","F") );
this->codonTable.insert( pair<string, string>("TTC","F") );
this->codonTable.insert( pair<string, string>("TTA","L") );
this->codonTable.insert( pair<string, string>("TTG","L") );
this->codonTable.insert( pair<string, string>("CTT","L") );
this->codonTable.insert( pair<string, string>("CTC","L") );
this->codonTable.insert( pair<string, string>("CTA","L") );
this->codonTable.insert( pair<string, string>("CTG","L") );
this->codonTable.insert( pair<string, string>("ATT","I") );
this->codonTable.insert( pair<string, string>("ATC","I") );
this->codonTable.insert( pair<string, string>("ATA","I") );
this->codonTable.insert( pair<string, string>("ATG","M") );
this->codonTable.insert( pair<string, string>("GTT","V") );
this->codonTable.insert( pair<string, string>("GTC","V") );
this->codonTable.insert( pair<string, string>("GTA","V") );
this->codonTable.insert( pair<string, string>("GTG","V") );
this->codonTable.insert( pair<string, string>("TCT","S") );
this->codonTable.insert( pair<string, string>("TCC","S") );
this->codonTable.insert( pair<string, string>("TCA","S") );
this->codonTable.insert( pair<string, string>("TCG","S") );
this->codonTable.insert( pair<string, string>("CCT","P") );
this->codonTable.insert( pair<string, string>("CCC","P") );
this->codonTable.insert( pair<string, string>("CCA","P") );
this->codonTable.insert( pair<string, string>("CCG","P") );
this->codonTable.insert( pair<string, string>("ACT","T") );
this->codonTable.insert( pair<string, string>("ACC","T") );
this->codonTable.insert( pair<string, string>("ACA","T") );
this->codonTable.insert( pair<string, string>("ACG","T") );
this->codonTable.insert( pair<string, string>("GCT","A") );
this->codonTable.insert( pair<string, string>("GCC","A") );
this->codonTable.insert( pair<string, string>("GCA","A") );
this->codonTable.insert( pair<string, string>("GCG","A") );
this->codonTable.insert( pair<string, string>("TAT","Y") );
this->codonTable.insert( pair<string, string>("TAC","Y") );
this->codonTable.insert( pair<string, string>("TAA","*") );
this->codonTable.insert( pair<string, string>("TAG","*") );
this->codonTable.insert( pair<string, string>("CAT","H") );
this->codonTable.insert( pair<string, string>("CAC","H") );
this->codonTable.insert( pair<string, string>("CAA","Q") );
this->codonTable.insert( pair<string, string>("CAG","Q") );
this->codonTable.insert( pair<string, string>("AAT","N") );
this->codonTable.insert( pair<string, string>("AAC","N") );
this->codonTable.insert( pair<string, string>("AAA","K") );
this->codonTable.insert( pair<string, string>("AAG","K") );
this->codonTable.insert( pair<string, string>("GAT","D") );
this->codonTable.insert( pair<string, string>("GAC","D") );
this->codonTable.insert( pair<string, string>("GAA","E") );
this->codonTable.insert( pair<string, string>("GAG","E") );
this->codonTable.insert( pair<string, string>("TGT","C") );
this->codonTable.insert( pair<string, string>("TGC","C") );
this->codonTable.insert( pair<string, string>("TGA","*") );
this->codonTable.insert( pair<string, string>("TGG","W") );
this->codonTable.insert( pair<string, string>("CGT","R") );
this->codonTable.insert( pair<string, string>("CGC","R") );
this->codonTable.insert( pair<string, string>("CGA","R") );
this->codonTable.insert( pair<string, string>("CGG","R") );
this->codonTable.insert( pair<string, string>("AGT","S") );
this->codonTable.insert( pair<string, string>("AGC","S") );
this->codonTable.insert( pair<string, string>("AGA","R") );
this->codonTable.insert( pair<string, string>("AGG","R") );
this->codonTable.insert( pair<string, string>("GGT","G") );
this->codonTable.insert( pair<string, string>("GGC","G") );
this->codonTable.insert( pair<string, string>("GGA","G") );
this->codonTable.insert( pair<string, string>("GGG","G") );
this->codonTable.insert(pair<string, string>("TTT", "F"));
this->codonTable.insert(pair<string, string>("TTC", "F"));
this->codonTable.insert(pair<string, string>("TTA", "L"));
this->codonTable.insert(pair<string, string>("TTG", "L"));
this->codonTable.insert(pair<string, string>("CTT", "L"));
this->codonTable.insert(pair<string, string>("CTC", "L"));
this->codonTable.insert(pair<string, string>("CTA", "L"));
this->codonTable.insert(pair<string, string>("CTG", "L"));
this->codonTable.insert(pair<string, string>("ATT", "I"));
this->codonTable.insert(pair<string, string>("ATC", "I"));
this->codonTable.insert(pair<string, string>("ATA", "I"));
this->codonTable.insert(pair<string, string>("ATG", "M"));
this->codonTable.insert(pair<string, string>("GTT", "V"));
this->codonTable.insert(pair<string, string>("GTC", "V"));
this->codonTable.insert(pair<string, string>("GTA", "V"));
this->codonTable.insert(pair<string, string>("GTG", "V"));
this->codonTable.insert(pair<string, string>("TCT", "S"));
this->codonTable.insert(pair<string, string>("TCC", "S"));
this->codonTable.insert(pair<string, string>("TCA", "S"));
this->codonTable.insert(pair<string, string>("TCG", "S"));
this->codonTable.insert(pair<string, string>("CCT", "P"));
this->codonTable.insert(pair<string, string>("CCC", "P"));
this->codonTable.insert(pair<string, string>("CCA", "P"));
this->codonTable.insert(pair<string, string>("CCG", "P"));
this->codonTable.insert(pair<string, string>("ACT", "T"));
this->codonTable.insert(pair<string, string>("ACC", "T"));
this->codonTable.insert(pair<string, string>("ACA", "T"));
this->codonTable.insert(pair<string, string>("ACG", "T"));
this->codonTable.insert(pair<string, string>("GCT", "A"));
this->codonTable.insert(pair<string, string>("GCC", "A"));
this->codonTable.insert(pair<string, string>("GCA", "A"));
this->codonTable.insert(pair<string, string>("GCG", "A"));
this->codonTable.insert(pair<string, string>("TAT", "Y"));
this->codonTable.insert(pair<string, string>("TAC", "Y"));
this->codonTable.insert(pair<string, string>("TAA", "*"));
this->codonTable.insert(pair<string, string>("TAG", "*"));
this->codonTable.insert(pair<string, string>("CAT", "H"));
this->codonTable.insert(pair<string, string>("CAC", "H"));
this->codonTable.insert(pair<string, string>("CAA", "Q"));
this->codonTable.insert(pair<string, string>("CAG", "Q"));
this->codonTable.insert(pair<string, string>("AAT", "N"));
this->codonTable.insert(pair<string, string>("AAC", "N"));
this->codonTable.insert(pair<string, string>("AAA", "K"));
this->codonTable.insert(pair<string, string>("AAG", "K"));
this->codonTable.insert(pair<string, string>("GAT", "D"));
this->codonTable.insert(pair<string, string>("GAC", "D"));
this->codonTable.insert(pair<string, string>("GAA", "E"));
this->codonTable.insert(pair<string, string>("GAG", "E"));
this->codonTable.insert(pair<string, string>("TGT", "C"));
this->codonTable.insert(pair<string, string>("TGC", "C"));
this->codonTable.insert(pair<string, string>("TGA", "*"));
this->codonTable.insert(pair<string, string>("TGG", "W"));
this->codonTable.insert(pair<string, string>("CGT", "R"));
this->codonTable.insert(pair<string, string>("CGC", "R"));
this->codonTable.insert(pair<string, string>("CGA", "R"));
this->codonTable.insert(pair<string, string>("CGG", "R"));
this->codonTable.insert(pair<string, string>("AGT", "S"));
this->codonTable.insert(pair<string, string>("AGC", "S"));
this->codonTable.insert(pair<string, string>("AGA", "R"));
this->codonTable.insert(pair<string, string>("AGG", "R"));
this->codonTable.insert(pair<string, string>("GGT", "G"));
this->codonTable.insert(pair<string, string>("GGC", "G"));
this->codonTable.insert(pair<string, string>("GGA", "G"));
this->codonTable.insert(pair<string, string>("GGG", "G"));
}
int Vihgo::mainLoop()
{
int Vihgo::mainLoop() {
this->getGenome();
this->displayGenome();
this->getGenes();
......@@ -128,22 +123,20 @@ int Vihgo::mainLoop()
// this->getAnnotation();
this->displayCodons();
this->displayReferenceCodons();
if( this->isThereAControl() )
{
if (this->isThereAControl()) {
this->readRunControl();
}
this->readSample();
this->displayData( "control" );
this->displayData( "sample" );
this->writeCodonFile( "sample" );
this->displayData("control");
this->displayData("sample");
this->writeCodonFile("sample");
this->writeConsensus();
this->variantCalling();
return 0;
}
int Vihgo::getGenome()
{
int Vihgo::getGenome() {
struct timeval tbegin, tend;
string ligne;
char mot;
......@@ -152,34 +145,27 @@ int Vihgo::getGenome()
string currentChr;
// fasta parsing
ifstream fastaStream ( this->referenceFile.c_str() );
if ( fastaStream )
{
while ( getline ( fastaStream, ligne) )
{
if( ligne.find( header ) == 0 )
{
ligne.replace(0,1,"");
ifstream fastaStream(this->referenceFile.c_str());
if (fastaStream) {
while (getline(fastaStream, ligne)) {
if (ligne.find(header) == 0) {
ligne.replace(0, 1, "");
// Deal with headers including spaces, etc...
if ( ligne.find( " ") != string::npos )
{
ligne = parseOnSep( ligne , " " )[0];
if (ligne.find(" ") != string::npos) {
ligne = parseOnSep(ligne, " ")[0];
}
currentChr = ligne;
map< int , char> tmpMap;
map<int, char> tmpMap;
this->referenceSequence[currentChr] = tmpMap;
}
else
{
for( unsigned int k = 0 ; k < ligne.length() ; k ++ )
{
} else {
for (unsigned int k = 0; k < ligne.length(); k++) {
mot = ligne[k];
mot = checkBase( mot );
mot = checkBase(mot);
this->referenceSequence[currentChr][i] = mot;
i ++;
i++;
}
}
}
......@@ -187,27 +173,23 @@ int Vihgo::getGenome()
return 0;
}
void Vihgo::displayGenome()
{
map<string, map<int, char> >::iterator myIterA ;
void Vihgo::displayGenome() {
map<string, map<int, char>>::iterator myIterA;
for( myIterA = this->referenceSequence.begin() ; myIterA != this->referenceSequence.end() ; myIterA ++ )
{
for (myIterA = this->referenceSequence.begin();
myIterA != this->referenceSequence.end(); myIterA++) {
cerr << "Chr " << myIterA->first << " : " << endl;
map<int, char> tmpMap;
tmpMap = myIterA->second;
map<int, char>::iterator myIterB;
for( myIterB = tmpMap.begin() ; myIterB != tmpMap.end() ; myIterB ++ )
{
for (myIterB = tmpMap.begin(); myIterB != tmpMap.end(); myIterB++) {
cerr << myIterB->first << " : " << myIterB->second << " ; ";
}
cerr << endl;
}
}
int Vihgo::getGenes()
{
int Vihgo::getGenes() {
struct timeval tbegin, tend;
string ligne;
string mot;
......@@ -223,77 +205,66 @@ int Vihgo::getGenes()
string strand;
// gff parsing
ifstream geneStream ( this->geneFile.c_str() );
if ( geneStream )
{
while ( getline ( geneStream, ligne) )
{
ifstream geneStream(this->geneFile.c_str());
if (geneStream) {
while (getline(geneStream, ligne)) {
// need to deal with header of the gff "#"
if( ligne.find( headerGff ) == 0 )
{
if (ligne.find(headerGff) == 0) {
continue;
}
i = 0;
// get information from the line
istringstream issLigne (ligne);
while ( getline( issLigne, mot, '\t' ) )
{
switch( i )
{
case 0 :
istringstream issLigne(ligne);
while (getline(issLigne, mot, '\t')) {
switch (i) {
case 0:
chromosome = mot;
break;
case 2 :
case 2:
cds = mot;
break;
case 3 :
start= mot;
case 3:
start = mot;
break;
case 4 :
case 4:
end = mot;
break;
case 6 :
case 6:
strand = mot;
break;
case 8 :
case 8:
annot = mot;
break;
default :
default:
break;
}
i ++;
i++;
}
if( cds == "CDS" )
{
istringstream issAnnot (annot);
while ( getline( issAnnot, mot, ';' ) )
{
if( mot.find( "protein_id" ) == 0 )
{
istringstream issMot( mot );
if (cds == "CDS") {
istringstream issAnnot(annot);
while (getline(issAnnot, mot, ';')) {
if (mot.find("protein_id") == 0) {
istringstream issMot(mot);
string subMot;
string g;
i = 0;
while ( getline( issMot, subMot, '=' ) )
{
if( i == 1 )
{
while (getline(issMot, subMot, '=')) {
if (i == 1) {
g = subMot;
}
i ++;
i++;
}
// modify gene string : deleting special chars
// cerr << "Gene before : " << subMot << endl;
subMot = pyReplace(subMot , "\"" , "");
subMot = pyReplace(subMot, "\"", "");
// cerr << "Gene after : " << subMot << endl;
// test if gene is already known
if (this->genes.count( subMot ) == 0 )
{
if (this->genes.count(subMot) == 0) {
// cerr << "Creating new gene : " << subMot << endl;
map<string , string> tmpMap;
map<string, string> tmpMap;
map<int, string> tmpMapT;
this->genes[subMot] = tmpMap;
// this->referenceCodon[subMot] = tmpMapT;
......@@ -303,23 +274,20 @@ int Vihgo::getGenes()
this->genes[subMot]["chrom"] = chromosome;
}
// add informations
if( (this->genes[subMot]["start"]).length() > 0 )
{
(this->genes[subMot]["start"]).append( "," );
if ((this->genes[subMot]["start"]).length() > 0) {
(this->genes[subMot]["start"]).append(",");
}
(this->genes[subMot]["start"]).append( start );
(this->genes[subMot]["start"]).append(start);
if( (this->genes[subMot]["end"]).length() > 0 )
{
(this->genes[subMot]["end"]).append( ",");
if ((this->genes[subMot]["end"]).length() > 0) {
(this->genes[subMot]["end"]).append(",");
}
(this->genes[subMot]["end"]).append( end );
(this->genes[subMot]["end"]).append(end);
if( (this->genes[subMot]["strand"]).length() > 0 )
{
if ((this->genes[subMot]["strand"]).length() > 0) {
(this->genes[subMot]["strand"]).append(",");
}
(this->genes[subMot]["strand"]).append( strand );
(this->genes[subMot]["strand"]).append(strand);
}
}
}
......@@ -328,93 +296,82 @@ int Vihgo::getGenes()
// this->displayGenes();
map< string , map< string , string > >::iterator myIterC;
for( myIterC = genes.begin() ; myIterC != genes.end() ; myIterC ++ )
{
map<string, map<string, string>>::iterator myIterC;
for (myIterC = genes.begin(); myIterC != genes.end(); myIterC++) {
string currentGene = myIterC->first;
// cerr << "getting codons for gene " << currentGene << endl;
chromosome = myIterC->second["chrom"];
vector<string> starts = parseOnSep( myIterC->second["start"] , "," );
vector<string> ends = parseOnSep( myIterC->second["end"] , "," );
vector<string> strands = parseOnSep( myIterC->second["strand"] , "," );
vector<string> starts = parseOnSep(myIterC->second["start"], ",");
vector<string> ends = parseOnSep(myIterC->second["end"], ",");
vector<string> strands = parseOnSep(myIterC->second["strand"], ",");
// create gene references in prot relative tables
map< int , string > tmpMap;
map< int , int > tmpMapSecond;
map<int, string> tmpMap;
map<int, int> tmpMapSecond;
this->protCodon[currentGene] = tmpMap;
this->refProtCorresp[currentGene] = tmpMapSecond;
int protPos = 1;
int geneSubPart = 0;
for( geneSubPart = 0 ; geneSubPart < starts.size() ; geneSubPart ++ )
{
map< int , char > posList;
for (geneSubPart = 0; geneSubPart < starts.size(); geneSubPart++) {
map<int, char> posList;
posList = this->referenceSequence[chromosome];
cerr << "getting position on the related chromosome " << chromosome << " : " << posList.size() << endl;
cerr << "getting position on the related chromosome " << chromosome
<< " : " << posList.size() << endl;
// sort position in the map
map< int , char>::iterator myIterE;
map<int, char>::iterator myIterE;
vector<int> pos;
for( myIterE = posList.begin() ; myIterE != posList.end() ; myIterE ++ )
{
pos.push_back( myIterE->first );
for (myIterE = posList.begin(); myIterE != posList.end(); myIterE++) {
pos.push_back(myIterE->first);
}
sort( pos.begin(), pos.begin()+pos.size() );
sort(pos.begin(), pos.begin() + pos.size());
if ( strands[geneSubPart] == "+" )
{
if (strands[geneSubPart] == "+") {
cerr << "\tStrand is + " << endl;
}
else
{
} else {
cerr << "\tStrand is - " << endl;
reverse(pos.begin(),pos.end());
reverse(pos.begin(), pos.end());
}
string codon = "";
vector<int>::iterator myIterF;
string s_start = starts[geneSubPart];
string s_end = ends[geneSubPart];
int i_start = string_to_int( s_start );
int i_end = string_to_int( s_end );
int i_start = string_to_int(s_start);
int i_end = string_to_int(s_end);
cerr << "Analyzing gene subpart : " << currentGene << " : " << chromosome << ":" << i_start << "-" << i_end << " : " << strands[geneSubPart] << endl;
cerr << "Analyzing gene subpart : " << currentGene << " : " << chromosome
<< ":" << i_start << "-" << i_end << " : " << strands[geneSubPart]
<< endl;
for( myIterF = pos.begin(); myIterF != pos.end() ; myIterF++ )
{
if( (*myIterF >= i_start) && (*myIterF <= i_end) )
{
for (myIterF = pos.begin(); myIterF != pos.end(); myIterF++) {
if ((*myIterF >= i_start) && (*myIterF <= i_end)) {
codon += this->referenceSequence[chromosome][*myIterF];
// store the codon if terminated
if( (codon.length() % 3 == 0) && (codon.length() != 0 ))
{
if (this->referenceCodon.count( chromosome ) == 0 )
{
map <int, string > tmpMap;
if ((codon.length() % 3 == 0) && (codon.length() != 0)) {
if (this->referenceCodon.count(chromosome) == 0) {
map<int, string> tmpMap;
this->referenceCodon[chromosome] = tmpMap;
}
// fill objects
// reference genomic codon
this->referenceCodon[chromosome][*myIterF - 2] = codon;
// protein aa
this->protCodon[currentGene][protPos] = this->codonTable[ codon ];
this->protCodon[currentGene][protPos] = this->codonTable[codon];
// corresp between genomic and proteic numerotation
this->refProtCorresp[currentGene][*myIterF - 2] = protPos;
// logs for test purposes
// cerr << "\tAdded " << codon << " : " << this->codonTable[ codon ] << " at g. pos " << *myIterF - 2 << " ; p. pos " << protPos << " in gene " << currentGene << endl;
// cerr << "\tAdded " << codon << " : " << this->codonTable[ codon ]
// << " at g. pos " << *myIterF - 2 << " ; p. pos " << protPos << "
// in gene " << currentGene << endl;
codon = "";
protPos ++;
protPos++;
}
}
}
......@@ -424,89 +381,82 @@ int Vihgo::getGenes()
return 0;
}
void Vihgo::displayGenes()
{
void Vihgo::displayGenes() {
cerr << "Displaying gene list : " << endl;
map<string, map< string , string> >::iterator myIterB;
for( myIterB = this->genes.begin() ;myIterB != this->genes.end() ; myIterB ++ )
{
map<string, map<string, string>>::iterator myIterB;
for (myIterB = this->genes.begin(); myIterB != this->genes.end(); myIterB++) {
cerr << "Gene : " << myIterB->first << " : " << endl;
map< string , string> tmpMap;
map<string, string> tmpMap;
tmpMap = myIterB->second;
cerr << "\tchr : " << tmpMap["chrom"] << " ; start : " << tmpMap["start"] << " ; end : " << tmpMap["end"] << " ; strand : " << tmpMap["strand"] << endl;
cerr << "\tchr : " << tmpMap["chrom"] << " ; start : " << tmpMap["start"]
<< " ; end : " << tmpMap["end"] << " ; strand : " << tmpMap["strand"]
<< endl;
}
}
void Vihgo::displayCodons()
{
void Vihgo::displayCodons() {
cerr << "Displaying prot codon list : " << endl;
//std::map<std::string, std::map< int , std::string> > protCodon;
map< string, map< int , string> >::iterator myIterA;
for( myIterA = this->protCodon.begin() ;myIterA != this->protCodon.end() ; myIterA ++ )
{
// std::map<std::string, std::map< int , std::string> > protCodon;
map<string, map<int, string>>::iterator myIterA;
for (myIterA = this->protCodon.begin(); myIterA != this->protCodon.end();
myIterA++) {
cerr << "Gene : " << myIterA->first << " : " << endl;
map< int , string> tmpMap;
map<int, string> tmpMap;
tmpMap = myIterA->second;
map<int , string>::iterator myIterB;
for(myIterB = tmpMap.begin() ; myIterB != tmpMap.end() ; myIterB ++)
{
cerr << "\tPos : " << myIterB->first << " ; AA : " << myIterB->second << endl;
map<int, string>::iterator myIterB;
for (myIterB = tmpMap.begin(); myIterB != tmpMap.end(); myIterB++) {
cerr << "\tPos : " << myIterB->first << " ; AA : " << myIterB->second
<< endl;
}
}
}
void Vihgo::displayReferenceCodons()
{
void Vihgo::displayReferenceCodons() {
// std::map<std::string, std::map< int , std::string> > referenceCodon;
map<string , map<int , string> >::iterator myIterA;
map<int , string>::iterator myIterB;
map<string, map<int, string>>::iterator myIterA;
map<int, string>::iterator myIterB;
cerr << "Reference codons : " << endl;
for (myIterA = this->referenceCodon.begin() ; myIterA != this->referenceCodon.end() ; myIterA ++ )
{
for (myIterA = this->referenceCodon.begin();
myIterA != this->referenceCodon.end(); myIterA++) {
cerr << "\t" << myIterA->first << " : " << endl;
for( myIterB = (myIterA->second).begin() ; myIterB != (myIterA->second).end() ; myIterB ++)
{
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
cerr << "\t\t" << myIterB->first << " : " << myIterB->second << endl;
}
}
}
void Vihgo::displayData( string incType )
{
map<string , map<int , map< string , int> > >::iterator myIterA;
map<int , map< string , int> >::iterator myIterB;
map< string , int>::iterator myIterC;
void Vihgo::displayData(string incType) {
map<string, map<int, map<string, int>>>::iterator myIterA;
map<int, map<string, int>>::iterator myIterB;
map<string, int>::iterator myIterC;
if ( incType == "control" )
{
for( myIterA = this->controlData.begin() ; myIterA != this->controlData.end() ; myIterA ++ )
{
if (incType == "control") {
for (myIterA = this->controlData.begin();
myIterA != this->controlData.end(); myIterA++) {
cerr << myIterA->first << " : " << endl;
for( myIterB = (myIterA->second).begin() ; myIterB != (myIterA->second).end() ; myIterB ++ )
{
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
cerr << "\t" << myIterB->first << " : " << endl;
for( myIterC = (myIterB->second).begin() ; myIterC != (myIterB->second).end() ; myIterC ++ )
{
for (myIterC = (myIterB->second).begin();
myIterC != (myIterB->second).end(); myIterC++) {
cerr << "\t\t" << myIterC->first << " : " << myIterC->second << endl;
}
}
}
}
if ( incType == "sample" )
{
for( myIterA = this->sampleData.begin() ; myIterA != this->sampleData.end() ; myIterA ++ )
{
if (incType == "sample") {
for (myIterA = this->sampleData.begin(); myIterA != this->sampleData.end();
myIterA++) {
cerr << myIterA->first << " : " << endl;
for( myIterB = (myIterA->second).begin() ; myIterB != (myIterA->second).end() ; myIterB ++ )
{
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
cerr << "\t" << myIterB->first << " : " << endl;
for( myIterC = (myIterB->second).begin() ; myIterC != (myIterB->second).end() ; myIterC ++ )
{
for (myIterC = (myIterB->second).begin();
myIterC != (myIterB->second).end(); myIterC++) {
cerr << "\t\t" << myIterC->first << " : " << myIterC->second << endl;
}
}
......@@ -516,90 +466,96 @@ void Vihgo::displayData( string incType )
// Method to write down the codons
// NEED A REWAMP FOR ADJUSTMENT TO NEW DATA STORAGE
void Vihgo::writeCodonFile( string incType )
{
map<string , map<int , map< string , int> > >::iterator myIterA;
map<int , map< string , int> >::iterator myIterB;
map< string , int>::iterator myIterC;
void Vihgo::writeCodonFile(string incType) {
map<string, map<int, map<string, int>>>::iterator myIterA;
map<int, map<string, int>>::iterator myIterB;
map<string, int>::iterator myIterC;
cerr << "Writing codons ... " << endl;
ofstream codonStream;
codonStream.open( (this->codonResultFile).c_str() , ios::out );
codonStream.open((this->codonResultFile).c_str(), ios::out);
// I/O optimisation
ios_base::sync_with_stdio(false);
cin.tie(nullptr);
// header
codonStream << "#Chrom\tGenomicPosition\trefProt\tReferenceCodon\tReferenceAA\taaPosition\tAlternativeCodon\tAlternativeAA/Depth\tAlternativeAlleleRatio\tcrtlCounts\tctrlRatio" << endl;
if ( incType == "sample" )
{
for( myIterA = this->sampleData.begin() ; myIterA != this->sampleData.end() ; myIterA ++ )
{
codonStream << "#Chrom\tGenomicPosition\trefProt\tReferenceCodon\tReferenceAA"
"\taaPosition\tAlternativeCodon\tAlternativeAA/"
"Depth\tAlternativeAlleleRatio\tcrtlCounts\tctrlRatio"
<< endl;
if (incType == "sample") {
for (myIterA = this->sampleData.begin(); myIterA != this->sampleData.end();
myIterA++) {
// codonStream << myIterA->first << " : " << endl;
for( myIterB = (myIterA->second).begin() ; myIterB != (myIterA->second).end() ; myIterB ++ )
{
//get prot name for this position
string genes = isInGene( myIterA->first , myIterB->first );
vector <string> g;
g = parseOnSep( genes , "," );
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
// get prot name for this position
string genes = isInGene(myIterA->first, myIterB->first);
vector<string> g;
g = parseOnSep(genes, ",");
vector<string>::iterator myIterD;
for (myIterD = g.begin() ; myIterD != g.end() ; myIterD ++ )
{
for (myIterD = g.begin(); myIterD != g.end(); myIterD++) {
string ref = *myIterD;
string refCodon = this->referenceCodon[myIterA->first][myIterB->first];
string refCodon =
this->referenceCodon[myIterA->first][myIterB->first];
string refAA = this->getAAfromCodon(refCodon);
int position = this->refProtCorresp[ref][myIterB->first];
double depth = 0.0;
//compute total depth at this pos
for( myIterC = (myIterB->second).begin() ; myIterC != (myIterB->second).end() ; myIterC ++ )
{
// compute total depth at this pos
for (myIterC = (myIterB->second).begin();
myIterC != (myIterB->second).end(); myIterC++) {
depth += myIterC->second;
}
for( myIterC = (myIterB->second).begin() ; myIterC != (myIterB->second).end() ; myIterC ++ )
{
for (myIterC = (myIterB->second).begin();
myIterC != (myIterB->second).end(); myIterC++) {
string altCodon = myIterC->first;
string altAA = this->getAAfromCodon(altCodon);
double altCount = myIterC->second;
double allelicRatio = ( altCount / depth ) * 100;
double allelicRatio = (altCount / depth) * 100;
double ctrlCounts = 0.0;
double ctrlRatio = 0.0;
double ctrlDepth = 0.0;
if ( this->controlData.count( myIterA->first ) > 0 )
{
if (this->controlData.count(myIterA->first) > 0) {
// cerr << "\tfound ref in ctrl : " << myIterA->first << endl;
if( this->controlData[myIterA->first].count( myIterB->first ) > 0 )
{
// cerr << "\tfound position in ctrl : " << myIterB->first << endl;
if (this->controlData[myIterA->first].count(myIterB->first) > 0) {
// cerr << "\tfound position in ctrl : " << myIterB->first <<
// endl;
// compute depth for control
map< string , int>::iterator myIterE;
map<string, int>::iterator myIterE;
for ( myIterE = this->controlData[myIterA->first][myIterB->first].begin() ; myIterE != this->controlData[myIterA->first][myIterB->first].end() ; myIterE ++ )
{
for (myIterE = this->controlData[myIterA->first][myIterB->first]
.begin();
myIterE !=
this->controlData[myIterA->first][myIterB->first].end();
myIterE++) {
ctrlDepth += myIterE->second;
}
if (this->controlData[myIterA->first][myIterB->first][altCodon] > 0 )
{
ctrlCounts = this->controlData[myIterA->first][myIterB->first][altCodon];
// cerr << "\tfound codon in ctrl : " << altCodon << " : " << this->controlData[myIterA->first][myIterB->first][altCodon] << endl;
if (this->controlData[myIterA->first][myIterB->first]
[altCodon] > 0) {
ctrlCounts = this->controlData[myIterA->first][myIterB->first]
[altCodon];
// cerr << "\tfound codon in ctrl : " << altCodon << " : " <<
// this->controlData[myIterA->first][myIterB->first][altCodon]
// << endl;
}
ctrlRatio = ( ctrlCounts / ctrlDepth ) * 100;
ctrlRatio = (ctrlCounts / ctrlDepth) * 100;
}
}
// write data
codonStream << myIterA->first << "\t" << myIterB->first << "\t" << ref << "\t" << refCodon << "\t" << refAA << "\t" << position << "\t" << altCodon << "\t" << altAA << "\t" << altCount << "/" << (int)depth << "\t" << allelicRatio << "\t" << ctrlCounts << "/" << ctrlDepth << "\t" << ctrlRatio << endl;
codonStream << myIterA->first << "\t" << myIterB->first << "\t"
<< ref << "\t" << refCodon << "\t" << refAA << "\t"
<< position << "\t" << altCodon << "\t" << altAA << "\t"
<< altCount << "/" << (int)depth << "\t" << allelicRatio
<< "\t" << ctrlCounts << "/" << ctrlDepth << "\t"
<< ctrlRatio << endl;
}
}
}
......@@ -609,13 +565,10 @@ void Vihgo::writeCodonFile( string incType )
codonStream.close();
}
// Method to get annotation for variation
// Actually has been deactivated yet
// NEED A REWAMP FOR ADAPTATION TO ALL VIRUS; VCF FORMAT
int Vihgo::getAnnotation()
{
int Vihgo::getAnnotation() {
// vector<string> fileToParse;
// cerr << "Starting annotation parsing " << endl;
// if( !( this->isAnnotationDefined() ) )
......@@ -631,7 +584,8 @@ int Vihgo::getAnnotation()
// string ligne;
// string chromosome;
//
// for( myIterA = fileToParse.begin() ; myIterA != fileToParse.end() ; myIterA ++)
// for( myIterA = fileToParse.begin() ; myIterA != fileToParse.end() ; myIterA
// ++)
// {
//
// // rt parsing
......@@ -665,21 +619,21 @@ int Vihgo::getAnnotation()
//
// chromosome = pyReplace( ligne , "#" , "");
// chromosome = strip(chromosome);
// cerr << "Found a # with line : " << ligne << endl ;
// cerr << "\t\tto " << chromosome << "." << endl;
// if( this->referenceCodon.count( chromosome ) == 0 )
// cerr << "Found a # with line : " << ligne << endl
// ; cerr << "\t\tto " << chromosome << "." << endl; if(
// this->referenceCodon.count( chromosome ) == 0 )
// {
// cerr << chromosome << " not found in reference : passing " << endl;
// this->annotationIsOk = false;
// break;
// cerr << chromosome << " not found in reference : passing "
// << endl; this->annotationIsOk = false; break;
// }
//
//
// if (this->knownMutationTable.count( chromosome ) == 0 )
// if (this->knownMutationTable.count( chromosome ) == 0
// )
// {
// map< string , string > tmpMap;
// this->knownMutationTable[chromosome] = tmpMap;
// cerr << chromosome << " newly created as an annotation " << endl;
// this->knownMutationTable[chromosome] =
// tmpMap; cerr << chromosome << " newly created as an annotation " << endl;
// }
// }
//
......@@ -717,8 +671,8 @@ int Vihgo::getAnnotation()
// {
// continue;
// }
// this->knownMutationTable[chromosome][index] = conclusion;
// cerr << "\tAdding " << index << " aka " << conclusion << endl;
// this->knownMutationTable[chromosome][index] =
// conclusion; cerr << "\tAdding " << index << " aka " << conclusion << endl;
// }
// }
// annotStream.close();
......@@ -726,18 +680,17 @@ int Vihgo::getAnnotation()
return 0;
}
bool Vihgo::isAnnotationDefined()
{
if( ((this->proteaseFile).length() > 0) && (this->integraseFile.length() > 0) && (this->retrotranscriptaseFile.length() > 0) )
{
bool Vihgo::isAnnotationDefined() {
if (((this->proteaseFile).length() > 0) &&
(this->integraseFile.length() > 0) &&
(this->retrotranscriptaseFile.length() > 0)) {
return true;
}
return false;
}
// parse the control bam file to get informations on background of the virus
int Vihgo::readRunControl()
{
int Vihgo::readRunControl() {
long int nbLineRead = 0;
int readPassedAln = 0;
int readPassedSecondary = 0;
......@@ -747,42 +700,41 @@ int Vihgo::readRunControl()
cerr << "Reading control BAM file..." << endl;
bam1_t *aln = bam_init1();
samFile *fp_in = hts_open( (this->getControlBamFile()).c_str() , "r" );
bam_hdr_t *bamHdr = sam_hdr_read( fp_in);
samFile *fp_in = hts_open((this->getControlBamFile()).c_str(), "r");
bam_hdr_t *bamHdr = sam_hdr_read(fp_in);
while(sam_read1(fp_in , bamHdr , aln) > 0)
{
nbLineRead ++;
if ((nbLineRead != 0 ) && ((nbLineRead % 200000) == 0) )
{
cerr << "\t" << nbLineRead << " aln read ; \n\t\tSkipped : " << readPassedAln << " unaligned ; " << readPassedSecondary << " secondary alignements ; " << readPassedSupplementary << " supplementary alignements ; " << readPassedMQ << " bad aln quality ; " << readPassedCIGAROp << " special CIGAR operation " << endl;
while (sam_read1(fp_in, bamHdr, aln) > 0) {
nbLineRead++;
if ((nbLineRead != 0) && ((nbLineRead % 200000) == 0)) {
cerr << "\t" << nbLineRead
<< " aln read ; \n\t\tSkipped : " << readPassedAln << " unaligned ; "
<< readPassedSecondary << " secondary alignements ; "
<< readPassedSupplementary << " supplementary alignements ; "
<< readPassedMQ << " bad aln quality ; " << readPassedCIGAROp
<< " special CIGAR operation " << endl;
}
// skip alignement if is unmmapped
if ( (aln->core.flag & BAM_FUNMAP) == BAM_FUNMAP )
{
readPassedAln ++;
if ((aln->core.flag & BAM_FUNMAP) == BAM_FUNMAP) {
readPassedAln++;
continue;
}
// skip alignement if secondary alignement
if ( (aln->core.flag & BAM_FSECONDARY) == BAM_FSECONDARY)
{
readPassedSecondary ++;
if ((aln->core.flag & BAM_FSECONDARY) == BAM_FSECONDARY) {
readPassedSecondary++;
continue;
}
// skip alignement if supplmentary alignement
if ( (aln->core.flag & BAM_FSUPPLEMENTARY) == BAM_FSUPPLEMENTARY)
{
readPassedSupplementary ++;
if ((aln->core.flag & BAM_FSUPPLEMENTARY) == BAM_FSUPPLEMENTARY) {
readPassedSupplementary++;
continue;
}
// Skip alignment if quality score is less than 30
if (aln->core.qual < 30)
{
readPassedMQ ++;
if (aln->core.qual < 30) {
readPassedMQ++;
continue;
}
......@@ -792,9 +744,10 @@ int Vihgo::readRunControl()
string chr = bamHdr->target_name[aln->core.tid];
// Deal with unknown chromosome
if (!( this->isInGeneList( chr ) ))
{
cerr << "Read is mapped on a reference not known. Please verify the reference provided." << endl;
if (!(this->isInGeneList(chr))) {
cerr << "Read is mapped on a reference not known. Please verify the "
"reference provided."
<< endl;
continue;
}
......@@ -806,21 +759,19 @@ int Vihgo::readRunControl()
// cerr << "Read is in a known gene : " << *myIterA << endl;
// }
// cigar string event detection ; need to be improved to deal with insertion / deletion
// pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD, BACK
// cigar string event detection ; need to be improved to deal with insertion
// / deletion pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD,
// BACK
bool readNeedToBeSkipped = false;
for (int k = 0; k < aln->core.n_cigar; k++)
{
const int op = bam_cigar_op( bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen( bam_get_cigar(aln)[k]);
if ( op == BAM_CREF_SKIP || op == BAM_CPAD || op == BAM_CBACK )
{
for (int k = 0; k < aln->core.n_cigar; k++) {
const int op = bam_cigar_op(bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen(bam_get_cigar(aln)[k]);
if (op == BAM_CREF_SKIP || op == BAM_CPAD || op == BAM_CBACK) {
readNeedToBeSkipped = true;
}
}
if (readNeedToBeSkipped )
{
readPassedCIGAROp ++;
if (readNeedToBeSkipped) {
readPassedCIGAROp++;
continue;
}
......@@ -829,89 +780,82 @@ int Vihgo::readRunControl()
// string quality = bam1_qual( aln );
// deal with stop option
if ( this->getCountStop() )
{
if (this->detectStopInRead( aln , pos , chr ))
{
if (this->getCountStop()) {
if (this->detectStopInRead(aln, pos, chr)) {
continue;
}
}
int cumulOffset = 0;
for (int k = 0; k < aln->core.n_cigar; k++)
{
const int op = bam_cigar_op( bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen( bam_get_cigar(aln)[k]);
for (int k = 0; k < aln->core.n_cigar; k++) {
const int op = bam_cigar_op(bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen(bam_get_cigar(aln)[k]);
// deal with the cigar special ops
// HARD CLIPPING : No offset increment , no analysis of bases
if ( op == BAM_CHARD_CLIP )
{
if (op == BAM_CHARD_CLIP) {
continue;
}
// SOFT CLIPPING : offset increment ; no analysis of bases
if ( op == BAM_CSOFT_CLIP )
{
if (op == BAM_CSOFT_CLIP) {
cumulOffset += ol;
continue;
}
// insertion
if( op == BAM_CINS )
{
if (op == BAM_CINS) {
// progress on query to get the inserted sequence.
string insertedSequence = "+";
// need to determine on which codon we are
int currentGenomicPosition = pos;
int codonStartPos;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
codonStartPos = currentGenomicPosition;
}
else
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) > 0 )
{
} else {
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) >
0) {
codonStartPos = currentGenomicPosition - 1;
}
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) >
0) {
codonStartPos = currentGenomicPosition - 2;
}
}
// get the bases of the inserted sequence
for ( int x = 0 ; x < ol ; x++)
{
int a = bam_seqi( sequence , cumulOffset + x );
for (int x = 0; x < ol; x++) {
int a = bam_seqi(sequence, cumulOffset + x);
char base;
base = this->getBase( a );
base = this->getBase(a);
insertedSequence += base;
}
// insert the codon in the data
if( !( this->sampleData.count( chr ) > 0) )
{
map<int , map< string , int> > tmpMap;
if (!(this->sampleData.count(chr) > 0)) {
map<int, map<string, int>> tmpMap;
this->sampleData[chr] = tmpMap;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if( !(this->sampleData[chr].count( codonStartPos ) > 0 ) )
{
map< string , int> tmpMap;
if (!(this->sampleData[chr].count(codonStartPos) > 0)) {
map<string, int> tmpMap;
this->sampleData[chr][codonStartPos] = tmpMap;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if( !(this->sampleData[chr][codonStartPos].count( insertedSequence ) > 0 ) )
{
if (!(this->sampleData[chr][codonStartPos].count(insertedSequence) >
0)) {
this->sampleData[chr][codonStartPos][insertedSequence] = 0;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this->sampleData[chr][codonStartPos][insertedSequence] ++;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this->sampleData[chr][codonStartPos][insertedSequence]++;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : progress length of insertion
cumulOffset += ol;
......@@ -920,57 +864,56 @@ int Vihgo::readRunControl()
}
// insertion
if( op == BAM_CDEL )
{
if (op == BAM_CDEL) {
// progress on query to get the inserted sequence.
string deletedSequence = "-";
// need to determine on which codon we are
int currentGenomicPosition = pos;
int codonStartPos;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
codonStartPos = currentGenomicPosition;
}
else
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) > 0 )
{
} else {
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) >
0) {
codonStartPos = currentGenomicPosition - 1;
}
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) >
0) {
codonStartPos = currentGenomicPosition - 2;
}
}
// get the bases of the reference sequence
for ( int x = 0 ; x < ol ; x++)
{
for (int x = 0; x < ol; x++) {
char base;
base = this->getReferenceBase( chr , x + currentGenomicPosition );
base = this->getReferenceBase(chr, x + currentGenomicPosition);
deletedSequence += base;
}
// insert the codon in the data
if( !( this->sampleData.count( chr ) > 0) )
{
map<int , map< string , int> > tmpMap;
if (!(this->sampleData.count(chr) > 0)) {
map<int, map<string, int>> tmpMap;
this->sampleData[chr] = tmpMap;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if( !(this->sampleData[chr].count( codonStartPos ) > 0 ) )
{
map< string , int> tmpMap;
if (!(this->sampleData[chr].count(codonStartPos) > 0)) {
map<string, int> tmpMap;
this->sampleData[chr][codonStartPos] = tmpMap;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if( !(this->sampleData[chr][codonStartPos].count( deletedSequence ) > 0 ) )
{
if (!(this->sampleData[chr][codonStartPos].count(deletedSequence) >
0)) {
this->sampleData[chr][codonStartPos][deletedSequence] = 0;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this->sampleData[chr][codonStartPos][deletedSequence] ++;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this->sampleData[chr][codonStartPos][deletedSequence]++;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : no progress
// on reference : progress the length of the op
......@@ -978,51 +921,48 @@ int Vihgo::readRunControl()
continue;
}
for( int i = 0 ; i < aln->core.l_qseq ; i ++ )
{
for (int i = 0; i < aln->core.l_qseq; i++) {
// check if we are at a codon position
int currentGenomicPosition = pos + i;
string codon;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
for ( int x = 0 ; x < 3 ; x++)
{
int a = bam_seqi( sequence , i+x );
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
for (int x = 0; x < 3; x++) {
int a = bam_seqi(sequence, i + x);
char base;
base = this->getBase( a );
base = this->getBase(a);
codon += base;
}
i += 2;
if (codon.find( "!" ) != string::npos )
{
if (codon.find("!") != string::npos) {
continue;
}
// insert the codon in the data
if( !( this->sampleData.count( chr ) > 0) )
{
map<int , map< string , int> > tmpMap;
if (!(this->sampleData.count(chr) > 0)) {
map<int, map<string, int>> tmpMap;
this->sampleData[chr] = tmpMap;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if( !(this->sampleData[chr].count( currentGenomicPosition ) > 0 ) )
{
map< string , int> tmpMap;
if (!(this->sampleData[chr].count(currentGenomicPosition) > 0)) {
map<string, int> tmpMap;
this->sampleData[chr][currentGenomicPosition] = tmpMap;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if( !(this->sampleData[chr][currentGenomicPosition].count( codon ) > 0 ) )
{
if (!(this->sampleData[chr][currentGenomicPosition].count(codon) >
0)) {
this->sampleData[chr][currentGenomicPosition][codon] = 0;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
this->sampleData[chr][currentGenomicPosition][codon] ++;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
else
{
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in
// control data " << endl;
}
this->sampleData[chr][currentGenomicPosition][codon]++;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
} else {
continue;
}
}
......@@ -1031,34 +971,33 @@ int Vihgo::readRunControl()
}
// destruct objects
bam_hdr_destroy( bamHdr );
sam_close( fp_in );
bam_destroy1( aln );
bam_hdr_destroy(bamHdr);
sam_close(fp_in);
bam_destroy1(aln);
return 0;
}
int Vihgo::variantCalling()
{
int Vihgo::variantCalling() {
// estimate BG noise on control data
// we need to eliminate "real" variants by testing their vaf to all the sample VAF
// If a variant is detected, a new turn begin while excluding detected variation.
// we need to eliminate "real" variants by testing their vaf to all the sample
// VAF If a variant is detected, a new turn begin while excluding detected
// variation.
// First compute all VAF
map<string , map<int , map< string , int> > >::iterator myIterA;
map<int , map< string , int> >::iterator myIterB;
map< string , int>::iterator myIterC;
map<string , map<int , vector<double> > > VAFControlTable;
map<string , map<int , vector<double> > > VAFTable;
if( this->isThereAControl() )
{
map<string, map<int, map<string, int>>>::iterator myIterA;
map<int, map<string, int>>::iterator myIterB;
map<string, int>::iterator myIterC;
map<string, map<int, vector<double>>> VAFControlTable;
map<string, map<int, vector<double>>> VAFTable;
if (this->isThereAControl()) {
// VAF for control
for( myIterA = this->controlData.begin() ; myIterA != this->controlData.end() ; myIterA ++ )
{
for (myIterA = this->controlData.begin();
myIterA != this->controlData.end(); myIterA++) {
// cerr << myIterA->first << " : " << endl;
for( myIterB = (myIterA->second).begin() ; myIterB != (myIterA->second).end() ; myIterB ++ )
{
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
// cerr << "\t" << myIterB->first << " : " << endl;
double totalDepth = 0.0;
double altNumber = 0.0;
......@@ -1066,48 +1005,46 @@ int Vihgo::variantCalling()
string refCodon = this->referenceCodon[myIterA->first][myIterB->first];
string aa = this->getAAfromCodon(refCodon);
// cerr << "Analysing codon " << myIterA->first << ":" << myIterB->first << " ; Ref is : " << refCodon << " : " << aa << endl;
for( myIterC = (myIterB->second).begin() ; myIterC != (myIterB->second).end() ; myIterC ++ )
{
// cerr << "Analysing codon " << myIterA->first << ":" <<
// myIterB->first << " ; Ref is : " << refCodon << " : " << aa << endl;
for (myIterC = (myIterB->second).begin();
myIterC != (myIterB->second).end(); myIterC++) {
string currentCodon = myIterC->first;
string currentAa = this->getAAfromCodon(currentCodon);
// cerr << "\tin control : " << currentCodon << " : " << currentAa << " : " << myIterC->second << endl;
// cerr << "\tin control : " << currentCodon << " : " << currentAa <<
// " : " << myIterC->second << endl;
if (!( currentAa == aa ))
{
if (!(currentAa == aa)) {
altNumber += myIterC->second;
}
totalDepth += myIterC->second;
}
// cerr << "\t\tRepresenting in total : " << altNumber << "/" << totalDepth << endl;
// cerr << "\t\tRepresenting in total : " << altNumber << "/" <<
// totalDepth << endl;
if (!(VAFControlTable.count( myIterA->first ) > 0 ) )
{
map<int , vector<double> > tmpMap;
if (!(VAFControlTable.count(myIterA->first) > 0)) {
map<int, vector<double>> tmpMap;
VAFControlTable[myIterA->first] = tmpMap;
}
double x = (altNumber / totalDepth);
vector<double> tmpVect;
tmpVect.push_back( altNumber );
tmpVect.push_back( totalDepth );
tmpVect.push_back(altNumber);
tmpVect.push_back(totalDepth);
VAFControlTable[myIterA->first][myIterB->first] = tmpVect;
// cerr << myIterA->first << " : " << myIterB->first << " : " << VAFControlTable[myIterA->first][myIterB->first] << endl;
// cerr << myIterA->first << " : " << myIterB->first << " : " <<
// VAFControlTable[myIterA->first][myIterB->first] << endl;
}
}
}
// VAF for sample
for( myIterA = this->sampleData.begin() ; myIterA != this->sampleData.end() ; myIterA ++ )
{
for (myIterA = this->sampleData.begin(); myIterA != this->sampleData.end();
myIterA++) {
// cerr << myIterA->first << " : " << endl;
for( myIterB = (myIterA->second).begin() ; myIterB != (myIterA->second).end() ; myIterB ++ )
{
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
// cerr << "\t" << myIterB->first << " : " << endl;
double totalDepth = 0.0;
double altNumber = 0.0;
......@@ -1115,82 +1052,76 @@ int Vihgo::variantCalling()
string refCodon = this->referenceCodon[myIterA->first][myIterB->first];
string aa = this->getAAfromCodon(refCodon);
// cerr << "Analysing codon " << myIterA->first << ":" << myIterB->first << " ; Ref is : " << refCodon << " : " << aa << endl;
for( myIterC = (myIterB->second).begin() ; myIterC != (myIterB->second).end() ; myIterC ++ )
{
// cerr << "Analysing codon " << myIterA->first << ":" << myIterB->first
// << " ; Ref is : " << refCodon << " : " << aa << endl;
for (myIterC = (myIterB->second).begin();
myIterC != (myIterB->second).end(); myIterC++) {
string currentCodon = myIterC->first;
string currentAa = this->getAAfromCodon(currentCodon);
// cerr << "\tin sample : " << currentCodon << " : " << currentAa << " : " << myIterC->second << endl;
// cerr << "\tin sample : " << currentCodon << " : " << currentAa << " :
// " << myIterC->second << endl;
if (!( currentAa == aa ))
{
if (!(currentAa == aa)) {
altNumber += myIterC->second;
}
totalDepth += myIterC->second;
}
// cerr << "\t\tRepresenting in total : " << altNumber << "/" << totalDepth << endl;
// cerr << "\t\tRepresenting in total : " << altNumber << "/" <<
// totalDepth << endl;
if (!(VAFTable.count( myIterA->first ) > 0 ) )
{
map<int , vector<double> > tmpMap;
if (!(VAFTable.count(myIterA->first) > 0)) {
map<int, vector<double>> tmpMap;
VAFTable[myIterA->first] = tmpMap;
}
double x = (altNumber / totalDepth);
vector<double> tmpVect;
tmpVect.push_back( altNumber );
tmpVect.push_back( totalDepth );
tmpVect.push_back(altNumber);
tmpVect.push_back(totalDepth);
// VAFControlTable[myIterA->first][myIterB->first] = x * 100;
VAFTable[myIterA->first][myIterB->first] = tmpVect;
// cerr << myIterA->first << " : " << myIterB->first << " : " << VAFControlTable[myIterA->first][myIterB->first] << endl;
// cerr << myIterA->first << " : " << myIterB->first << " : " <<
// VAFControlTable[myIterA->first][myIterB->first] << endl;
}
}
double totalAlt = 0.0;
double totalDepth = 0.0;
// looping while we find a variation
// a variation is a position where the ratio is different from the whole BG noise from the sample
// a variation is a position where the ratio is different from the whole BG
// noise from the sample
int numberOfVariation = 1;
// we'll store variation position in this map and exclude the position found in it at each turn
map<string , map<int , int> > positionToExclude;
// we'll store variation position in this map and exclude the position found
// in it at each turn
map<string, map<int, int>> positionToExclude;
int nTurn = 1;
while (numberOfVariation > 0)
{
while (numberOfVariation > 0) {
cerr << "Computing background noise : turn " << nTurn << endl;
numberOfVariation = 0;
vector<double> t;
t.push_back( 0.0 );
t.push_back( 0.0 );
t.push_back(0.0);
t.push_back(0.0);
vector<double> a;
a.push_back( 0.0 );
a.push_back( 0.0 );
a.push_back(0.0);
a.push_back(0.0);
map<string , map<int , vector<double> > >::iterator myIterD;
map<int , vector<double> >::iterator myIterE;
map<string, map<int, vector<double>>>::iterator myIterD;
map<int, vector<double>>::iterator myIterE;
// compute a
for ( myIterD = VAFTable.begin(); myIterD != VAFTable.end(); myIterD ++)
{
for ( myIterE = (myIterD->second).begin() ; myIterE != (myIterD->second).end() ; myIterE++)
{
for (myIterD = VAFTable.begin(); myIterD != VAFTable.end(); myIterD++) {
for (myIterE = (myIterD->second).begin();
myIterE != (myIterD->second).end(); myIterE++) {
// check if position exist in the excluded position
if ( positionToExclude.count( myIterD->first ) > 0 )
{
if( positionToExclude[myIterD->first].count( myIterE->first ) > 0 )
{
if (positionToExclude.count(myIterD->first) > 0) {
if (positionToExclude[myIterD->first].count(myIterE->first) > 0) {
continue;
}
}
a[0] += VAFTable[myIterD->first][myIterE->first][0];
a[1] += VAFTable[myIterD->first][myIterE->first][1];
}
......@@ -1198,18 +1129,14 @@ int Vihgo::variantCalling()
totalAlt = a[0];
totalDepth = a[1];
// test each position
for ( myIterD = VAFTable.begin(); myIterD != VAFTable.end(); myIterD ++)
{
for ( myIterE = (myIterD->second).begin() ; myIterE != (myIterD->second).end() ; myIterE++)
{
for (myIterD = VAFTable.begin(); myIterD != VAFTable.end(); myIterD++) {
for (myIterE = (myIterD->second).begin();
myIterE != (myIterD->second).end(); myIterE++) {
// check if position exist in the excluded position
if ( positionToExclude.count( myIterD->first ) > 0 )
{
if( positionToExclude[myIterD->first].count( myIterE->first ) > 0 )
{
if (positionToExclude.count(myIterD->first) > 0) {
if (positionToExclude[myIterD->first].count(myIterE->first) > 0) {
continue;
}
}
......@@ -1217,36 +1144,40 @@ int Vihgo::variantCalling()
t[0] = VAFTable[myIterD->first][myIterE->first][0];
t[1] = VAFTable[myIterD->first][myIterE->first][1];
double p = chisquare( t , a );
// cerr << "testing : " << t[0] << " : " << t[1] << " vs " << a[0] << " : " << a[1] << " ; Pval = " << p << endl;
double p = chisquare(t, a);
// cerr << "testing : " << t[0] << " : " << t[1] << " vs " << a[0] << "
// : " << a[1] << " ; Pval = " << p << endl;
if ( p < 0.05 )
{
numberOfVariation ++;
if (p < 0.05) {
numberOfVariation++;
positionToExclude[myIterD->first][myIterE->first] = 1;
cerr << "Excluding " << myIterD->first << ":" <<myIterE->first << " with " << t[0] << " : " << t[1] << " vs " << a[0] << " : " << a[1] << " ; Pval = " << p << endl;
cerr << "Excluding " << myIterD->first << ":" << myIterE->first
<< " with " << t[0] << " : " << t[1] << " vs " << a[0] << " : "
<< a[1] << " ; Pval = " << p << endl;
}
}
}
cerr << "End of turn, with " << numberOfVariation << " to exclude" << endl;
nTurn ++;
nTurn++;
}
// now counting for the sample.
vector<double> bg;
bg.push_back( totalAlt );
bg.push_back( totalDepth );
bg.push_back(totalAlt);
bg.push_back(totalDepth);
// map<string , map<int , vector<double> > > VAFControlTable;
// variant calling itself
// open output files
// result file
ofstream outresultStream;
outresultStream.open( (this->outputFile).c_str() , ios::out);
outresultStream << "#chromosome\tgPosition\tgene\tAAPosition\treferenceCodon\treferenceAA\tvariantCodon\tvariantAA\ttotalDepth\tAlternativeAlleleratio\tFETp-value\tFETbgp-value\tAnnotation" << endl;
outresultStream.open((this->outputFile).c_str(), ios::out);
outresultStream << "#chromosome\tgPosition\tgene\tAAPosition\treferenceCodon"
"\treferenceAA\tvariantCodon\tvariantAA\ttotalDepth\tAlter"
"nativeAlleleratio\tFETp-value\tFETbgp-value\tAnnotation"
<< endl;
// test each position
cerr << "Variant calling in progress ... " << endl;
......@@ -1254,36 +1185,31 @@ int Vihgo::variantCalling()
int nbVariant = 0;
int nbCall = 0;
map<string , map<int , vector<double> > >::iterator myIterD;
map<int , vector<double> >::iterator myIterE;
for ( myIterD = VAFTable.begin(); myIterD != VAFTable.end(); myIterD ++)
{
for ( myIterE = (myIterD->second).begin() ; myIterE != (myIterD->second).end() ; myIterE++)
{
map< string, int> tmpMap;
map<string, map<int, vector<double>>>::iterator myIterD;
map<int, vector<double>>::iterator myIterE;
for (myIterD = VAFTable.begin(); myIterD != VAFTable.end(); myIterD++) {
for (myIterE = (myIterD->second).begin();
myIterE != (myIterD->second).end(); myIterE++) {
map<string, int> tmpMap;
tmpMap = sampleData[myIterD->first][myIterE->first];
map<string , int>::iterator myIterF;
map<string, int>::iterator myIterF;
double total = 0;
for ( myIterF = tmpMap.begin() ; myIterF != tmpMap.end() ; myIterF ++ )
{
for (myIterF = tmpMap.begin(); myIterF != tmpMap.end(); myIterF++) {
total += myIterF->second;
}
// pass if depth isn't enough
if (total < this->depthThreshold)
{
nbCall ++;
if (total < this->depthThreshold) {
nbCall++;
continue;
}
for ( myIterF = tmpMap.begin() ; myIterF != tmpMap.end() ; myIterF ++ )
{
for (myIterF = tmpMap.begin(); myIterF != tmpMap.end(); myIterF++) {
cerr << "\t\t" << nbCall << endl;
if ((nbCall != 0 ) && ((nbCall % 1000) == 0) )
{
cerr << "\t" << nbCall << " position tested ; with : " << nbVariant << " found" << endl;
if ((nbCall != 0) && ((nbCall % 1000) == 0)) {
cerr << "\t" << nbCall << " position tested ; with : " << nbVariant
<< " found" << endl;
}
// cerr << "\t\tgetting ref informations ..." << endl;
......@@ -1295,118 +1221,112 @@ int Vihgo::variantCalling()
string currentAa = this->getAAfromCodon(currentCodon);
// disengage variant calling if ref aa = alt aa (synonymous)
if (aa == currentAa)
{
if (aa == currentAa) {
continue;
}
vector<double> s;
s.push_back( 0.0 );
s.push_back( 0.0 );
s.push_back(0.0);
s.push_back(0.0);
vector<double> c;
c.push_back( 0.0 );
c.push_back( 0.0 );
c.push_back(0.0);
c.push_back(0.0);
if ( this->isThereAControl() )
{
if ( VAFControlTable.count( myIterD->first ) > 0 )
{
if ( (VAFControlTable[myIterD->first]).count(myIterE->first) > 0 )
{
if (this->isThereAControl()) {
if (VAFControlTable.count(myIterD->first) > 0) {
if ((VAFControlTable[myIterD->first]).count(myIterE->first) > 0) {
c[0] = VAFControlTable[myIterD->first][myIterE->first][0];
c[1] = VAFControlTable[myIterD->first][myIterE->first][1];
}
}
}
// cerr << "getting myIterF second : " << myIterF->first << " : " << myIterF->second << endl;
// cerr << "getting myIterF second : " << myIterF->first << " : " <<
// myIterF->second << endl;
s[0] = myIterF->second;
s[1] = total;
// control if ratio is enough
double ratio = s[0] / total;
if (ratio < this->ratioThreshold)
{
nbCall ++;
if (ratio < this->ratioThreshold) {
nbCall++;
continue;
}
if ( ( (s[0]/s[1]) < (c[0]/c[1]) ) and (this->isThereAControl()) )
{
nbCall ++;
if (((s[0] / s[1]) < (c[0] / c[1])) and (this->isThereAControl())) {
nbCall++;
continue;
}
double p;
double pc;
// FET test
if ( this->isThereAControl() )
{
cerr << "\tTesting : " << s[0] << " : " << s[1] << " vs " << c[0] << " : " << c[1];
double p = fisher_test( s , c );
if (this->isThereAControl()) {
cerr << "\tTesting : " << s[0] << " : " << s[1] << " vs " << c[0]
<< " : " << c[1];
double p = fisher_test(s, c);
cerr << " ; Pval = " << p << endl;
}
else
{
} else {
p = 0;
}
if ( p < 0.01 )
{
if (p < 0.01) {
double pc;
string annotation = "No annotation found";
// need to perform the chi2 test vs background noise
pc = fisher_test( s , bg );
cerr << "Performing test vs BG Noise : " << s[0] << " : " << s[1] << " vs " << bg[0] << " : " << bg[1] << " ; Pval = " << pc << endl;
pc = fisher_test(s, bg);
cerr << "Performing test vs BG Noise : " << s[0] << " : " << s[1]
<< " vs " << bg[0] << " : " << bg[1] << " ; Pval = " << pc
<< endl;
// outresultStream << "#gene\tAAPosition\treferenceCodon\treferenceAA\tvariantCodon\tvariantAA\ttotalDepth\tAlternativeAlleleratio\tFETp-value\tFETbgp-value\tAnnotation" << endl;
// outresultStream <<
// "#gene\tAAPosition\treferenceCodon\treferenceAA\tvariantCodon\tvariantAA\ttotalDepth\tAlternativeAlleleratio\tFETp-value\tFETbgp-value\tAnnotation"
// << endl;
if ( pc < 0.01 )
{
if (pc < 0.01) {
// get the protein name
string prot;
map<string , map<int , int> >::iterator myIterG;
for ( myIterG = this->refProtCorresp.begin() ; myIterG != this->refProtCorresp.end() ; myIterG ++ )
{
if ( this->refProtCorresp[myIterG->first].count( myIterE->first ) > 0 )
{
map<string, map<int, int>>::iterator myIterG;
for (myIterG = this->refProtCorresp.begin();
myIterG != this->refProtCorresp.end(); myIterG++) {
if (this->refProtCorresp[myIterG->first].count(myIterE->first) >
0) {
prot = myIterG->first;
break;
}
}
// write results
cerr << "Variation " << myIterD->first << ":" << myIterE->first << " with " << s[0] << " : " << s[1] << " vs " << c[0] << " : " << c[1] << " ; ctrlPval = " << p << " ; bgPval = " << pc << endl;
outresultStream << myIterD->first << "\t" << myIterE->first << "\t" << prot << "\t" << this->refProtCorresp[prot][myIterE->first] << "\t" << refCodon << "\t" << aa << "\t" << currentCodon << "\t" << currentAa << "\t" << s[1] << "\t" << ratio << "\t" << p << "\t" << pc << "\t" << annotation << endl;
nbVariant ++;
cerr << "Variation " << myIterD->first << ":" << myIterE->first
<< " with " << s[0] << " : " << s[1] << " vs " << c[0] << " : "
<< c[1] << " ; ctrlPval = " << p << " ; bgPval = " << pc
<< endl;
outresultStream
<< myIterD->first << "\t" << myIterE->first << "\t" << prot
<< "\t" << this->refProtCorresp[prot][myIterE->first] << "\t"
<< refCodon << "\t" << aa << "\t" << currentCodon << "\t"
<< currentAa << "\t" << s[1] << "\t" << ratio << "\t" << p
<< "\t" << pc << "\t" << annotation << endl;
nbVariant++;
}
}
nbCall ++;
nbCall++;
}
}
}
outresultStream.close();
return 0;
}
// parse the bam file to get informations on data of the experiment
int Vihgo::readSample()
{
int Vihgo::readSample() {
long int nbLineRead = 0;
int readPassedAln = 0;
int readPassedSecondary = 0;
......@@ -1416,42 +1336,41 @@ int Vihgo::readSample()
cerr << "Reading BAM file..." << endl;
bam1_t *aln = bam_init1();
samFile *fp_in = hts_open( (this->getBamFile()).c_str() , "r" );
bam_hdr_t *bamHdr = sam_hdr_read( fp_in);
samFile *fp_in = hts_open((this->getBamFile()).c_str(), "r");
bam_hdr_t *bamHdr = sam_hdr_read(fp_in);
while(sam_read1(fp_in , bamHdr , aln) > 0)
{
nbLineRead ++;
if ((nbLineRead != 0 ) && ((nbLineRead % 200000) == 0) )
{
cerr << "\t" << nbLineRead << " aln read ; \n\t\tSkipped : " << readPassedAln << " unaligned ; " << readPassedSecondary << " secondary alignements ; " << readPassedSupplementary << " supplementary alignements ; " << readPassedMQ << " bad aln quality ; " << readPassedCIGAROp << " special CIGAR operation " << endl;
while (sam_read1(fp_in, bamHdr, aln) > 0) {
nbLineRead++;
if ((nbLineRead != 0) && ((nbLineRead % 200000) == 0)) {
cerr << "\t" << nbLineRead
<< " aln read ; \n\t\tSkipped : " << readPassedAln << " unaligned ; "
<< readPassedSecondary << " secondary alignements ; "
<< readPassedSupplementary << " supplementary alignements ; "
<< readPassedMQ << " bad aln quality ; " << readPassedCIGAROp
<< " special CIGAR operation " << endl;
}
// skip alignement if is unmmapped
if ( (aln->core.flag & BAM_FUNMAP) == BAM_FUNMAP )
{
readPassedAln ++;
if ((aln->core.flag & BAM_FUNMAP) == BAM_FUNMAP) {
readPassedAln++;
continue;
}
// skip alignement if secondary alignement
if ( (aln->core.flag & BAM_FSECONDARY) == BAM_FSECONDARY)
{
readPassedSecondary ++;
if ((aln->core.flag & BAM_FSECONDARY) == BAM_FSECONDARY) {
readPassedSecondary++;
continue;
}
// skip alignement if supplmentary alignement
if ( (aln->core.flag & BAM_FSUPPLEMENTARY) == BAM_FSUPPLEMENTARY)
{
readPassedSupplementary ++;
if ((aln->core.flag & BAM_FSUPPLEMENTARY) == BAM_FSUPPLEMENTARY) {
readPassedSupplementary++;
continue;
}
// Skip alignment if quality score is less than 30
if (aln->core.qual < 30)
{
readPassedMQ ++;
if (aln->core.qual < 30) {
readPassedMQ++;
continue;
}
......@@ -1461,9 +1380,10 @@ int Vihgo::readSample()
string chr = bamHdr->target_name[aln->core.tid];
// Deal with unknown chromosome
if (!( this->isInGeneList( chr ) ))
{
cerr << "Read is mapped on a reference not known. Please verify the reference provided." << endl;
if (!(this->isInGeneList(chr))) {
cerr << "Read is mapped on a reference not known. Please verify the "
"reference provided."
<< endl;
continue;
}
......@@ -1475,21 +1395,19 @@ int Vihgo::readSample()
// cerr << "Read is in a known gene : " << *myIterA << endl;
// }
// cigar string event detection ; need to be improved to deal with insertion / deletion
// pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD, BACK
// cigar string event detection ; need to be improved to deal with insertion
// / deletion pass the read if cigar op is in : Del, Ins, CLIP, SKIP, PADD,
// BACK
bool readNeedToBeSkipped = false;
for (int k = 0; k < aln->core.n_cigar; k++)
{
const int op = bam_cigar_op( bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen( bam_get_cigar(aln)[k]);
if ( op == BAM_CREF_SKIP || op == BAM_CPAD || op == BAM_CBACK )
{
for (int k = 0; k < aln->core.n_cigar; k++) {
const int op = bam_cigar_op(bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen(bam_get_cigar(aln)[k]);
if (op == BAM_CREF_SKIP || op == BAM_CPAD || op == BAM_CBACK) {
readNeedToBeSkipped = true;
}
}
if (readNeedToBeSkipped )
{
readPassedCIGAROp ++;
if (readNeedToBeSkipped) {
readPassedCIGAROp++;
continue;
}
......@@ -1498,89 +1416,82 @@ int Vihgo::readSample()
// string quality = bam1_qual( aln );
// deal with stop option
if ( this->getCountStop() )
{
if (this->detectStopInRead( aln , pos , chr ))
{
if (this->getCountStop()) {
if (this->detectStopInRead(aln, pos, chr)) {
continue;
}
}
int cumulOffset = 0;
for (int k = 0; k < aln->core.n_cigar; k++)
{
const int op = bam_cigar_op( bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen( bam_get_cigar(aln)[k]);
for (int k = 0; k < aln->core.n_cigar; k++) {
const int op = bam_cigar_op(bam_get_cigar(aln)[k]);
const int ol = bam_cigar_oplen(bam_get_cigar(aln)[k]);
// deal with the cigar special ops
// HARD CLIPPING : No offset increment , no analysis of bases
if ( op == BAM_CHARD_CLIP )
{
if (op == BAM_CHARD_CLIP) {
continue;
}
// SOFT CLIPPING : offset increment ; no analysis of bases
if ( op == BAM_CSOFT_CLIP )
{
if (op == BAM_CSOFT_CLIP) {
cumulOffset += ol;
continue;
}
// insertion
if( op == BAM_CINS )
{
if (op == BAM_CINS) {
// progress on query to get the inserted sequence.
string insertedSequence = "+";
// need to determine on which codon we are
int currentGenomicPosition = pos;
int codonStartPos;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
codonStartPos = currentGenomicPosition;
}
else
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) > 0 )
{
} else {
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) >
0) {
codonStartPos = currentGenomicPosition - 1;
}
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) >
0) {
codonStartPos = currentGenomicPosition - 2;
}
}
// get the bases of the inserted sequence
for ( int x = 0 ; x < ol ; x++)
{
int a = bam_seqi( sequence , cumulOffset + x );
for (int x = 0; x < ol; x++) {
int a = bam_seqi(sequence, cumulOffset + x);
char base;
base = this->getBase( a );
base = this->getBase(a);
insertedSequence += base;
}
// insert the codon in the data
if( !( this->sampleData.count( chr ) > 0) )
{
map<int , map< string , int> > tmpMap;
if (!(this->sampleData.count(chr) > 0)) {
map<int, map<string, int>> tmpMap;
this->sampleData[chr] = tmpMap;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if( !(this->sampleData[chr].count( codonStartPos ) > 0 ) )
{
map< string , int> tmpMap;
if (!(this->sampleData[chr].count(codonStartPos) > 0)) {
map<string, int> tmpMap;
this->sampleData[chr][codonStartPos] = tmpMap;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if( !(this->sampleData[chr][codonStartPos].count( insertedSequence ) > 0 ) )
{
if (!(this->sampleData[chr][codonStartPos].count(insertedSequence) >
0)) {
this->sampleData[chr][codonStartPos][insertedSequence] = 0;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this->sampleData[chr][codonStartPos][insertedSequence] ++;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this->sampleData[chr][codonStartPos][insertedSequence]++;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : progress length of insertion
cumulOffset += ol;
......@@ -1589,57 +1500,56 @@ int Vihgo::readSample()
}
// insertion
if( op == BAM_CDEL )
{
if (op == BAM_CDEL) {
// progress on query to get the inserted sequence.
string deletedSequence = "-";
// need to determine on which codon we are
int currentGenomicPosition = pos;
int codonStartPos;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
codonStartPos = currentGenomicPosition;
}
else
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) > 0 )
{
} else {
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 1) >
0) {
codonStartPos = currentGenomicPosition - 1;
}
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) > 0 )
{
if ((this->referenceCodon[chr]).count(currentGenomicPosition - 2) >
0) {
codonStartPos = currentGenomicPosition - 2;
}
}
// get the bases of the reference sequence
for ( int x = 0 ; x < ol ; x++)
{
for (int x = 0; x < ol; x++) {
char base;
base = this->getReferenceBase( chr , x + currentGenomicPosition );
base = this->getReferenceBase(chr, x + currentGenomicPosition);
deletedSequence += base;
}
// insert the codon in the data
if( !( this->sampleData.count( chr ) > 0) )
{
map<int , map< string , int> > tmpMap;
if (!(this->sampleData.count(chr) > 0)) {
map<int, map<string, int>> tmpMap;
this->sampleData[chr] = tmpMap;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if( !(this->sampleData[chr].count( codonStartPos ) > 0 ) )
{
map< string , int> tmpMap;
if (!(this->sampleData[chr].count(codonStartPos) > 0)) {
map<string, int> tmpMap;
this->sampleData[chr][codonStartPos] = tmpMap;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if( !(this->sampleData[chr][codonStartPos].count( deletedSequence ) > 0 ) )
{
if (!(this->sampleData[chr][codonStartPos].count(deletedSequence) >
0)) {
this->sampleData[chr][codonStartPos][deletedSequence] = 0;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
}
this->sampleData[chr][codonStartPos][deletedSequence] ++;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
this->sampleData[chr][codonStartPos][deletedSequence]++;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
// on query : no progress
// on reference : progress the length of the op
......@@ -1647,52 +1557,49 @@ int Vihgo::readSample()
continue;
}
for( int i = 0 ; i < aln->core.l_qseq ; i ++ )
{
for (int i = 0; i < aln->core.l_qseq; i++) {
// check if we are at a codon position
int currentGenomicPosition = pos + i;
string codon;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
for ( int x = 0 ; x < 3 ; x++)
{
int a = bam_seqi( sequence , i+x );
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
for (int x = 0; x < 3; x++) {
int a = bam_seqi(sequence, i + x);
char base;
base = this->getBase( a );
base = this->getBase(a);
codon += base;
}
i += 2;
if (codon.find( "!" ) != string::npos )
{
if (codon.find("!") != string::npos) {
continue;
}
// insert the codon in the data
if( !( this->sampleData.count( chr ) > 0) )
{
map<int , map< string , int> > tmpMap;
if (!(this->sampleData.count(chr) > 0)) {
map<int, map<string, int>> tmpMap;
this->sampleData[chr] = tmpMap;
// cerr << "\tCreating chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating chromosome " << chr << " in control data " <<
// endl;
}
if( !(this->sampleData[chr].count( currentGenomicPosition ) > 0 ) )
{
map< string , int> tmpMap;
if (!(this->sampleData[chr].count(currentGenomicPosition) > 0)) {
map<string, int> tmpMap;
this->sampleData[chr][currentGenomicPosition] = tmpMap;
// cerr << "\tCreating Position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
// cerr << "\tCreating Position " << currentGenomicPosition << " on
// chromosome " << chr << " in control data " << endl;
}
if( !(this->sampleData[chr][currentGenomicPosition].count( codon ) > 0 ) )
{
if (!(this->sampleData[chr][currentGenomicPosition].count(codon) >
0)) {
this->sampleData[chr][currentGenomicPosition][codon] = 0;
// cerr << "\tCreating codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
this->sampleData[chr][currentGenomicPosition][codon] ++;
// cerr << "\tAdding codon " << codon << " on position " << currentGenomicPosition << " on chromosome " << chr << " in control data " << endl;
}
else
{
// cerr << "\tCreating codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in
// control data " << endl;
}
this->sampleData[chr][currentGenomicPosition][codon]++;
// cerr << "\tAdding codon " << codon << " on position " <<
// currentGenomicPosition << " on chromosome " << chr << " in control
// data " << endl;
} else {
continue;
}
}
......@@ -1701,49 +1608,44 @@ int Vihgo::readSample()
}
// destruct objects
bam_hdr_destroy( bamHdr );
sam_close( fp_in );
bam_destroy1( aln );
bam_hdr_destroy(bamHdr);
sam_close(fp_in);
bam_destroy1(aln);
return 0;
}
// Methode to detect a stop in a read
bool Vihgo::detectStopInRead( bam1_t * aln , int32_t pos , string chr )
{
bool Vihgo::detectStopInRead(bam1_t *aln, int32_t pos, string chr) {
const uint8_t *sequence = bam_get_seq(aln);
for( int i = 0 ; i < aln->core.l_qseq ; i ++ )
{
for (int i = 0; i < aln->core.l_qseq; i++) {
// check if we are at a codon position
int currentGenomicPosition = pos + i;
string codon;
if( (this->referenceCodon[chr]).count(currentGenomicPosition) > 0 )
{
for ( int x = 0 ; x < 3 ; x++)
{
int a = bam_seqi( sequence , i+x );
if ((this->referenceCodon[chr]).count(currentGenomicPosition) > 0) {
for (int x = 0; x < 3; x++) {
int a = bam_seqi(sequence, i + x);
char base;
switch( a )
{
case 1 :
switch (a) {
case 1:
base = 'A';
break;
case 2 :
case 2:
base = 'C';
break;
case 4 :
case 4:
base = 'G';
break;
case 8 :
case 8:
base = 'T';
break;
case 15 :
case 15:
base = 'N';
break;
}
......@@ -1751,15 +1653,11 @@ bool Vihgo::detectStopInRead( bam1_t * aln , int32_t pos , string chr )
}
i += 2;
if (this->codonTable[codon] == "*" )
{
if (this->codonTable[codon] == "*") {
return true;
}
}
else
{
} else {
continue;
}
}
......@@ -1768,60 +1666,55 @@ bool Vihgo::detectStopInRead( bam1_t * aln , int32_t pos , string chr )
// Method to compute and write the consensus sequence
// need to modify for deletion and insertion
void Vihgo::writeConsensus()
{
void Vihgo::writeConsensus() {
cerr << "Writing consensus sequence ... " << endl;
ofstream csStream;
csStream.open( (this->getConsensusFile()).c_str() , ios::out );
csStream.open((this->getConsensusFile()).c_str(), ios::out);
map<string , map<int , map< string , int> > >::iterator myIterA;
map<int , map< string , int> >::iterator myIterB;
map< string , int>::iterator myIterC;
map<string, map<int, map<string, int>>>::iterator myIterA;
map<int, map<string, int>>::iterator myIterB;
map<string, int>::iterator myIterC;
map< string , map< int , string> >::iterator myIterE;
map< int , string>::iterator myIterF;
map<string, map<int, string>>::iterator myIterE;
map<int, string>::iterator myIterF;
for (myIterE = this->protCodon.begin() ; myIterE != this->protCodon.end() ; myIterE ++ )
{
for (myIterE = this->protCodon.begin(); myIterE != this->protCodon.end();
myIterE++) {
string prot = myIterE->first;
string chrom = getChrFromProt( prot );
string chrom = getChrFromProt(prot);
if ( ! ( this->sampleData.count( chrom ) > 0 ) )
{
if (!(this->sampleData.count(chrom) > 0)) {
continue;
}
csStream << ">" << prot << endl;
for ( myIterF = (myIterE->second).begin() ; myIterF != (myIterE->second).end() ; myIterF ++ )
{
for (myIterF = (myIterE->second).begin();
myIterF != (myIterE->second).end(); myIterF++) {
int aaPosition = myIterF->first;
int gPosition;
map<int, int> tmpMap = this->refProtCorresp[prot];
map<int, int>::iterator myIterG;
for ( myIterG = tmpMap.begin() ; myIterG != tmpMap.end() ; myIterG ++ )
{
if ( myIterG->second == aaPosition )
{
for (myIterG = tmpMap.begin(); myIterG != tmpMap.end(); myIterG++) {
if (myIterG->second == aaPosition) {
gPosition = myIterG->first;
break;
}
}
// cerr << "ppos : " << aaPosition << " ; genomic position is : " << gPosition << endl;
// cerr << "ppos : " << aaPosition << " ; genomic position is : " <<
// gPosition << endl;
// determine most frequent codon
string codon = "NNN";
int count = 0;
if ( this->sampleData[chrom].count( gPosition ) > 0 )
{
map< string , int > tmpMapCodon;
if (this->sampleData[chrom].count(gPosition) > 0) {
map<string, int> tmpMapCodon;
tmpMapCodon = this->sampleData[chrom][gPosition];
map< string , int >::iterator myIterH;
for ( myIterH = tmpMapCodon.begin() ; myIterH != tmpMapCodon.end() ; myIterH ++ )
{
if (myIterH->second > count )
{
map<string, int>::iterator myIterH;
for (myIterH = tmpMapCodon.begin(); myIterH != tmpMapCodon.end();
myIterH++) {
if (myIterH->second > count) {
codon = myIterH->first;
count = myIterH->second;
}
......@@ -1832,89 +1725,71 @@ void Vihgo::writeConsensus()
csStream << endl;
}
csStream.close();
}
// method to get the chromosome in which the prot is included
string Vihgo::getChrFromProt( string incProt )
{
string Vihgo::getChrFromProt(string incProt) {
return this->genes[incProt]["chrom"];
}
// Method to verify the existence of a gene in the reference provided
bool Vihgo::isInGeneList( string incName )
{
map<string, map<int, char> >::iterator myIterA ;
for( myIterA = this->referenceSequence.begin() ; myIterA != this->referenceSequence.end() ; myIterA ++ )
{
if ( myIterA->first == incName )
{
bool Vihgo::isInGeneList(string incName) {
map<string, map<int, char>>::iterator myIterA;
for (myIterA = this->referenceSequence.begin();
myIterA != this->referenceSequence.end(); myIterA++) {
if (myIterA->first == incName) {
return true;
}
}
return false;
}
// Method to determine if a position is in a gene
// return a gene name if possible, if several genes are present : return the genes separated with comma
// or return "none" if no gene is at the position
string Vihgo::isInGene( string incChrom , int incPos )
{
// cerr << "Entering isInGene method with incoming arguments : " << incChrom << " and " << incPos << endl;
// return a gene name if possible, if several genes are present : return the
// genes separated with comma or return "none" if no gene is at the position
string Vihgo::isInGene(string incChrom, int incPos) {
// cerr << "Entering isInGene method with incoming arguments : " << incChrom
// << " and " << incPos << endl;
string ret = "";
bool posFound = false;
map< string , map< string , string > >::iterator myIterA;
for (myIterA = genes.begin() ; myIterA != genes.end() ; myIterA ++)
{
map<string, map<string, string>>::iterator myIterA;
for (myIterA = genes.begin(); myIterA != genes.end(); myIterA++) {
string allChr = genes[myIterA->first]["chrom"];
vector<string> allChrVec = parseOnSep( allChr , "," );
vector<string> allChrVec = parseOnSep(allChr, ",");
vector<string>::iterator myIterB;
for( myIterB = allChrVec.begin() ; myIterB != allChrVec.end() ; myIterB ++ )
{
if (*myIterB == incChrom )
{
for (myIterB = allChrVec.begin(); myIterB != allChrVec.end(); myIterB++) {
if (*myIterB == incChrom) {
vector<string> allStart;
allStart = parseOnSep( genes[myIterA->first]["start"] , "," );
allStart = parseOnSep(genes[myIterA->first]["start"], ",");
vector<string> allEnd;
allEnd = parseOnSep( genes[myIterA->first]["end"] , "," );
allEnd = parseOnSep(genes[myIterA->first]["end"], ",");
vector<string>::iterator myIterC;
int i = 0;
for( myIterC = allStart.begin() ; myIterC != allStart.end() ; myIterC ++)
{
int s = string_to_int( *myIterC );
if( incPos >= s )
{
if( incPos <= string_to_int( allEnd[i] ) )
{
if( ret.length() > 0 )
{
ret.append( "," );
for (myIterC = allStart.begin(); myIterC != allStart.end(); myIterC++) {
int s = string_to_int(*myIterC);
if (incPos >= s) {
if (incPos <= string_to_int(allEnd[i])) {
if (ret.length() > 0) {
ret.append(",");
}
ret += myIterA->first;
}
}
i ++;
i++;
}
}
else
{
} else {
continue;
}
}
}
if( ret.length() > 0)
{
if (ret.length() > 0) {
// cerr << "\tReturning " << ret << endl;
return ret ;
}
else
{
return ret;
} else {
// cerr << "\tReturning none" << endl;
return "none";
}
......@@ -1923,89 +1798,62 @@ string Vihgo::isInGene( string incChrom , int incPos )
// getters
// method to get the refernce base, given a chr and a position
char Vihgo::getReferenceBase( string incChr , int incPos )
{
if ( (this->referenceSequence[incChr]).count(incPos) > 0 )
{
char Vihgo::getReferenceBase(string incChr, int incPos) {
if ((this->referenceSequence[incChr]).count(incPos) > 0) {
return this->referenceSequence[incChr][incPos];
}
else
{
} else {
return 'N';
}
}
// method to get AA from a codon
string Vihgo::getAAfromCodon( string incCodon )
{
if ( this->codonTable.count( incCodon ) > 0)
{
string Vihgo::getAAfromCodon(string incCodon) {
if (this->codonTable.count(incCodon) > 0) {
return this->codonTable[incCodon];
}
else
{
} else {
return "?";
}
}
vector<string> Vihgo::getAnnotationFiles()
{
vector<string> Vihgo::getAnnotationFiles() {
vector<string> res;
res.push_back( this->integraseFile );
res.push_back( this->proteaseFile );
res.push_back( this->retrotranscriptaseFile );
res.push_back(this->integraseFile);
res.push_back(this->proteaseFile);
res.push_back(this->retrotranscriptaseFile);
return res;
}
string Vihgo::getControlBamFile()
{
return this->inRunControlBam;
}
string Vihgo::getBamFile()
{
return this->inputBamFile;
}
string Vihgo::getControlBamFile() { return this->inRunControlBam; }
bool Vihgo::getCountStop()
{
return this->countStop;
}
string Vihgo::getBamFile() { return this->inputBamFile; }
string Vihgo::getConsensusFile()
{
return this->csFile;
}
bool Vihgo::getCountStop() { return this->countStop; }
bool Vihgo::isThereAControl()
{
if( (this->inRunControlBam).size() > 0 )
{
string Vihgo::getConsensusFile() { return this->csFile; }
bool Vihgo::isThereAControl() {
if ((this->inRunControlBam).size() > 0) {
return true;
}
return false;
}
char Vihgo::getBase( int b )
{
char Vihgo::getBase(int b) {
// cerr << "inc int base: " << b << " : " << endl;;
switch( b )
{
case 1 :
switch (b) {
case 1:
return 'A';
case 2 :
case 2:
return 'C';
case 4 :
case 4:
return 'G';
case 8 :
case 8:
return 'T';
case 15 :
case 15:
return 'N';
}
return '!';
......
......@@ -66,8 +66,8 @@ int main(int argc, char* argv[])
("gene,g" , po::value<string>( &geneFile ) , "Gene annotation file related to reference genome provided." )
("depth,d", po::value<int>( &depthThreshold )->default_value(50), "Depth treshold to call a variant. (optional)")
("ratio,a" , po::value<double>( &ratioThreshold )->default_value(0.1) , "Minimum ratio of alt reads to take in account a mutation. (optional)" )
("pvalue,p" , po::value<double>( &pvalueThreshold )->default_value(0.05) , "pvalue threshold from FET to take in account a mutation. (optional)" )
("ratio,a" , po::value<double>( &ratioThreshold )->default_value(0.1) , "Minimum ratio of alt reads to consider a mutation. (optional)" )
("pvalue,p" , po::value<double>( &pvalueThreshold )->default_value(0.05) , "pvalue threshold from FET to consider a mutation. (optional)" )
("stop,s" , "Activate stop codon detction." )
("thread,t", po::value<int>( &nbThread )->default_value(50), "Number of thread to use. (NYI)")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment