Commit 3646e45f authored by Yannis Duffourd's avatar Yannis Duffourd

Adding new method to compute the full genome consensus sequence

parent 39c51b1b
...@@ -986,6 +986,7 @@ int Vihgo::readRunControl() { ...@@ -986,6 +986,7 @@ int Vihgo::readRunControl() {
return 0; return 0;
} }
// perform variant calling
int Vihgo::variantCalling() { int Vihgo::variantCalling() {
// estimate BG noise on control data // estimate BG noise on control data
// we need to eliminate "real" variants by testing their vaf to all the sample // we need to eliminate "real" variants by testing their vaf to all the sample
...@@ -1675,7 +1676,7 @@ bool Vihgo::detectStopInRead(bam1_t *aln, int32_t pos, string chr) { ...@@ -1675,7 +1676,7 @@ bool Vihgo::detectStopInRead(bam1_t *aln, int32_t pos, string chr) {
// Method to compute and write the consensus sequence // Method to compute and write the consensus sequence
// need to modify for deletion and insertion // need to modify for deletion and insertion
void Vihgo::writeConsensus() { void Vihgo::writeConsensus() {
cerr << "Writing consensus sequence ... " << endl; cerr << "Writing consensus sequence gene by gene ... " << endl;
ofstream csStream; ofstream csStream;
csStream.open((this->getConsensusFile()).c_str(), ios::out); csStream.open((this->getConsensusFile()).c_str(), ios::out);
...@@ -1741,16 +1742,24 @@ void Vihgo::writeFullConsensus() { ...@@ -1741,16 +1742,24 @@ void Vihgo::writeFullConsensus() {
ofstream csStream; ofstream csStream;
csStream.open((this->getConsensusFile()).c_str(), ios::out); csStream.open((this->getConsensusFile()).c_str(), ios::out);
csStream << ">" << "full_cs_sequence" << endl; csStream << ">" << "full_cs_sequence" << endl;
map<string, map<int, map<string, int>>>::iterator myIterA;
map<string, map<int, string>>::iterator myIterA; map<int, map<string, int>>::iterator myIterB;
map<int, string>::iterator myIterB; int currentGenomicPosition = 1;
for (myIterA = this->sampleData.begin(); myIterA != this->sampleData.end();
for (myIterA = this->referenceCodon.begin(); myIterA != this->referenceCodon.end();
myIterA++) { myIterA++) {
string chrom = myIterA->first; string chrom = myIterA->first;
cerr << "Chrom : " << chrom << endl;
for (myIterB = (myIterA->second).begin(); for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) { myIterB != (myIterA->second).end(); myIterB++) {
int gPosition = myIterB->first; int gPosition = myIterB->first;
// first determine if we are on the correct genomic position
// if further, complete with N until currentGenomicPosition has reached the point we are in the sample data
while (gPosition > currentGenomicPosition){
// cerr << "\tposition : " << currentGenomicPosition << " : N" << endl;
csStream << "N";
currentGenomicPosition++;
}
// cerr << "\tposition : " << gPosition;
// determine most frequent codon at this position // determine most frequent codon at this position
string codon = "NNN"; string codon = "NNN";
int count = 0; int count = 0;
...@@ -1760,22 +1769,44 @@ void Vihgo::writeFullConsensus() { ...@@ -1760,22 +1769,44 @@ void Vihgo::writeFullConsensus() {
map<string, int>::iterator myIterH; map<string, int>::iterator myIterH;
for (myIterH = tmpMapCodon.begin(); myIterH != tmpMapCodon.end(); for (myIterH = tmpMapCodon.begin(); myIterH != tmpMapCodon.end();
myIterH++) { myIterH++) {
if (myIterH->second > count) { if ((myIterH->second > count) and (myIterH->second >= this->getDepthThreshold())){
codon = myIterH->first; codon = myIterH->first;
count = myIterH->second; count = myIterH->second;
} }
} }
} }
// NEED TO DEAL with insertion / deletion
// TODO
// insertion :
// detect that i's an ins (+ at first char)
if (codon[0] == '+'){
// get the ref base and add the new sequence
char refBase = this->referenceSequence[chrom][currentGenomicPosition];
string tmp = char_to_string(refBase) + pyReplace(codon , "+" , "");
codon = tmp;
// cerr << " : INS : " << codon << endl;
csStream << codon;
currentGenomicPosition += 1;
continue;
}
// deletion
if (codon[0] == '-'){
string tmp = pyReplace(codon , "-" , "");
// cerr << " : DEL : " << codon << endl;
currentGenomicPosition += tmp.length();
continue;
}
// write codon : if found will be a true codon, other wise will be a NNN // write codon : if found will be a true codon, other wise will be a NNN
// cerr << " : " << codon << endl;
csStream << codon; csStream << codon;
// update the currentGenomicPosition for the next turn
currentGenomicPosition += 3;
} }
csStream << endl; csStream << endl;
} }
csStream.close(); csStream.close();
} }
// method to get the chromosome in which the prot is included // method to get the chromosome in which the prot is included
string Vihgo::getChrFromProt(string incProt) { string Vihgo::getChrFromProt(string incProt) {
return this->genes[incProt]["chrom"]; return this->genes[incProt]["chrom"];
...@@ -1845,6 +1876,10 @@ string Vihgo::isInGene(string incChrom, int incPos) { ...@@ -1845,6 +1876,10 @@ string Vihgo::isInGene(string incChrom, int incPos) {
} }
// getters // getters
// Depth Threshold parameter
int Vihgo::getDepthThreshold(){
return this->depthThreshold;
}
// method to get the refernce base, given a chr and a position // method to get the refernce base, given a chr and a position
char Vihgo::getReferenceBase(string incChr, int incPos) { char Vihgo::getReferenceBase(string incChr, int incPos) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment