Commit 3646e45f authored by Yannis Duffourd's avatar Yannis Duffourd

Adding new method to compute the full genome consensus sequence

parent 39c51b1b
......@@ -986,6 +986,7 @@ int Vihgo::readRunControl() {
return 0;
}
// perform variant calling
int Vihgo::variantCalling() {
// estimate BG noise on control data
// we need to eliminate "real" variants by testing their vaf to all the sample
......@@ -1675,7 +1676,7 @@ bool Vihgo::detectStopInRead(bam1_t *aln, int32_t pos, string chr) {
// Method to compute and write the consensus sequence
// need to modify for deletion and insertion
void Vihgo::writeConsensus() {
cerr << "Writing consensus sequence ... " << endl;
cerr << "Writing consensus sequence gene by gene ... " << endl;
ofstream csStream;
csStream.open((this->getConsensusFile()).c_str(), ios::out);
......@@ -1741,16 +1742,24 @@ void Vihgo::writeFullConsensus() {
ofstream csStream;
csStream.open((this->getConsensusFile()).c_str(), ios::out);
csStream << ">" << "full_cs_sequence" << endl;
map<string, map<int, string>>::iterator myIterA;
map<int, string>::iterator myIterB;
for (myIterA = this->referenceCodon.begin(); myIterA != this->referenceCodon.end();
map<string, map<int, map<string, int>>>::iterator myIterA;
map<int, map<string, int>>::iterator myIterB;
int currentGenomicPosition = 1;
for (myIterA = this->sampleData.begin(); myIterA != this->sampleData.end();
myIterA++) {
string chrom = myIterA->first;
cerr << "Chrom : " << chrom << endl;
for (myIterB = (myIterA->second).begin();
myIterB != (myIterA->second).end(); myIterB++) {
int gPosition = myIterB->first;
// first determine if we are on the correct genomic position
// if further, complete with N until currentGenomicPosition has reached the point we are in the sample data
while (gPosition > currentGenomicPosition){
// cerr << "\tposition : " << currentGenomicPosition << " : N" << endl;
csStream << "N";
currentGenomicPosition++;
}
// cerr << "\tposition : " << gPosition;
// determine most frequent codon at this position
string codon = "NNN";
int count = 0;
......@@ -1760,22 +1769,44 @@ void Vihgo::writeFullConsensus() {
map<string, int>::iterator myIterH;
for (myIterH = tmpMapCodon.begin(); myIterH != tmpMapCodon.end();
myIterH++) {
if (myIterH->second > count) {
if ((myIterH->second > count) and (myIterH->second >= this->getDepthThreshold())){
codon = myIterH->first;
count = myIterH->second;
}
}
}
// NEED TO DEAL with insertion / deletion
// TODO
// insertion :
// detect that i's an ins (+ at first char)
if (codon[0] == '+'){
// get the ref base and add the new sequence
char refBase = this->referenceSequence[chrom][currentGenomicPosition];
string tmp = char_to_string(refBase) + pyReplace(codon , "+" , "");
codon = tmp;
// cerr << " : INS : " << codon << endl;
csStream << codon;
currentGenomicPosition += 1;
continue;
}
// deletion
if (codon[0] == '-'){
string tmp = pyReplace(codon , "-" , "");
// cerr << " : DEL : " << codon << endl;
currentGenomicPosition += tmp.length();
continue;
}
// write codon : if found will be a true codon, other wise will be a NNN
// cerr << " : " << codon << endl;
csStream << codon;
// update the currentGenomicPosition for the next turn
currentGenomicPosition += 3;
}
csStream << endl;
}
csStream.close();
}
// method to get the chromosome in which the prot is included
string Vihgo::getChrFromProt(string incProt) {
return this->genes[incProt]["chrom"];
......@@ -1845,6 +1876,10 @@ string Vihgo::isInGene(string incChrom, int incPos) {
}
// getters
// Depth Threshold parameter
int Vihgo::getDepthThreshold(){
return this->depthThreshold;
}
// method to get the refernce base, given a chr and a position
char Vihgo::getReferenceBase(string incChr, int incPos) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment