Commit b2c92990 authored by Yannis Duffourd's avatar Yannis Duffourd

Adding cpp version

parent abbbd9a5
all:
g++ -c Vihgo.cpp -O3
g++ -c utils.cpp -O3
g++ -c main.cpp
g++ main.o Vihgo.o utils.o -o vihgo -lboost_program_options
// C++ std libs
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <sys/time.h>
#include <sys/types.h>
// C++ Boost
#include <boost/program_options.hpp>
// utils
#include "utils.h"
// Classes
#include "Vihgo.h"
using namespace std;
Vihgo::Vihgo()
{
//
}
Vihgo::Vihgo( string iBF , string gF, string intF, string protF, string refF, string outF, string cResF, string csF, string iRCB, int dT, int nT, int lL, double rT ,double pvT, bool countStop )
{
this->inputBamFile = iBF;
this->geneFile = gF;
this->integraseFile = intF;
this->proteaseFile = protF;
this->referenceFile = refF;
this->outputFile = outF;
this->codonResultFile = cResF;
this->csFile = csF;
this->inRunControlBam = iRCB;
this->depthThreshold = dT;
this->nbThread = nT;
this->loggingLevel = lL;
this->ratioThreshold = rT;
this->pvalueThreshold = pvT;
this->countStop = countStop;
}
int Vihgo::mainLoop()
{
this->getGenome();
//this->displayGenome();
this->getGenes();
this->displayGenes();
return 0;
}
int Vihgo::getGenome( )
{
struct timeval tbegin, tend;
string ligne;
char mot;
string header = ">";
int i = 0;
string currentChr;
// fasta parsing
ifstream fastaStream ( this->referenceFile.c_str() );
if ( fastaStream )
{
while ( getline ( fastaStream, ligne) )
{
if( ligne.find( header ) == 0 )
{
ligne.replace(0,1,"");
currentChr = ligne;
map< int , char> tmpMap;
this->referenceSequence[currentChr] = tmpMap;
}
else
{
for( unsigned int k = 0 ; k < ligne.length() ; k ++ )
{
mot = ligne[k];
mot = checkBase( mot );
this->referenceSequence[currentChr][i] = mot;
i ++;
}
}
}
}
return 0;
}
void Vihgo::displayGenome()
{
map<string, map<int, char>>::iterator myIterA ;
for( myIterA = this->referenceSequence.begin() ; myIterA != this->referenceSequence.end() ; myIterA ++ )
{
cerr << "Chr " << myIterA->first << " : " << endl;
map<int, char> tmpMap;
tmpMap = myIterA->second;
map<int, char>::iterator myIterB;
for( myIterB = tmpMap.begin() ; myIterB != tmpMap.end() ; myIterB ++ )
{
cerr << myIterB->first << " : " << myIterB->second << " ; ";
}
cerr << endl;
}
}
int Vihgo::getGenes()
{
struct timeval tbegin, tend;
string ligne;
string mot;
string header = ">";
int i = 0;
string currentChr;
string chromosome;
string cds;
string annot;
string start;
string end;
string strand;
// fasta parsing
ifstream geneStream ( this->geneFile.c_str() );
if ( geneStream )
{
while ( getline ( geneStream, ligne) )
{
i = 0;
// get information from the line
istringstream issLigne (ligne);
while ( getline( issLigne, mot, '\t' ) )
{
switch( i )
{
case 0 :
chromosome = mot;
break;
case 2 :
cds = mot;
break;
case 3 :
start= mot;
break;
case 4 :
end = mot;
break;
case 6 :
strand = mot;
break;
case 8 :
annot = mot;
break;
default :
break;
}
i ++;
}
if( cds == "CDS" )
{
istringstream issAnnot (annot);
while ( getline( issAnnot, mot, ';' ) )
{
if( mot.find( "protein_id" ) == 0 )
{
istringstream issMot( mot );
string subMot;
string g;
i = 0;
while ( getline( issMot, subMot, '=' ) )
{
if( i == 1 )
{
g = subMot;
}
i ++;
}
// modify gene string : deleting special chars
// cerr << "Gene before : " << subMot << endl;
subMot = pyReplace(subMot , "\"" , "");
// cerr << "Gene after : " << subMot << endl;
// test if gene is already known
if (this->genes.count( subMot ) == 0 )
{
cerr << "Creating new gene : " << subMot << endl;
map<string , string> tmpMap;
map<int, string> tmpMapT;
this->genes[subMot] = tmpMap;
this->referenceCodon[subMot] = tmpMapT;
this->genes[subMot]["start"] = "";
this->genes[subMot]["end"] = "";
this->genes[subMot]["strand"] = "";
this->genes[subMot]["chrom"] = chromosome;
}
// add informations
if( (this->genes[subMot]["start"]).length() > 0 )
{
(this->genes[subMot]["start"]).append( "," );
}
(this->genes[subMot]["start"]).append( start );
if( (this->genes[subMot]["end"]).length() > 0 )
{
(this->genes[subMot]["end"]).append( ",");
}
(this->genes[subMot]["end"]).append( end );
if( (this->genes[subMot]["strand"]).length() > 0 )
{
(this->genes[subMot]["strand"]).append(",");
}
(this->genes[subMot]["strand"]).append( strand );
}
}
}
}
}
return 0;
}
void Vihgo::displayGenes()
{
cerr << "Displaying gene list : " << endl;
map<string, map< string , string>>::iterator myIterB;
for( myIterB = this->genes.begin() ;myIterB != this->genes.end() ; myIterB ++ )
{
cerr << "Gene : " << myIterB->first << " : " << endl;
map< string , string> tmpMap;
tmpMap = myIterB->second;
cerr << "chr : " << tmpMap["chrom"] << " ; start : " << tmpMap["start"] << " ; end : " << tmpMap["end"] << " ; strand : " << tmpMap["strand"] << endl;
}
}
// getters
// setters
#ifndef _VIHGO_H
#define _VIHGO_H
#include <vector>
#include <map>
class Vihgo
{
public:
Vihgo();
Vihgo( std::string , std::string , std::string , std::string , std::string, std::string,std::string,std::string,std::string, int, int , int , double, double, bool);
int mainLoop();
int getGenome();
int getGenes();
private :
std::string sampleName;
std::string inputBamFile;
std::string integraseFile;
std::string proteaseFile;
std::string referenceFile;
std::string outputFile;
std::string codonResultFile;
std::string csFile;
std::string inRunControlBam;
std::string geneFile;
int depthThreshold;
int nbThread;
int loggingLevel;
double ratioThreshold;
double pvalueThreshold;
bool countStop;
std::map<std::string, std::map<int, char>> referenceSequence;
std::map<std::string, std::map< std::string , std::string>> genes;
std::map<std::string, std::map< int , std::string>> referenceCodon;
// method
void displayGenome();
void displayGenes();
};
#endif
// C++ std libs
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <sys/time.h>
#include <sys/types.h>
// C++ Boost
#include <boost/program_options.hpp>
// utils
#include "utils.h"
// Classes
#include "Vihgo.h"
using namespace std;
namespace po = boost::program_options;
int main(int argc, char* argv[])
{
cerr << "Starting Main" << endl;
bool countStop = false;
int currentThread = 0;
int depthThreshold;
int nbThread = 1;
int loggingLevel;
double ratioThreshold;
double pvalueThreshold;
string integraseFile;
string bamFile;
string rtFile;
string proteaseFile;
string referenceFile;
string outputFile;
string codonResultFile;
string csFile;
string inRunControlBam;
string logFile;
string geneFile;
po::options_description desc("Allowed options");
desc.add_options()
("help,h", "produce help message")
("input,i", po::value<string>( &bamFile ), "Input BAM file containing aligned read from sample." )
("output,o", po::value<string>( &outputFile ), "output result file variants." )
("logFile,l", po::value<string>( &logFile ), "Log file output." )
("codon,c", po::value<string>( &codonResultFile ), "output result file variants." )
("consensus,C", po::value<string>( &csFile ), "Consensus sequence file (fasta)." )
("integrase,I", po::value<string>( &integraseFile ), "File annotation for integrase prot." )
("rt,R" , po::value<string>( &rtFile ) , "File annotation for retrotranscriptase prot." )
("protease,P" , po::value<string>( &proteaseFile ) , "File annotation for protease prot." )
("reference,r" , po::value<string>( &referenceFile ) , "Reference Fasta file." )
("control,b" , po::value<string>( &inRunControlBam ) , "In run control BAM file." )
("gene,g" , po::value<string>( &geneFile ) , "Gene annotation file related to reference genome provided." )
("depth,d", po::value<int>( &depthThreshold )->default_value(50), "Depth treshold to call a variant. (optional)")
("ratio,a" , po::value<double>( &ratioThreshold )->default_value(0.1) , "Minimum ratio of alt reads to take in account a mutation. (optional)" )
("pvalue,p" , po::value<double>( &pvalueThreshold )->default_value(0.05) , "pvalue threshold from FET to take in account a mutation. (optional)" )
("stop,s" , "Activate stop codon detction." )
("thread,t", po::value<int>( &nbThread )->default_value(50), "Number of thread to use. (NYI)")
("loglevel,L", po::value<int>( &loggingLevel )->default_value(1), "Logging level.");
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
// Verif que le nb de thread n'est pas négatif ...
if( nbThread < 1 )
{
nbThread = 1;
cerr << "You entered a null or negative value for thread number, positionning it to 1. " << endl;
}
if( argc <= 1 )
{
cerr << "Error while checking program arguments" << endl;
cerr << desc << "\n";
return 1;
}
// check provided files
if( bamFile.length( ) > 0 )
{
if( !IsFileReadable( bamFile ) )
{
cerr << "File provided as input BAM : " << bamFile << " is not accessible : stopping" << endl;
return -1;
}
}
else
{
cerr << "No file provided as input BAM file : stopping" << endl;
return -1;
}
if( integraseFile.length( ) > 0 )
{
if( !IsFileReadable( integraseFile ) )
{
cerr << "File provided as integrase annotation : " << integraseFile << " is not accessible : stopping" << endl;
return -1;
}
}
if( rtFile.length( ) > 0 )
{
if( !IsFileReadable( rtFile ) )
{
cerr << "File provided as retrotranscriptase annotation : " << rtFile << " is not accessible : stopping" << endl;
return -1;
}
}
if( proteaseFile.length( ) > 0 )
{
if( !IsFileReadable( proteaseFile ) )
{
cerr << "File provided as protease annotation : " << proteaseFile << " is not accessible : stopping" << endl;
return -1;
}
}
if( referenceFile.length( ) > 0 )
{
if( !IsFileReadable( referenceFile ) )
{
cerr << "File provided as reference : " << referenceFile << " is not accessible : stopping" << endl;
return -1;
}
}
else
{
cerr << "No file provided as reference file : stopping" << endl;
return -1;
}
if( inRunControlBam.length( ) > 0 )
{
if( !IsFileReadable( inRunControlBam ) )
{
cerr << "File provided as control sample BAM file : " << inRunControlBam << " is not accessible : stopping" << endl;
return -1;
}
}
else
{
cerr << "No file provided as control sample BAM file : stopping" << endl;
return -1;
}
if( geneFile.length( ) > 0 )
{
if( !IsFileReadable( geneFile ) )
{
cerr << "File provided as gene annotation : " << geneFile << " is not accessible : stopping" << endl;
return -1;
}
}
else
{
cerr << "No file provided as gene annotation file : stopping" << endl;
return -1;
}
Vihgo * App = new Vihgo( bamFile , geneFile , integraseFile , proteaseFile, referenceFile,outputFile, codonResultFile, csFile, inRunControlBam,depthThreshold, nbThread,loggingLevel, ratioThreshold ,pvalueThreshold, countStop );
App->mainLoop();
delete App;
cerr << "end of main" << endl;
return 0;
}
// utility functions for bioinformatics
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <sys/time.h>
#include <sys/types.h>
#include "utils.h"
using namespace std;
// test if a file is readable
// return value : true if readable ; false if not
bool IsFileReadable( string file )
{
ifstream fichier( file.c_str() );
return !fichier.fail();
}
// display a time lenth in µs
// return value : void
void ExecMeasure( struct timeval begin , struct timeval end , string operation )
{
cerr << "Execution time for operation : " << operation << " : " << end.tv_usec - begin.tv_usec << " µs" << endl;
}
int string_to_int( string incomingStr )
{
istringstream isstmp( incomingStr );
int i;
isstmp >> i;
return i;
}
string double_to_string( double incoming )
{
string result;
ostringstream oss;
oss << incoming;
result = oss.str();
return result;
}
string int_to_string( int incoming )
{
string result;
ostringstream oss;
oss << incoming;
result = oss.str();
return result;
}
string pyReplace( string incoming , string pattern , string replacement )
{
while (incoming.rfind( pattern ) != string::npos )
{
int n = incoming.rfind( pattern );
int l = pattern.length();
incoming.replace( n , l , replacement );
}
return incoming;
}
string char_to_string(char incoming)
{
string s;
stringstream ss;
ss << incoming;
ss >> s;
return s;
}
char checkBase(char incoming )
{
if( incoming == 'c' )
{
return 'C';
}
if( incoming == 't' )
{
return 'T';
}
if( incoming == 'a' )
{
return 'A';
}
if( incoming == 'g' )
{
return 'G';
}
if( incoming == 'n' )
{
return 'N';
}
if( incoming == 'C' )
{
return 'C';
}
if( incoming == 'T' )
{
return 'T';
}
if( incoming == 'A' )
{
return 'A';
}
if( incoming == 'G' )
{
return 'G';
}
if( incoming == 'N' )
{
return 'N';
}
return 'N';
}
#ifndef _UTILS_H
#define _UTILS_H
bool IsFileReadable( std::string );
void ExecMeasure( struct timeval, struct timeval, std::string );
int string_to_int( std::string );
std::string double_to_string( double );
std::string int_to_string( int );
char checkBase(char);
std::string pyReplace( std::string , std::string , std::string );
std::string char_to_string(char);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment