#!/usr/bin/env python
# -*- coding: utf-8 -*-


#  ### GAD development file  ###
#  ## hiv_genotyping.py
#  ## Version : 1.0
#  ## Description : amino-acid variant detection (drug-resistance genotyping) on HIV genes
#  ##               (integrase, reverse transcriptase, protease) from an aligned BAM file,
#  ##               using a reference FASTA, a GTF of CDS features and an in-run control BAM
#  ## Usage : hiv_genotyping.py -s sample.bam -R reference.fasta -g annotation.gtf -b control.bam -o results.tsv [options]
#  ## Output : variant table (-o), per-codon count table (-c), optional consensus FASTA (-C)
#  ## Requirements : python 2.7+ - pySAM - scipy - plotly (optional, for graphical output)
#  
#  ## Author: yannis.duffourd@u-bourgogne.fr
#  ## Creation Date: 2016-10-18
#  ## last revision date: 2018-03-29
#  ## Known bugs: None
#  ## TODO: 
#  

import os
import sys
import getopt
import logging
import math
import random
import string 


# pysam requirement
try:
	import pysam 
except ImportError:
	sys.stderr.write( "pysam library is required and was not found on your system. Please check PYTHONPATH, or install the library. Exiting.\n" )
	sys.exit( 1 )

# scipy requirement
try:
	from scipy import stats
except ImportError:
	sys.stderr.write( "scipy library is required and was not found on your system. Please check PYTHONPATH, or install the library. Exiting.\n" )
	sys.exit( 1 )
	
# plotly optional requirement
isGraphicalPossible = False
try:
	import plotly.plotly as py
	from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
	import plotly.graph_objs as go
	from plotly.graph_objs import Scatter, Figure, Layout
	isGraphicalPossible = True
except ImportError:
	sys.stderr.write( "plotly library was not found on your system. Please check PYTHONPATH, or install the library if you want graphical output. Continuing without graphical capabilities.\n" )

	
	
	
	

currentThread = 0
integraseFile , samFile , rtFile , proteaseFile , referenceFile , logFile , outputFile , allControlBamList , inRunControlBam , gtfFile , csFile , codonResultFile = "" , "" , "" , "" , "" , "" , "" , "" , "" , "" , "" , ""

#  generate a random temporary name used for the intermediate mutation file
hsh = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(12))


depthThreshold = 200
ratioThreshold = 0
pvalueThreshold = 1.0
annotationIsDefined = False
nbThread = 1
countStop = False
graphical = False
loggingLevel = 1
referenceSequence = {}
referenceCodon = {}
knownMutationTable = {}
genes = {}
effectifTable = {}
totalAltSequencedBase = {}
protCodon = {}
bamCodon = {}
bamCodonReference = {}

# codon table 
codonTable = { "TTT":"F" , "TTC" : "F" , "TTA" : "L" , "TTG" : "L" , "CTT":"L" , "CTC":"L" , "CTA":"L" , "CTG":"L" , "ATT" : "I" , "ATC":"I" , "ATA":"I" , "ATG":"M" , "GTT":"V" , "GTC":"V" , "GTA":"V" , "GTG":"V" , "TCT":"S" , "TCC":"S" , "TCA":"S" , "TCG":"S" , "CCT":"P" , "CCC":"P" , "CCA":"P" , "CCG":"P", "ACT":"T" , "ACC":"T" , "ACA":"T" , "ACG":"T" , "GCT":"A" , "GCC":"A" , "GCA":"A" , "GCG":"A" , "TAT":"Y", "TAC":"Y" , "TAA":"*" , "TAG":"*" , "CAT":"H" , "CAC":"H" , "CAA":"Q" , "CAG" : "Q" , "AAT":"N" , "AAC":"N" , "AAA":"K" , "AAG":"K" , "GAT":"D" , "GAC":"D" , "GAA":"E" , "GAG":"E", "TGT":"C" , "TGC":"C" , "TGA":"*" , "TGG":"W" , "CGT":"R" , "CGC":"R" , "CGA":"R" , "CGG":"R" , "AGT":"S" , "AGC":"S" , "AGA":"R" , "AGG":"R" , "GGT":"G", "GGC":"G" , "GGA":"G", "GGG":"G" }

# store per-gene reference-base counts for later statistical tests
totalRefSequencedBase = {}
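
# Command-line options (as parsed below):
#   -s <file>   input sample BAM/SAM (aligned reads)
#   -R <file>   reference FASTA
#   -g <file>   GTF file providing CDS features (a protein_id=<name> attribute is expected)
#   -b <file>   in-run control BAM used as the comparison sample
#   -i <file>   known-mutation table for the integrase gene
#   -r <file>   known-mutation table for the reverse transcriptase gene
#   -p <file>   known-mutation table for the protease gene
#   -o <file>   final variant table (TSV)
#   -c <file>   per-codon count table (default: codons.tsv, or codons.stop.tsv with -S)
#   -C <file>   consensus sequence output (FASTA)
#   -a <list>   list of control BAM files (parsed but not used downstream)
#   -D <int>    depth threshold (default 200)
#   -l <int>    allelic ratio threshold (parsed but not used downstream)
#   -P <float>  p-value threshold (parsed but not used downstream)
#   -n <int>    number of threads (parsed but not used downstream)
#   -S          activate stop codon detection
#   -G          activate graphical output (requires plotly)
#   -e <file>   log file (default: stderr)
#   -L <int>    logging verbosity level (3 or more enables per-read logging)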

opts, args = getopt.getopt(sys.argv[1:], 's:i:r:p:R:D:l:P:o:c:b:a:n:SGe:L:g:C:')  
for opt, arg in opts:
	if opt in ("-i"):
		integraseFile = arg
	elif opt in ("-s"):
		samFile = arg
	elif opt in ("-r"):
		rtFile = arg
	elif opt in ("-p"):
		proteaseFile = arg
	elif opt in ("-R"):
		referenceFile = arg
	elif opt in ("-D"):
		depthThreshold = int(arg)
	elif opt in ("-l"):
		ratioThreshold = int(arg)
	elif opt in ("-P"):
		pvalueThreshold = float(arg)
	elif opt in ("-o"):
		outputFile = arg
	elif opt in ("-c"):
		codonResultFile = arg
	elif opt in ("-C"):
		csFile = arg
	elif opt in ("-b"):
		inRunControlBam = arg
	elif opt in ("-g"):
		gtfFile = arg
	elif opt in ("-a"):
		allControlBamList = arg
	elif opt in ("-n"):
		nbThread = int( arg )
	elif opt in ("-S"):
		countStop = True
		sys.stderr.write('Stop codon detection activated.\n')
	elif opt in ("-G"):
		if isGraphicalPossible : 
			sys.stderr.write('Graphical capabilities activated.\n')
			graphical = True
		else:
			sys.stderr.write('Warning : Unable to use graphical capabilities.\n')
			graphical = False
	elif opt in ("-e") :
		logFile = arg
	elif opt in ("-L") :
		loggingLevel = int( arg )
	

if logFile == "" :
	logging.basicConfig(stream = sys.stderr , filemode = 'w', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
else:	
	logging.basicConfig(filename =  logFile , filemode = 'w', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
logging.info('start')
	
# sample name
if samFile.find( "/" ) != -1 :
	sample = samFile.split( "/" ) [-1]
else:
	sample = samFile
sample = sample.replace( ".bam" , "" )
sample = sample.split( "." )[0]
logging.info( "Sample name detected : %s" % sample )


# codon output file name (default when -c is not provided)
if codonResultFile == "" :
	if countStop == True :
		codonResultFile = "codons.stop.tsv" 
	else:
		codonResultFile = "codons.tsv"

 
def phredToInt( incString ):
    # convert to ascii 
    score = ord( incString )
     
    # subtract 33 from the score (Phred+33 encoding)
    score -= 33
    
    return score

def igf( lddl , chiStat ):
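    # Series approximation of the lower incomplete gamma function (kept for
    # reference; not called in the current workflow, which relies on scipy.stats).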
    if chiStat < 0.0:
        return 0.0

    sc = 1.0 / lddl
    sc *= chiStat**lddl
    sc *= math.exp( -chiStat )
    sys.stderr.write( "sc value = %s\n" % ( sc ) )

    somme = 1.0
    nom = 1.0
    denom = 1.0

    for i in range( 1 , 200):
        nom *= chiStat
        lddl += 1
        denom *= lddl
        somme += (nom / denom)

    if ( somme * sc ) < 0.000000000000001:
        sys.stderr.write( "igf value = %s\n" % (0.000000000000001) )
        return 0.000000000000001

    sys.stderr.write( "igf value = %s\n" % (somme * sc ) )
    return somme * sc
    
def gamma( lddl ):
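    # Stirling-type approximation of the gamma function (kept for reference,
    # not called in the current workflow).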
    RECIP_E = 0.36787944117144232159552377016147
    TWOPI = 6.283185307179586476925286766559
    D = 1.0 / (10.0 * lddl)
    D = 1.0 / ((12 * lddl) - D)
    D = (D + lddl) * RECIP_E
    D = D**lddl
    D *= math.sqrt(TWOPI / lddl)
    
    
    sys.stderr.write( "gamma = %s\n" % D )
    return D

def adequationChiTwo( effectTable , localRef , localAlt  ):
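    # Chi-square goodness-of-fit of the observed alternative-allele ratio at one
    # position against the mean background ratio (effectTable), returning a p-value
    # via pochisq(). Example with hypothetical values:
    #   adequationChiTwo( [0.5, 1.0, 0.7] , 900 , 100 )
    # tests a 10% observed ratio against a ~0.73% background mean.
    # Kept for reference; the main workflow uses scipy.stats instead.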
    
    
    # compute deviation between observed & theoretical
    thMean = sum( effectTable ) / len( effectTable )
    obsMean = localAlt * 100 / ( float( localRef ) + float( localAlt ) )

    if obsMean < thMean:
        return 1

    if obsMean == 100:
        obsMean = 99.99999
    logging.info( "Th mean : %s ; obsMean : %s\n" % ( thMean , obsMean ))

    refDev = ( ( (100 - obsMean) - (100 - thMean) )**2 ) /  (100 - obsMean)
    altDev = ( ( obsMean - thMean )**2 ) / obsMean
    ddl = 1
    qSquare = refDev + altDev
    alpha = 5

    sys.stderr.write( "X2 statistical value = %s\n" % qSquare )

    # with alpha = 5% and ddl = 1, the confidence interval is [0;5.991]
    # H0 is rejected if qSquare is greater than 5.991 with a 5% error risk,
    # i.e. there is a significant difference between the observed position and the rest of the reference.
    # need to compute a p-value

    return pochisq( qSquare , 1 )
    
def pochisq(x, df) :
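    # Upper-tail probability of a chi-square statistic. Only the df = 1 case is
    # handled here (P(X2 > x) = 2 * Phi(-sqrt(x))), which matches how it is called
    # from adequationChiTwo().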
       
    LOG_SQRT_PI = 0.5723649429247000870717135
    I_SQRT_PI = 0.5641895835477562869480795

    if x <= 0.0 or df < 1 :
        return 1.0

    s = 2.0 * poz( - math.sqrt( x ) )

    sys.stderr.write( " p-value : %s\n" % s )
    return s
 
def poz(z) :
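    # Cumulative distribution function of the standard normal, computed with a
    # polynomial approximation (used by pochisq above).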
    sys.stderr.write( "Incoming z : %s\n" % z )

    y = 0
    x = 0
    w = 0
    Z_MAX = 6.0

    if z == 0.0 :
        x = 0.0
    else :
        y = 0.5 * abs(z)

        if (y >= (Z_MAX * 0.5)):
            x = 1.0
        elif y < 1.0 :
            w = y * y
            x = ((((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w - 0.019198292004) * w + 0.059054035642) * w
                 - 0.151968751364) * w + 0.319152932694) * w - 0.531923007300) * w + 0.797884560593) * y * 2.0
        else :
            y -= 2.0
            x = (((((((((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y - 0.000676904986) * y + 0.001390604284) * y -   0.000794620820) * y - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y + 0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y - 0.002141268741) * y + 0.000535310849) * y + 0.999936657524

    if z > 0.0 :
        sys.stderr.write( "Output : %s\n" % ((x + 1.0) * 0.5) )
        return ((x + 1.0) * 0.5)
    else:
        sys.stderr.write( "Output : %s\n" % ((1.0 - x) * 0.5) )
        return ((1.0 - x) * 0.5)

# requires the bam file to have been parsed already (bamCodon populated)
def getEffectiveTable( ref , excludeList ):
    localEffectifTable = []
    for position in bamCodon[ref].keys():

        # skip the position if it was already flagged as a mutation
        if position in excludeList:
            continue

        totalOnPos = 0
        totalAltOnPos = 0

        refAA = codonTable[ referenceCodon[ref][position] ]
        for codon in bamCodon[ref][position].keys():
            totalOnPos += bamCodon[ref][position][codon]
            obsAA = codonTable[ codon ]

            if obsAA != refAA:
                totalAltOnPos += bamCodon[ref][position][codon]

        if totalAltOnPos == 0:
            localEffectifTable.append( 0 )
            #~ sys.stderr.write( "Adding 0 to %s effectives for pos : %s \n" % ( ref , position ) )
        else:
            localEffectifTable.append( totalAltOnPos * 100 / float( totalOnPos ) )
            #~ sys.stderr.write( "Adding %s to %s effectives for pos : %s \n" % ( totalAltOnPos * 100 / float( totalOnPos ) , ref , position ) )

    return localEffectifTable

# compute factorial
def fact(n):
    """fact(n): compute the factorial of n (integer >= 0)"""
    if n<2:
        return 1
    else:
        return n*fact(n-1)

# compute log factorial
def logFactoriel( inc ):
    ret = 0
    while inc > 0:
        ret += math.log( inc )
        inc -= 1
    return ret

# compute log of the hypergeometric probability for the Fisher exact test (FET)
def logHypergeometricProb( a ,  b , c  , d ):
    return logFactoriel( a + b ) + logFactoriel( c + d ) + logFactoriel( a + c ) + logFactoriel( b + d )- logFactoriel( a ) - logFactoriel( b ) - logFactoriel( c ) - logFactoriel( d ) - logFactoriel( a + b + c + d )

# compute pvalue from FET
def FETPvalue( a , b , c, d ):
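	# Two-sided Fisher exact test p-value computed from the log hypergeometric
	# probabilities above. Example with hypothetical counts: FETPvalue( 40 , 960 , 2 , 998 )
	# compares 40/1000 alternative codons in the sample against 2/1000 in a control.
	# Note: the variant calling below uses scipy.stats.fisher_exact instead.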
	#sys.stderr.write( "Computing a p-value from FET ..." )
	n = a + b + c + d 
	logpCutOff = logHypergeometricProb( a , b , c , d )
	pFraction = 0
	logpValue = 0
	
	for x in range( 0 , n ):
		if( ( a + b - x >= 0 ) and ( a + c - x >= 0 ) and ( d - a + x >= 0 ) ) :
			l = logHypergeometricProb( x , a + b - x , a + c - x , d - a + x )
			if  l <= logpCutOff :
				pFraction += math.exp( l - logpCutOff )

	logpValue = logpCutOff + math.log( pFraction )
	#sys.stderr.write( " done \n" )
	return math.exp(logpValue)

# determine if a codon is a stop or not
def isStop( incCodon ):
	stopList = [ "TAG" , "TAA" , "TGA" ]
	if incCodon in stopList :
		return True 
	else:
		return False

# compute the complement of a sequence
def complement( incCodon ):
	t = {"A":"T" , "T":"A" , "G":"C" ,"C":"G" }
	s = ""
	incCodon = incCodon.upper()
	for i in incCodon:
		s += t[i]
	return s
	
# deal with the ref genome
def get_genome():
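	# Load the reference FASTA into referenceSequence[chromosome][1-based position] = base.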
	logging.info( "Starting genome parsing" )
	
	referenceStream = open( referenceFile , "r" )
	currentChr = ""
	i = 1
	for line in referenceStream:
		# name of the sequence
		line = line.strip()
		if line.startswith( ">" ):
			line = line.replace( ">" , "" )
			line = line.split( " " )[0]
			currentChr = line
			referenceSequence[line] = {}
			logging.info( "New reference sequence detected : %s." % currentChr )
		else:
			for base in line:
				referenceSequence[currentChr][i] = base
				i += 1
	referenceStream.close()
	
	logging.info( "Genome summary : \n" )
	for chr in referenceSequence.keys():
		logging.info("\t%s\t%s" % ( chr , len(referenceSequence[chr] )) )
	logging.info( "Ending genome parsing." )

# build the codon table for each gene
def get_genes():
	# parse the gtf file to get every cds 
	# warning : need to deal with genes in reverse ... 
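	# Only CDS lines are used; the attribute column (field 9) is expected to contain
	# a 'protein_id=<name>' entry (note the '=' separator), which becomes the gene key
	# in genes{} and referenceCodon{}.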
	logging.info( "Starting genes parsing." )
	
	for line in open( gtfFile , 'r' ):
		line = line.strip()
		if line.split( "\t" )[0] not in referenceSequence.keys() :
			continue
		if line.split( "\t" )[2] == "CDS" :
			a = line.split( "\t" )[8] 
			aTable = a.split( ";" )
			for elt in aTable :
				if elt.startswith( "protein_id" ):
					g = elt.split( "=" )[1]
					g = g.replace( '"' , '' )
					if g not in genes.keys():
						genes[g] = {}
						referenceCodon[g] = {}
						genes[g]["start"] = []
						genes[g]["end"] = []
						genes[g]["strand"] = ""
						genes[g]["chrom"] = line.split( "\t" )[0]
						
						
					genes[g]["start"].append( int( line.split( "\t" )[3] ) )
					genes[g]["end"].append( int( line.split( "\t" )[4] ) )
					genes[g]["strand"] = line.split( "\t" )[6]	
						
	# get codon from reference
	for g in genes.keys():
		logging.info( "Getting codons for gene : %s" % g )
	
		codon = ""
		chrom = genes[g]["chrom"]
		logging.info( "\tchrom is %s" % chrom )
		if genes[g]["strand"] == "+" :
			logging.info( "\tstrand is +" )
			posList = referenceSequence[chrom].keys()
			posList.sort()
			for pos in posList:
				for start,end in zip(genes[g]["start"],genes[g]["end"]) :
					if (pos >= start) and (pos <= end) :
						codon += referenceSequence[chrom][pos]
						if (len(codon) % 3 == 0) and (len(codon) != 0 ) :
							codon = codon.upper()
							referenceCodon[g][(pos-3)+1] = codon
							# ~ referenceCodon[g][(pos-3)] = codon
							codon = ""
					
		if genes[g]["strand"] == "-" :
			logging.info( "\tstrand is -" )
			posList = referenceSequence[chrom].keys()	
			posList.sort()
			posList.reverse()
			for pos in posList :
				for start,end in zip(genes[g]["start"],genes[g]["end"]) :
					if (pos >= start) and (pos <= end) :
						codon += referenceSequence[chrom][pos]
						if (len(codon) % 3 == 0) and (len(codon) != 0 ) :
							codon = codon.upper()
							referenceCodon[g][(pos+3)+1] = complement(codon)
							# ~ referenceCodon[g][(pos+3)] = complement(codon)
							codon = ""
						
				
				
				
				
	logging.info( "Genes content : " )
	
	for g in referenceCodon.keys():
		posList = referenceCodon[g].keys()
		posList.sort()
		for pos in posList:
			logging.info( "%s\t%s\t%s" % ( g , pos , referenceCodon[g][pos] ) )
			
	
	
	# conversion to the protein numbers 
	for g in referenceCodon.keys():
		protCodon[g] = {}
		i = 1
		posList = referenceCodon[g].keys()	
		posList.sort()
		for pos in posList :
			protCodon[g][pos] = i
			i += 1

	logging.info( "Genomic position / protein position conversion : " )
	for g in protCodon.keys():
		posList = protCodon[g].keys()	
		posList.sort()
		for pos in posList:
			logging.info( "%s\t%s\t%s" % ( g , pos , protCodon[g][pos] ) )
	
	
		
	logging.info( "Ending genes parsing." )
		
def display_reference_codon():
	w = open( "referenceCodon.tsv" , "w" )
	w.write( "##### Ref codons : #####\n" ) 
	for g in referenceCodon.keys():
		for pos in sorted(referenceCodon[g].keys()):
			w.write( "\t%s : %s : %s \n" % (g, pos , referenceCodon[g][pos]))	
	w.close()

def display_prot_codon():
	w = open( "protCodon.tsv" , "w" )
	w.write( "##### Prot codons : #####\n" ) 
	for g in protCodon.keys():
		for pos in sorted(protCodon[g].keys()):
			w.write( "\t%s : %s : %s \n" % (g, pos , protCodon[g][pos]))	
	w.close()

def display_bam_codon():
	
	w = open( "bamCodon.tsv" , "w" )
	w.write( "###### bam codons ######\n")
	for a in bamCodon.keys():
		for b in sorted(bamCodon[a].keys()):
			for c in bamCodon[a][b].keys():
				w.write( "%s %s %s %s\n" % ( a , b , c , bamCodon[a][b][c] ) )
	w.close()
	
def display_bam_ref_codon():
	
	w = open( "bamRefCodon.tsv" , "w" )
	w.write( "###### bam codons ######\n")
	for a in bamCodonReference.keys():
		for b in sorted(bamCodonReference[a].keys()):
			for c in bamCodonReference[a][b].keys():
				w.write( "%s %s %s %s\n" % ( a , b , c , bamCodonReference[a][b][c] ) )
	w.close()

def display_bam_reference_codon():
	
	w = open( "bamCodonReference.tsv" , "w" )
	w.write( "###### bam codons ######\n")
	for a in bamCodonReference.keys():
		for b in sorted(bamCodonReference[a].keys()):
			for c in bamCodonReference[a][b].keys():
				w.write( "%s %s %s %s\n" % ( a , b , c , bamCodonReference[a][b][c] ) )
	w.close()

def get_annotation():
	logging.info( "Starting genotype tables parsing" )
	logging.info( "Starting %s parsing" % integraseFile )
	global annotationIsDefined
	if integraseFile != "":
		annotationIsDefined = True
		refFound = False
		# parse & store genotyping files   
		integraseStream = open( integraseFile,  "r" )
		for line in integraseStream:
			if line.startswith( " " ) or line.startswith( "Position" ) or line.startswith( "AA" ) or  len(line) < 10:
				continue
			
			if line.startswith( "#" ) :
				temp = line.strip().split( "#" )
				ref = temp[1]
				knownMutationTable[ref] = {}
				refFound = True
				continue
		
			if not refFound :
				logging.error( "Error : %s file is malformed. Please redo the analysis without it or correct the file (see doc at http://blabla.html)\n" % integraseFile )
				sys.exit( 1 )
				
			lineTable = line.strip().split( "\t" )
			index = lineTable[2] + lineTable[0] + lineTable[4]
			if loggingLevel >= 3:
				logging.info( "index : %s" % index )
			if lineTable[2] == lineTable[4]:
				continue
		
			knownMutationTable[ref][index] = lineTable[5]
		
		integraseStream.close()
		logging.info( "\tINTEGRASE : OK\n" )
	else:
		logging.info( "\tINTEGRASE : no file provided" )

	logging.info( "Starting %s parsing" % rtFile )
	if rtFile != "":
		annotationIsDefined = True
		refFound = False
		# parse & store genotyping files
		
		reverseTranscriptaseStream = open( rtFile,  "r" )
		for line in reverseTranscriptaseStream:
			if line.startswith( " " ) or line.startswith( "Position" ) or line.startswith( "AA" ) or  len(line) < 10:
				continue
			
			if line.startswith( "#" ) :
				temp = line.strip().split( "#" )
				ref = temp[1]
				knownMutationTable[ref] = {}
				refFound = True
				continue
			
			if not refFound :
				logging.error( "Error : %s file is malformed. Please redo the analysis without it or correct the file (see doc at http://blabla.html)\n" % rtFile )
				sys.exit( 1 )
			lineTable = line.strip().split( "\t" )
			index = lineTable[2] + lineTable[0] + lineTable[4]
			if loggingLevel >= 3:
				logging.info( "index : %s" % index )
			if lineTable[2] == lineTable[4]:
				continue
			knownMutationTable[ref][index] = lineTable[5]
		
		reverseTranscriptaseStream.close()
		logging.info( "\tREVERSETRANSCRIPTASE : OK\n" )
	else:
		logging.info( "\tREVERSETRANSCRIPTASE : no file provided\n" )

	logging.info( "Starting %s parsing" % proteaseFile )
	if proteaseFile != "":
		annotationIsDefined = True
		refFound = False
		# parse & store genotyping files
		
		proteaseStream = open( proteaseFile,  "r" )
		for line in proteaseStream:
			if line.startswith( " " ) or line.startswith( "Position" ) or line.startswith( "AA" ) or  len(line) < 10:
				continue
			
			if line.startswith( "#" ) :
				temp = line.strip().split( "#" )
				ref = temp[1]
				knownMutationTable[ref] = {}
				refFound = True
				continue
			
			if not refFound :
				logging.error( "Error : %s file is malformed. Please redo the analysis without it or correct the file (see doc at http://blabla.html)\n" % proteaseFile )
				sys.exit( 1 )
			lineTable = line.strip().split( "\t" )
			index = lineTable[2] + lineTable[0] + lineTable[4]
			if loggingLevel >= 3:
				logging.info( "index : %s" % index )
			if lineTable[2] == lineTable[4]:
				continue
			knownMutationTable[ref][index] = lineTable[5]
		
		proteaseStream.close()
		logging.info( "\tPROTEASE : OK\n" )
	else:
		logging.info( "\tPROTEASE : no file provided\n" )
	   
	logging.info( "Genotype tables parsing done\n" )

	resultTable = {}
	qualityTable = {}

	resultTable["ReverseTranscriptase"] = {}
	resultTable["Protease"] = {}
	resultTable["Integrase"] = {}

	qualityTable["ReverseTranscriptase"] = {}
	qualityTable["Protease"] = {}
	qualityTable["Integrase"] = {}

###### deal with inRunControlBam 
def read_inRunControl():
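	# Parse the in-run control BAM and fill bamCodonReference[gene][codon index][codon] = count,
	# where the codon index is 1-based and relative to the start of the enclosing CDS segment.
	# Reads are skipped if unmapped, secondary, supplementary, MAPQ < 30, or if the CIGAR
	# contains I/D/S/H events; codons with any base quality < 30 are ignored.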
	global bamCodonReference
	logging.info( "Starting bam control file parsing : %s" % ( inRunControlBam ) )
	bamIterRef = pysam.AlignmentFile( inRunControlBam , "r" )
	bamCodonReference = {}
	# samStream = open( samFile , "r" )
	countLine = 0
	for line in bamIterRef:
		if ( countLine != 0 ) and (countLine % 200000 == 0 ):
			logging.info( "%s aln read\n" % countLine ) 
		
		
		if loggingLevel >= 3:
			logging.info('New read to parse : %s' % str(line) )
		# pass bad alignements
		if (line.is_unmapped == True ) or (line.is_secondary == True ) or (line.is_supplementary == True) or (int(line.mapping_quality) < 30) :
			if loggingLevel >= 3:
				logging.info('Passing sequence : bad quality ' )
			continue

		# get position and ref 
		ref = line.reference_name
		position = int( line.reference_start) + 1
		sequence = line.query_sequence
		quality = line.query_qualities
		
		# non-aligned reads, theoretically unreachable because tested above
		if ref == "*" :
			if loggingLevel >= 3:
				logging.info('Passing sequence : not aligned : %s ' % ref )
			continue
			
				
		# test CIGAR string : if insertion or deletion, we skip the read
		cigar = line.cigarstring
		
		if "I" in cigar or "H" in cigar or "S" in cigar or "D" in cigar:
			if loggingLevel >= 3:
				logging.info('Passing sequence : CIGAR contains a special event : %s ' % cigar )
			continue
			
		currentPosition = position

		i = 0 
		countLine += 1
		# browse every codons, but need to determine if the first codon is complete or not
		while ( i < len(sequence) ) and (i + 3 <= len( sequence ) ) :
			complete = False
			for g in genes.keys():
				for start,end in zip(genes[g]["start"],genes[g]["end"]) :
					if currentPosition >= start and currentPosition <= end : 
						# manage new chr encounters 
						if not bamCodonReference.has_key( g ):
							logging.info('New gene found in data : %s' % g )
							bamCodonReference[g] = {}
						
						
			
						if protCodon[g].has_key( currentPosition ):
							complete =  True
							codon = sequence[i:i + 3]
							codon = codon.upper()
							if loggingLevel >= 3:
								logging.info('Adding a codon on ' + g + ' : pos on read : ' + str(i) + ' ; pos on ref : ' + str( currentPosition ) + ' ; codon : ' + codon + '(' + str(((currentPosition-start)/3)+1 ) + ')' )
							qualityCodon = quality[i:i+3]
							qualityScore = 0
							takeIt = 1
							for base in qualityCodon:
								qualityScore += int( base )
								if int(base) < 30 :
									takeIt = 0

							# count the codon only once, and only if all three base qualities are >= 30
							if takeIt == 1:
								if not bamCodonReference[g].has_key(((currentPosition-start)/3)+1 ):
									bamCodonReference[g][((currentPosition-start)/3)+1] = {}
								if not bamCodonReference[g][((currentPosition-start)/3)+1].has_key( codon ):
									bamCodonReference[g][((currentPosition-start)/3)+1][codon] = 0
								bamCodonReference[g][((currentPosition-start)/3)+1][codon] += 1
							
						else:
							complete = False
							
			if complete : 
				i += 3 
				currentPosition += 3 
			else:
				# codon is not complete
				if loggingLevel >= 3:
					logging.info('Not on a 1st base, passing to next base : pos on read : ' + str(i) + ' ; pos on ref : ' + str( currentPosition )  )
				i += 1
				currentPosition += 1 
			
	logging.info( "Parsing done with %s aln treated\n" % countLine )

def read_sample():
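	# Same parsing as read_inRunControl(), but on the sample BAM: fills
	# bamCodon[gene][codon index][codon] = count, with the same mapping-quality,
	# CIGAR and base-quality filters. Reads listed in bannedReadID are skipped
	# (the stop-codon detection that fills this list is currently commented out).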
	############ deal with bam file
	logging.info( "Parsing bam file : %s ..." % ( samFile ) )
	# ~ logging.info( "Starting bam file parsing : %s" % ( samFile ) )
	bannedReadID = []
	countLine = 0

	bamIter = pysam.AlignmentFile( samFile , "r" )
	global bamCodon
	
	#samStream = open( samFile , "r" )

	for line in bamIter:
		if loggingLevel >= 3:
			logging.info('New read to parse : %s' % str(line) )
		# pass bad alignements
		if (line.is_unmapped == True ) or (line.is_secondary == True ) or (line.is_supplementary == True) or (int(line.mapping_quality) < 30) :
			if loggingLevel >= 3:
				logging.info('Passing sequence : bad quality ' )
			continue
		
		# pass stop pairs if activated
		if line.query_name in bannedReadID :
			# ~ sys.stderr.write( 'Passing sequence : stop in pair \n' ) 
			continue

		# get position and ref 
		ref = line.reference_name
		position = int( line.reference_start) + 1
		sequence = line.query_sequence
		quality = line.query_qualities
		
		# non-aligned reads, theoretically unreachable because tested above
		if ref == "*" :
			if loggingLevel >= 3:
				logging.info('Passing sequence : not aligned : %s ' % ref )
			continue
		

		
		#~ sys.stderr.write( "%s\n" % refTable )
		
		# test CIGAR string : if insertion or deletion, we skip the read
		cigar = line.cigarstring
		
		if "I" in cigar or "H" in cigar or "S" in cigar or "D" in cigar:
			if loggingLevel >= 3:
				logging.info('Passing sequence : CIGAR contains a special event : %s ' % cigar )
			continue
		
		
		# manage deletion 
		# ~ positionOfDel = 0 
		# ~ lengthOfDel = 0
		# ~ if "D" in cigar:
			# ~ #logging.info('Passing sequence : Deletion detected : ' + cigar + " ; sequence : " + sequence )
			
			# ~ # get position of the del in the read and its size
			# ~ for elt in line.cigartuples:
				# ~ if elt[0] == 2 :
					# ~ lengthOfDel = elt[1]
					# ~ break
				# ~ if elt[0] == 0:
					# ~ positionOfDel += elt[1]
			

		currentPosition = position
		# extract codon sequences
		i = 1 
		
		# browse the sequence to look for stop codon if stop analysis detected 
		takeSequence = True
		
		## need a full rewamp of the stop detection ####
		# ~ if countStop:
			# ~ while i < len(sequence) :
				# ~ if i + 3 > len( sequence ):
					# ~ break
				# ~ if currentPosition % 3 == 0 or currentPosition == 1:
					# ~ codon = sequence[i:i + 3]
					# ~ codon = codon.upper()
					# ~ if isStop( codon ):
						# ~ takeSequence = False
						# ~ bannedReadID.append( line.query_name )
						# ~ break
					# ~ i += 3
					# ~ currentPosition += 3 
				# ~ else:
					# ~ i += 1
					# ~ currentPosition += 1 
		#################################################
		
		countLine += 1
		currentPosition = position
		# browse every codons, but need to determine if the first codon is complete or not
		if takeSequence :
			i = 0 
			# deal with end of sequence
			while ( i < len(sequence) ) and (i + 3 <= len( sequence ) ) :
				# detect the gene 
				complete = False
				for g in genes.keys():
					for start,end in zip(genes[g]["start"],genes[g]["end"]) :
						if currentPosition >= start and currentPosition <= end : 
							# manage new gene encounters 
							if not bamCodon.has_key( g ):
								logging.info('New reference found : %s' % g )
								bamCodon[g] = {}
									
							# codon is complete
							if protCodon[g].has_key( currentPosition ):
								complete = True
								codon = sequence[i:i + 3]
								codon = codon.upper()
								if loggingLevel >= 3:
									logging.info('Adding on ' + g + ' : pos on read : ' + str(i) + ' ; pos on ref : ' + str( currentPosition ) + ' ; codon : ' + codon + '(' + str(((currentPosition-start)/3)+1 ) + ')' )
								qualityCodon = quality[i:i+3]
								qualityScore = 0
								takeIt = 1
								for base in qualityCodon:
									qualityScore += int( base )
									if int(base) < 30 :
										takeIt = 0
								
								if takeIt == 1:
									if not bamCodon[g].has_key(((currentPosition-start)/3)+1 ):
										bamCodon[g][((currentPosition-start)/3)+1] = {}
									if not bamCodon[g][((currentPosition-start)/3)+1].has_key( codon ):
										bamCodon[g][((currentPosition-start)/3)+1][codon] = 0
									bamCodon[g][((currentPosition-start)/3)+1][codon] += 1
														
							else:
								# codon is not complete
								complete = False
								
				
				if complete :
					i += 3 
					currentPosition += 3 
				else:		
					if loggingLevel >= 3:
						logging.info('Not on a 1st base, passing to next base : pos on read : ' + str(i) + ' ; pos on ref : ' + str( currentPosition ) )
					i += 1
					currentPosition += 1 

	logging.info( " done with %s aln kept\n" % countLine )
	logging.info( "%s sequence discarded due to stop codon.\n" % len( bannedReadID ) )


def get_aa( g , ppos  ):
	gpos = 0 
	# get the genomic position 
	for elt in protCodon[g].keys():
		if ( protCodon[g][elt] == ppos ):
			gpos = elt
			break
			
	# verify
	if gpos == 0:
		logging.error( "Error : submitted position not in the reference. Please check data consistency\n" )
	
	# get the codon 
	codon = referenceCodon[g][gpos] 
	
	
	
	
	# get the aa
	aa = codonTable[codon] 
	
	
	# ~ sys.stderr.write( "Gene : %s ; genomic pos : %s ; proteic pos : %s ; codon : %s ; aa : %s\n" % ( g , gpos , ppos , codon , aa ) )
	
	
	return aa 

	
def get_gpos_from_ppos( g,  ppos ):
	for elt in protCodon[g].keys():
		if ( protCodon[g][elt] == ppos ):
			gpos = elt
			break
			
	# verify
	if gpos == 0:
		logging.error( "Error : submitted position not in the reference. Please check data consistency\n" )
	
	return gpos
	

# let's modelize the background noise
def modelize_noise():
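	# For each reference gene, write two per-position tables under <sample>/ :
	#   <sample>.<ref>.percent.data : percentage of each non-reference amino acid
	#   <sample>.<ref>.value.data   : raw counts of each non-reference amino acid
	# and accumulate totalRefSequencedBase / totalAltSequencedBase.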
	global bamCodon
	# make sure the per-sample output directory exists (it is also created later in variant_calling)
	if not os.path.isdir( sample ):
		os.makedirs( sample )
	# count codons which are not the reference
	logging.info( "Counting codons ... " )
	for ref in bamCodon.keys():
		# ~ sys.stderr.write( "\ton ref %s\n" % ref )
		outStream = open( sample + "/" + sample + "." + ref + ".percent.data" , "w" )
		noiseTable = {}
		for position in bamCodon[ref].keys():
			# ~ sys.stderr.write( "\t\ton p.pos %s\n" % position )
			totalOnPos = 0
			if not noiseTable.has_key( position ):
				noiseTable[position] = {}
			
			#  position is in prot. 
			# We need to convert to a g position
			refAA = get_aa( ref , position )
			
			for codon in bamCodon[ref][position].keys():
				# ~ sys.stderr.write( "\t\t\ton codon %s\n" % codon )
				totalOnPos += bamCodon[ref][position][codon]
				obsAA = codonTable[ codon ]
				
				if obsAA != refAA:
					if not noiseTable[position].has_key( obsAA ):
						noiseTable[position][obsAA] = 0
					noiseTable[position][obsAA] += bamCodon[ref][position][codon]
				
		# we have all data for counting
			for aa in noiseTable[position].keys():
				noiseTable[position][aa] = noiseTable[position][aa] * 100 / float(totalOnPos)

		listofAA = list( set( codonTable.values() ) )
		outStream.write( "#position\t%s\n" % "\t".join( listofAA ) )
		for position in noiseTable.keys():
			outStream.write( "%s" % position )
			for a in listofAA : 
				if noiseTable[position].has_key( a ):
					outStream.write( "\t%s" % noiseTable[position][a] )
				else:
					outStream.write( "\t0" )
			outStream.write( "\n" )
		outStream.close()
	sys.stderr.write( " done\n " )
	
	# count codons which are not the reference
	logging.info( "Modeling the background noise ... " )

	for ref in bamCodon.keys():
		totalRefSequencedBase[ref] = 0
		totalAltSequencedBase[ref] = 0
		bigTotal = 0
		noiseTable = {}
		if not noiseTable.has_key( ref ):
			noiseTable[ref] = {}  
		outStream = open( sample + "/" + sample + "." + ref + ".value.data" , "w" )
		
		for position in bamCodon[ref].keys():
			totalOnPos = 0
			totalAltOnPos = 0
			
			if not noiseTable[ref].has_key( position ):
				noiseTable[ref][position] = {}
			
			refAA = get_aa( ref , position )
			for codon in bamCodon[ref][position].keys():
				totalOnPos += bamCodon[ref][position][codon]
				bigTotal += bamCodon[ref][position][codon]
				obsAA = codonTable[ codon ]
				
				if obsAA != refAA:
					if not noiseTable[ref][position].has_key( obsAA ):
						noiseTable[ref][position][obsAA] = 0
					noiseTable[ref][position][obsAA] += bamCodon[ref][position][codon]
					totalAltSequencedBase[ref] += bamCodon[ref][position][codon]
					totalAltOnPos += bamCodon[ref][position][codon]
			
			#~ sys.stderr.write( "Position : %s , Ref : %s ,  Alt : %s , Total : %s \n" % ( position , totalOnPos - totalAltOnPos , totalAltOnPos , totalOnPos ) )
					
			totalRefSequencedBase[ref] += bigTotal - totalAltSequencedBase[ref]
		
		listofAA = list( set( codonTable.values() ) )
		outStream.write( "#position\t%s\n" % "\t".join( listofAA ) )
		for position in noiseTable[ref].keys():
			outStream.write( "%s" % position )
			for a in listofAA : 
				if noiseTable[ref][position].has_key( a ):
					outStream.write( "\t%s" % noiseTable[ref][position][a] )
				else:
					outStream.write( "\t0" )
			outStream.write( "\n" )
		outStream.close()
	logging.info( " done\n " )

# calculate distributions 
def compute_distribution():
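	# Build an empirical frequency distribution of the per-position alternative
	# amino-acid counts from <sample>.<ref>.value.data and write it to
	# <sample>.<ref>.value.density. Note: not called from main() in this version.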
	sys.stderr.write( "Computing distribution  ... " )
	for ref in bamCodon.keys():
		sys.stderr.write( "Calculating distribution for %s \n" % ref )
		incStream = open( sample + "/" + sample + "." + ref + ".value.data" , "r" )
		contingencyTable = {}
		
		for line in incStream : 
			#sys.stderr.write( "%s" % line )
			if line.startswith( "#position" ):
				continue
			myTable = line.split( "\t" )
			
			for elt in myTable[1:]:
				i_elt = int( elt )
				if not contingencyTable.has_key( i_elt ):
					contingencyTable[i_elt] = 0
				contingencyTable[i_elt] += 1
		
		totalSum = 0
		for elt in contingencyTable.keys():
			totalSum += contingencyTable[elt]
		
		# calculate probabilities
		for elt in contingencyTable.keys():
			tmp = contingencyTable[elt]
			contingencyTable[elt] = tmp / float( totalSum )
		
		
		outStream = open( sample + "/" + sample + "." + ref+".value.density" , "w" )
		outStream.write( "#value\tcount\n" )
		for elt in contingencyTable.keys():
			outStream.write( "%s\t%s\n" % ( elt , contingencyTable[elt] ) )
		outStream.close()
		
	sys.stderr.write( " done\n " )


def variant_calling():
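	# Two-stage calling:
	#   1. iteratively estimate the background noise per reference, discarding positions
	#      whose alt ratio differs significantly (chi-square, p < 0.01) from the rest;
	#   2. for each codon with enough support, test it against the in-run control with a
	#      Fisher exact test and against the background with a chi-square contingency test.
	# The per-codon test is scipy.stats.fisher_exact on a 2x2 table, e.g. (hypothetical counts)
	# stats.fisher_exact( [[40 , 1000] , [2 , 1000]] ) -> (oddsratio , pValue).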
	global bamCodon
	global bamCodonReference
	# detect the potential background noise 
	a = 0
	b = 1
	turn = 1
	positionToDiscard = []
	bg = {}
	while b != 0:
		bg = {}
		bgEff = {}
		b = 0
		
		logging.info( "BG estimation : turn %s\n" % turn )
		
		# first : get data for chisquare test
		for ref in bamCodon.keys():
			if loggingLevel >= 3:
				logging.info( "\tOn ref : %s\n" % ref )
			altCount = []
			refCount = []
			if not bg.has_key( ref ):
				bg[ref] = []
				bgEff[ref] = []

			
			for position in bamCodon[ref].keys():
				if loggingLevel >= 3:
					logging.info( "\t\tOn pos : %s\n" % position )
				positionAltCount = 0
				positionTotalCount = 0
				if position in positionToDiscard:
					logging.info( "discarded pos : %s Passing \n" % position ) 
					continue
				
				
				for codon in bamCodon[ref][position].keys():
					positionTotalCount += bamCodon[ref][position][codon]
				# ~ sys.stderr.write( "On %s : pos %s , totalD : %s\n" % ( ref , position , positionTotalCount ) )

				for codon in bamCodon[ref][position].keys():
					if loggingLevel >= 3:
						logging.info( "\t\t\tOn codon : %s\n" % codon )
					gpos = get_gpos_from_ppos( ref, position )
					refCodon = referenceCodon[ref][gpos]
					if codon != refCodon :
						positionAltCount = bamCodon[ref][position][codon]
						if loggingLevel >= 3:
							logging.info( "On %s : pos %s , alt : %s ; %s vs %s \n" % ( ref , position , positionAltCount , refCodon , codon ) )
						bg[ref].append( positionAltCount / float( positionTotalCount ) )
						
				if positionAltCount > positionTotalCount :
					logging.critical( "Critical error : more alt allele than the total on position %s\n" % position )
					sys.exit( 1 )
					
				altCount.append( positionAltCount )
				refCount.append( positionTotalCount )
			
		
		
		
		
			if len( altCount ) == 0 :
				logging.error( "Error : all position (alt) excluded on ref %s\n" % ref )
				continue
				
			if len( refCount ) == 0 :
				logging.error( "Error : all position (ref) excluded on ref %s\n" % ref )
				continue
			
			bgEff[ref].append( sum( altCount ) / len( altCount ) )
			bgEff[ref].append( sum( refCount ) / len( refCount ) )
			logging.info( "%s : %s , %s\n" % ( ref , bgEff[ref][0] , bgEff[ref][1] ) )
			
		# now redo a loop to find mutations
		for ref in bamCodon.keys():
			for position in bamCodon[ref].keys():
				positionAltCount = 0
				positionTotalCount = 0
				if position in positionToDiscard:
					continue

				for codon in bamCodon[ref][position].keys():
					positionTotalCount += bamCodon[ref][position][codon]
				
				
				for codon in bamCodon[ref][position].keys():
					gpos = get_gpos_from_ppos( ref , position )
					refCodon = referenceCodon[ref][gpos]
					if codon != refCodon :
						positionAltCount = bamCodon[ref][position][codon]
						# chisquare test to determine if position is significantly different from ref
						t,p = stats.chisquare( positionAltCount / float( positionTotalCount ) , bg[ref] )
						#~ sys.stderr.write( "Testing bg : %s , %s , %s, %s\n" % ( positionAltCount , positionTotalCount , ",".join( bg[ref] ) , p )  )
						if p < 0.01: 
							positionToDiscard.append( position )
							b += 1
							a += 1

		logging.info( "End of turn %s with %s new mutation found\n" % ( turn , b ) ) 
		turn += 1
		


	# background noise is estimated with a control sample.
	logging.info( "Calling variants ... \n" )

	nbMutTotal = 0
	resultTable = {}
	
	
	if not os.path.isdir( sample ):
		os.makedirs( sample )
	codonFileResult = open( codonResultFile , "w" )
	codonFileResult.write( "#Reference\treferenceCodon\treferenceAA\tposition\tsequencedCodon\tsequencedAA\tcounts\tallelicRatio\tctrlCounts\tctrlRatio\n" )

	mutationFileResult = open( hsh , "w" )



	testToPerform = []

	for ref in bamCodon.keys():
		
		resultTable[ref] = {}    
		totalOnPos = 0
		logging.info( "ref : %s\n" % ref )
		for position in bamCodon[ref].keys():
			#~ logging.info( "ref : " + ref + " ; position : " + str( position ) )
			gpos = get_gpos_from_ppos( ref , position )
			refCodon = referenceCodon[ref][gpos]
			refAA = codonTable[refCodon]
			
			totalOnPos = 0
			totalOnPosReference = 0
			for a in bamCodon[ref][position].keys():
				totalOnPos += bamCodon[ref][position][a]
			
			if not bamCodonReference.has_key( ref ) :
				logging.warning( "The in-run control sample has no reads on reference %s : skipping \n" % ref )
				break
			
			
			if bamCodonReference[ref].has_key( position ):
				for a in bamCodonReference[ref][position].keys():
					totalOnPosReference += bamCodonReference[ref][position][a]
			
				
			
			for codon in bamCodon[ref][position].keys():
				count = bamCodon[ref][position][codon]
				obsAA = codonTable[codon]
				
				if bamCodonReference[ref].has_key( position ):
					if bamCodonReference[ref][position].has_key( codon ):
						countCtrl = bamCodonReference[ref][position][codon]
					else:
						countCtrl = 0
				else:
					countCtrl = 0
				
				ratioCtrl = 0
				ratio = 0
				if totalOnPos != 0:
					ratio = ( count * 100 ) / float(totalOnPos)
				
				if totalOnPosReference != 0:
					ratioCtrl = ( countCtrl * 100 ) / float(totalOnPosReference)
				
				codonFileResult.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s/%s\t%s\t%s\t%s\n" % ( ref , refCodon , refAA , position , codon , obsAA , count , totalOnPos , ratio , countCtrl , ratioCtrl ) )
				 
				# output mutation (depth threshold from -D, default 200; minimum alt codon count fixed at 30)
				if obsAA != refAA and totalOnPos > depthThreshold and count > 30  and  countCtrl < count :
					logging.info( "Testing position : %s on %s , AA : %s (ref %s ), depth : %s , ratio : %s" % ( position , ref , obsAA , refAA , totalOnPos , ratio  ) ) 
					
					# Fisher exact test to determine if different from the in run control  
					oddsratio,pValue = stats.fisher_exact([[count , totalOnPos] , [countCtrl , totalOnPosReference]] )
					logging.info( "\tFET p-value : %s" % pValue )
					if pValue < 0.01:  
						
						#  contingency chi 2 test vs background noise of the sample
						chi2, p, dof, ex = stats.chi2_contingency( [[count , totalOnPos ] , [ bgEff[ref][0] , bgEff[ref][1] ]] , correction=False)
						logging.info( "\tchi2 p-value : %s " % p )
						if p < 0.01:
							logging.info( "Writing in results %s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % ( ref , refCodon , refAA , position , codon , obsAA , count , totalOnPos , countCtrl , totalOnPosReference , pValue , p ) )
							mutationFileResult.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( ref , refCodon , refAA , position , codon , obsAA , count , totalOnPos , countCtrl , totalOnPosReference , pValue , p ))
						else:
							logging.info( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s was different from control but in background noise\n" % ( ref , refCodon , refAA , position , codon , obsAA , count , totalOnPos , countCtrl , totalOnPosReference , pValue , p ) )
	logging.info( "Done performing statistical tests.\n" )


	codonFileResult.close()
	mutationFileResult.close()

# stats for graphics
def compute_stats_for_graphics():
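	# Gather FET and chi-square p-values per codon for plotting. Note: this function is
	# not called from main() and refers to bgEff, which is local to variant_calling(),
	# so it would need that table passed in before being usable.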
	statsForGraph = {}
	for ref in bamCodon.keys():
		
		statsForGraph[ref] = {}	
		totalOnPos = 0
		logging.info( "ref : " + ref )
		for position in bamCodon[ref].keys():
			#~ logging.info( "ref : " + ref + " ; position : " + str( position ) )
			refCodon = referenceCodon[ref][position]
			refAA = codonTable[refCodon]
			statsForGraph[ref][position] = {}
			
			
			totalOnPos = 0
			totalOnPosReference = 0
			for a in bamCodon[ref][position].keys():
				totalOnPos += bamCodon[ref][position][a]
				
			if bamCodonReference[ref].has_key( position ) :
				for a in bamCodonReference[ref][position].keys():
					totalOnPosReference += bamCodonReference[ref][position][a]
			else:
				totalOnPosReference = 0
			
			
			for codon in bamCodon[ref][position].keys():
				statsForGraph[ref][position][codon] = {}
				
				count = bamCodon[ref][position][codon]
				obsAA = codonTable[codon]
				
				if bamCodonReference[ref].has_key( position ):
					if bamCodonReference[ref][position].has_key( codon ):
						countCtrl = bamCodonReference[ref][position][codon]
					else:
						countCtrl = 0
				else:
					countCtrl = 0
				
				# FET
				oddsratio,pValue = stats.fisher_exact([[count , totalOnPos] , [countCtrl , totalOnPosReference]] )
				statsForGraph[ref][position][codon]['FET'] = pValue
				
				# chisquare
				chi2, p, dof, ex = stats.chi2_contingency( [[count , totalOnPos ] , [ bgEff[ref][0] , bgEff[ref][1] ]] , correction=False)
				statsForGraph[ref][position][codon]['CHI'] = p
				statsForGraph[ref][position][codon]['codonDepth'] = count


# writing final results after annotation
def write_results():
	global annotationIsDefined
	finalFileResult = open( outputFile , "w" )
	finalFileResult.write( "#Reference\taa Position\tReference Codon\tReference aa\tAlternative Codon\tAlternative aa\tDepth\tAlternative Allele Ratio\tFET ctrl p-value\tChi2 bg p-value\tAnnotation\n")
	mutationFileResult = open( hsh , "r" )

	for line in mutationFileResult:
		lineTable = line.strip().split( "\t" )
		ref = lineTable[0]
		annotation = "No annotation found"
		
		if annotationIsDefined :
			index = lineTable[2] + lineTable[3] + lineTable[5]
			logging.info( "looking for annotation for %s\n" % index )
			if index in knownMutationTable[ref].keys():
				annotation = knownMutationTable[ref][index]
		
		depth = int( lineTable[7] )
		ratio = ( float( lineTable[6] ) *100 ) / depth 
		
		finalFileResult.write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s pct\t%s\t%s\t%s\n" % ( ref , lineTable[3] , lineTable[1] , lineTable[2] , lineTable[4] , lineTable[5] , depth , ratio , lineTable[10], lineTable[11] ,  annotation ) ) 
	mutationFileResult.close()
	finalFileResult.close()


# writing consensus sequence
def compute_consensus():
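	# Write a naive consensus: for each codon position, keep the most frequent codon
	# observed in the sample and concatenate them per reference gene.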
	global bamCodon
	consFile = open( csFile , "w" )
	for ref in bamCodon.keys():
		consFile.write( ">" + ref + "\n" )
		for position in sorted( bamCodon[ref].keys() ) :
			count = 0
			retainedCodon = ""
			for codon in bamCodon[ref][position].keys():
				if bamCodon[ref][position][codon] > count:
					count = bamCodon[ref][position][codon]
					retainedCodon = codon
			consFile.write( retainedCodon )
		consFile.write( "\n" )
	consFile.close()







# main 
def main( ):
	#verify_req()
	get_genome()
	get_genes()
	get_annotation()
	display_reference_codon()
	display_prot_codon()
	read_inRunControl()
	read_sample()
	
	display_bam_codon()
	display_bam_ref_codon()
	
	modelize_noise()
	variant_calling()
	write_results()
	compute_consensus()
	
	
if __name__ == "__main__":
	main()