test_final_multithreading.py 3.23 KB
Newer Older
1 2 3 4
import pysam
import concurrent.futures
import sys
import getopt
5 6
import logging

7 8

#Options
9
output_file = ""
10
try:
11
    opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:')
12 13 14 15 16 17 18 19 20 21
    for opt, arg in opts:
        if opt in ("-b"):
            bamfile = arg
        if opt in ("-w"):
            window_size = int(arg)
        if opt in ("-s"):
            step_size = int(arg)
        if opt in ("-t"):
            num_threads = int(arg)
        if opt in ("-o"):
22 23 24
            output_file = open(arg, "a")
        if opt in ("-e"):
            logfile = arg
25 26 27
except getopt.GetoptError:
    print('Invalid argument')
    sys.exit(1)
28 29 30
    
logging.basicConfig(filename =  '%s' % (logfile), filemode = 'a', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
logging.info('start')
31

32 33
def calcul_depth_seq(bamfile, seq, window_size, step_size, numero_de_thread):
    sys.stderr.write("\t entering calcul_depth_seq : thread numero %s\n" % numero_de_thread)
34 35 36 37 38 39 40 41 42 43 44
    samfile = pysam.AlignmentFile(bamfile, "rb")
    seq_length = samfile.lengths[samfile.references.index(seq)] #Permet d'obtenir la longueur de la sequence

    for pos_start in range(0, seq_length - window_size +1, step_size): #Itère sur la sequence par "fenêtre" en fonction de la taille
        pos_end = pos_start + window_size #pos_start + window_size donne la position de début de la fenêtre, puis grâce à min() et seq_length je peux définir position de fin. le min me permet d'être certain que pos_end < seq_lenght. C-a-d que si la fenêtre dépasse la taille de la sequence, j'aurais quand même pos_end
        window_depths = [] #liste pour stocker la profondeur de chaque fenêtre

        for pileupcolumn in samfile.pileup(seq, start=pos_start, stop=pos_end): #itère sur chaque colone de pileup
            window_depths.append(pileupcolumn.n)#ajoute le nb de lecture couvrant cette position à la liste window_depths

        if window_depths:#calcul la profondeur moyenne pour chaque sequence
45
            sys.stderr.write("\t\t entering wd thread numero %s\n" % numero_de_thread)
46
            avg_depth = sum(window_depths) / len(window_depths)
47 48
            sys.stdout.write(f"{seq}:{pos_start}-{pos_end}: {avg_depth}\n")            
            output_file.write(f"{seq}:{pos_start}-{pos_end}: {avg_depth}\n")
49
           
50
    sys.stderr.write("\t leaving calcul_depth_seq thread numero %s\n" % numero_de_thread)
51 52
    
def calcul_depth_all(bamfile, window_size, step_size, num_threads):
53
    sys.stderr.write("entering calcul_depth_all \n")
54 55 56 57
    samfile=pysam.AlignmentFile(bamfile, "rb")
    
    with concurrent.futures.ThreadPoolExecutor(max_workers = num_threads) as executor:
        futures = [] #liste pour stocker les futures
58
        n=0
59
        for seq in samfile.references:
60 61
            n+=1
            future = executor.submit(calcul_depth_seq, bamfile, seq, window_size, step_size, n)
62 63 64 65 66 67
            futures.append(future)
            
        concurrent.futures.wait(futures)
        
        samfile.close()

68 69
#if output_file != sys.stdout:
 #   output_file.close()
70 71 72 73 74 75 76 77

#Programme principal
if len(sys.argv) <2 or bamfile is None:
    print("Usage : File alignement .bam")
    sys.exit(1)

bamfile_path = bamfile
calcul_depth_all(bamfile_path, window_size, step_size, num_threads)
78 79
logging.info('end')
samfile.close()