diff --git a/CNV/test_gpu_mean_depth.py b/CNV/test_gpu_mean_depth.py index 898f36711d3d442478e802688ffa731d1a4691d1..8842e349f27e14f1c2bb6de2c975cdf696e58bfb 100644 --- a/CNV/test_gpu_mean_depth.py +++ b/CNV/test_gpu_mean_depth.py @@ -9,30 +9,36 @@ from pycuda.compiler import SourceModule import pycuda.gpuarray as gpuarray from pycuda.autoinit import context import multiprocessing -from concurrent.futures import ProcessPoolExecutor - +from multiprocessing import Process # Options -try: - opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:') - for opt, arg in opts: - if opt in ("-b"): - bamfile = arg - if opt in ("-w"): - window_size = int(arg) - if opt in ("-s"): - step_size = int(arg) - if opt in ("-o"): - output_file = arg - if opt in ("-e"): - logfile = arg -except getopt.GetoptError: - print('Invalid argument') - sys.exit(1) - -logging.basicConfig(filename='%s' % (logfile), filemode='a', level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s') -logging.info('start') -global seq + +def parse_arguments(): + try: + opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:') + bamfile_path, window_size, step_size, output_file, logfile = None, None, None, None, None + for opt, arg in opts: + if opt in ("-b"): + bamfile_path = arg + if opt in ("-w"): + window_size = int(arg) + if opt in ("-s"): + step_size = int(arg) + if opt in ("-o"): + output_file = arg + if opt in ("-e"): + logfile = arg + return bamfile_path, window_size, step_size, output_file, logfile + except getopt.GetoptError: + print('Invalid argument') + sys.exit(1) + +if __name__ == '__main__': + bamfile_path, window_size, step_size, output_file, logfile = parse_arguments() + logging.basicConfig(filename='%s' % (logfile), filemode='a', level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s') + logging.info('start') + global seq + # Code CUDA mod = SourceModule(""" //Kernel pour calculer la profondeur moyenne brute @@ -160,10 +166,10 @@ def dico_mappabilite(mappability_file): return mappability_dico #Dictionnaire avec les bornes de mappabilité en fonction des positions pour chaque chromosome. def calcul_mappability(seq_length, mappability, chr): - sys.stderr.write("\t Entering calcul_mappability =\n") + sys.stderr.write("\t Entering calcul_mappability \n") map_data = np.zeros(seq_length, dtype=np.float32) sorted_keys = sorted(mappability[chr].keys()) - sys.stderr.write("\t sorted_keys =\n") + #sys.stderr.write("\t sorted_keys =\n") prev_bound = 0 prev_mappability = 0 @@ -178,7 +184,7 @@ def calcul_mappability(seq_length, mappability, chr): for i in range(prev_bound, seq_length): map_data[i] = prev_mappability - sys.stderr.write("\t Leaving calcul_mappability =\n") + sys.stderr.write("\t Leaving calcul_mappability \n") return map_data ############################################# @@ -212,10 +218,10 @@ def calcul_gc_content(seq_length, chr, seq): ############################################## ######<---Fonctions calcul Depth Seq--->###### ############################################## -def calcul_depth_seq(seq_length, bamfile, chr): +def calcul_depth_seq(seq_length, bamfile_path, chr): sys.stderr.write("\t Entering calcul_depth_seq\n") depth_data = np.zeros(seq_length, dtype=np.int32) - for pileupcolumn in bamfile.pileup(): + for pileupcolumn in bamfile_path.pileup(): #sys.stderr.write("%s : %s \n" % (pileupcolumn.reference_pos, pileupcolumn.nsegments)) if pileupcolumn.reference_pos > seq_length: break @@ -227,29 +233,28 @@ def calcul_depth_seq(seq_length, bamfile, chr): ################################# ######<---Fonction main--->###### ################################# -def main_calcul(bamfile, chr, seq_length, window_size, step_size, output_file): +def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_file): sys.stderr.write("\t entering main_calcul\n") global seq - with ProcessPoolExecutor(max_workers=3) as executor: + # Calcul mappability + map_data = Process(target = calcul_mappability, args=(seq_length, mappability, chr)) - # Calcul mappability - future_map_data = executor.submit(calcul_mappability, seq_length, mappability, chr) - - - # Calcul GC - future_gc_data = executor.submit(calcul_gc_content, seq_length, chr, seq) - - - # Calcul depth seq - future_depth_data = executor.submit(calcul_depth_seq, seq_length, bamfile, chr) + # Calcul GC + gc_data = Process(target = calcul_gc_content, args = (seq_length, chr, seq)) - - - map_data = future_map_data.result() - gc_data = future_gc_data.result() - depth_data = future_depth_data.result() + # Calcul depth seq + depth_data = Process(target = calcul_depth_seq, args = (seq_length, bamfile_path, chr)) + + map_data.start() + gc_data.start() + depth_data.start() + + map_data.join() + gc_data.join() + depth_data.join() + # Transférer le tableau NumPy vers CUDA d_depth_data = cuda.mem_alloc(depth_data.nbytes) cuda.memcpy_htod(d_depth_data, depth_data) @@ -325,17 +330,19 @@ def main_calcul(bamfile, chr, seq_length, window_size, step_size, output_file): # Programme principal #Calcul nombre de coeurs max pour le GPU + device = cuda.Device(0) attributes = device.get_attributes() num_cores = attributes[1] print("Nombre de CPU: ", multiprocessing.cpu_count()) print(f"Nombre de coeurs max GPU: {num_cores}") + gc_file = '/work/gad/shared/pipeline/grch38/index/grch38_essential.fa' mappability_file = '/work/gad/shared/analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38.bedgraph' seq = parse_fasta(gc_file) mappability = dico_mappabilite(mappability_file) -#print(attributes) -with pysam.AlignmentFile(bamfile, "rb") as bamfile_handle: + +with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle: for i, seq_length in enumerate(bamfile_handle.lengths): chr = bamfile_handle.references[i] sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))