Change multiprocessing (don't work)

c832a6ba · Theo Serralta · 10941d40 · c832a6ba
Commit c832a6ba authored May 29, 2024 by Theo Serralta
Hide whitespace changes
Inline Side-by-side

Showing with 53 additions and 46 deletions

test_gpu_mean_depth.py CNV/test_gpu_mean_depth.py +53 -46

No files found.
--- a/CNV/test_gpu_mean_depth.py
+++ b/CNV/test_gpu_mean_depth.py
@@ -9,30 +9,36 @@ from pycuda.compiler import SourceModule
 import pycuda.gpuarray as gpuarray
 from pycuda.autoinit import context
 import multiprocessing
-from concurrent.futures import ProcessPoolExecutor
-
+from multiprocessing import Process

 # Options
-try:
-    opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:')
-    for opt, arg in opts:
-        if opt in ("-b"):
-            bamfile = arg
-        if opt in ("-w"):
-            window_size = int(arg)
-        if opt in ("-s"):
-            step_size = int(arg)
-        if opt in ("-o"):
-            output_file = arg
-        if opt in ("-e"):
-            logfile = arg
-except getopt.GetoptError:
-    print('Invalid argument')
-    sys.exit(1)
-
-logging.basicConfig(filename='%s' % (logfile), filemode='a', level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s')
-logging.info('start')
-global seq
+
+def parse_arguments():
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:')
+        bamfile_path, window_size, step_size, output_file, logfile = None, None, None, None, None
+        for opt, arg in opts:
+            if opt in ("-b"):
+                bamfile_path = arg
+            if opt in ("-w"):
+                window_size = int(arg)
+            if opt in ("-s"):
+                step_size = int(arg)
+            if opt in ("-o"):
+                output_file = arg
+            if opt in ("-e"):
+                logfile = arg
+        return bamfile_path, window_size, step_size, output_file, logfile
+    except getopt.GetoptError:
+        print('Invalid argument')
+        sys.exit(1)
+
+if __name__ == '__main__':
+    bamfile_path, window_size, step_size, output_file, logfile = parse_arguments()
+    logging.basicConfig(filename='%s' % (logfile), filemode='a', level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s')
+    logging.info('start')
+    global seq
+    
 # Code CUDA
 mod = SourceModule("""
 //Kernel pour calculer la profondeur moyenne brute
@@ -160,10 +166,10 @@ def dico_mappabilite(mappability_file):
    return mappability_dico #Dictionnaire avec les bornes de mappabilité en fonction des positions pour chaque chromosome.
    
 def calcul_mappability(seq_length, mappability, chr):
-    sys.stderr.write("\t Entering calcul_mappability =\n")
+    sys.stderr.write("\t Entering calcul_mappability \n")
    map_data = np.zeros(seq_length, dtype=np.float32)
    sorted_keys = sorted(mappability[chr].keys())
-    sys.stderr.write("\t sorted_keys =\n")
+    #sys.stderr.write("\t sorted_keys =\n")
    
    prev_bound = 0
    prev_mappability = 0
@@ -178,7 +184,7 @@ def calcul_mappability(seq_length, mappability, chr):
    for i in range(prev_bound, seq_length):
        map_data[i] = prev_mappability

-    sys.stderr.write("\t Leaving calcul_mappability =\n")
+    sys.stderr.write("\t Leaving calcul_mappability \n")
    return map_data
 
 #############################################
@@ -212,10 +218,10 @@ def calcul_gc_content(seq_length, chr, seq):
 ##############################################
 ######<---Fonctions calcul Depth Seq--->######
 ##############################################
-def calcul_depth_seq(seq_length, bamfile, chr):
+def calcul_depth_seq(seq_length, bamfile_path, chr):
    sys.stderr.write("\t Entering calcul_depth_seq\n")
    depth_data = np.zeros(seq_length, dtype=np.int32)
-    for pileupcolumn in bamfile.pileup():
+    for pileupcolumn in bamfile_path.pileup():
        #sys.stderr.write("%s : %s \n" % (pileupcolumn.reference_pos, pileupcolumn.nsegments))
        if pileupcolumn.reference_pos > seq_length:
            break
@@ -227,29 +233,28 @@ def calcul_depth_seq(seq_length, bamfile, chr):
 #################################
 ######<---Fonction main--->######
 #################################
-def main_calcul(bamfile, chr, seq_length, window_size, step_size, output_file):
+def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_file):
    sys.stderr.write("\t entering main_calcul\n")
    global seq

-    with ProcessPoolExecutor(max_workers=3) as executor:
+    # Calcul mappability
+    map_data = Process(target = calcul_mappability, args=(seq_length, mappability, chr))

-        # Calcul mappability
-        future_map_data = executor.submit(calcul_mappability, seq_length, mappability, chr)
-
-
-        # Calcul GC
-        future_gc_data = executor.submit(calcul_gc_content, seq_length, chr, seq)
-
-        
-        # Calcul depth seq
-        future_depth_data = executor.submit(calcul_depth_seq, seq_length, bamfile, chr)
+    # Calcul GC
+    gc_data = Process(target = calcul_gc_content, args = (seq_length, chr, seq))

-        
-        
-        map_data = future_map_data.result()
-        gc_data = future_gc_data.result()
-        depth_data = future_depth_data.result()
+    # Calcul depth seq
+    depth_data = Process(target = calcul_depth_seq, args = (seq_length, bamfile_path, chr))
+    
+    map_data.start()
+    gc_data.start()
+    depth_data.start()
+    
+    map_data.join()
+    gc_data.join()
+    depth_data.join()
    
+
    # Transférer le tableau NumPy vers CUDA
    d_depth_data = cuda.mem_alloc(depth_data.nbytes)
    cuda.memcpy_htod(d_depth_data, depth_data)
@@ -325,17 +330,19 @@ def main_calcul(bamfile, chr, seq_length, window_size, step_size, output_file):

 # Programme principal
 #Calcul nombre de coeurs max pour le GPU
+
 device = cuda.Device(0)
 attributes = device.get_attributes()
 num_cores = attributes[1]
 print("Nombre de CPU: ", multiprocessing.cpu_count())
 print(f"Nombre de coeurs max GPU: {num_cores}")
+
 gc_file = '/work/gad/shared/pipeline/grch38/index/grch38_essential.fa'
 mappability_file = '/work/gad/shared/analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38.bedgraph'
 seq = parse_fasta(gc_file)
 mappability = dico_mappabilite(mappability_file)
-#print(attributes)
-with pysam.AlignmentFile(bamfile, "rb") as bamfile_handle:
+
+with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
    for i, seq_length in enumerate(bamfile_handle.lengths):
        chr = bamfile_handle.references[i]
        sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))