Optimisation of some functions

676b420d · Theo Serralta · c68a9914 · 676b420d
Commit 676b420d authored Aug 05, 2024 by Theo Serralta
Show whitespace changes
Inline Side-by-side

Showing with 214 additions and 89 deletions

test_gpu_time_normalisation.py CNV/test/test_gpu_time_normalisation.py +214 -89

No files found.
--- a/CNV/test/test_gpu_time_normalisation.py
+++ b/CNV/test/test_gpu_time_normalisation.py
@@ -185,6 +185,35 @@ __global__ void calcul_map_kernel(float *map_data, int seq_length, int window_si
    }
 }

+// Kernel pour filtrer selon le gc
+
+__global__ void filter_read_gc_kernel(float *gc_results, float *depth_results, int n, float *gc_35, float *gc_40, float *gc_45, float *gc_50, float *gc_55) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (idx < n) {
+        float gc_value = gc_results[idx];
+        float depth_value = depth_results[idx];
+        int gc_value_rounded = round(gc_value);
+
+        if (gc_value_rounded == 35) {
+            int pos = atomicAdd((int*)&gc_35[0], 1);
+            gc_35[pos + 1] = depth_value;
+        } else if (gc_value_rounded == 40) {
+            int pos = atomicAdd((int*)&gc_40[0], 1);
+            gc_40[pos + 1] = depth_value;
+        } else if (gc_value_rounded == 45) {
+            int pos = atomicAdd((int*)&gc_45[0], 1);
+            gc_45[pos + 1] = depth_value;
+        } else if (gc_value_rounded == 50) {
+            int pos = atomicAdd((int*)&gc_50[0], 1);
+            gc_50[pos + 1] = depth_value;
+        } else if (gc_value_rounded == 55) {
+            int pos = atomicAdd((int*)&gc_55[0], 1);
+            gc_55[pos + 1] = depth_value;
+        }
+    }
+}
+

 // Kernel pour calculer la lecture de profondeur corrigee par la mappabilitee

@@ -218,7 +247,7 @@ __global__ void normalize_depth_kernel(float *depth_correction, float *gc_result

 // Kernel pour calculer le ratio par window

-__global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) {
+__global__ void ratio_par_window_kernel(float *depth_results, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    if (idx < seq_length - window_size + 1) {
@@ -227,7 +256,7 @@ __global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int se
    }
 }

-// Kernel pour calculer le ratio normalisé par window
+// Kernel pour calculer le ratio normalise par window

 __global__ void ratio_par_window_norm_kernel(float *depth_normalize_val, float mean_chr_norm, int seq_length, int window_size, int step_size, float *ratio_par_window_norm_results) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
@@ -267,6 +296,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio_norm, float mean_ratio,
 calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
 calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
 calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")
+filter_read_gc_kernel_cuda = mod.get_function("filter_read_gc_kernel")
 calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel")
 normalize_depth_kernel_cuda = mod.get_function("normalize_depth_kernel")
 ratio_par_window_kernel_cuda = mod.get_function("ratio_par_window_kernel")
@@ -1112,48 +1142,98 @@ def filter_read_gc(gc_results, depth_results, chr):
    logging.info(f"Entering filter_read_gc for {chr}")
    start_time = time.time()

-    gc_35 = []
-    gc_40 = []
-    gc_45 = []
-    gc_50 = []
-    gc_55 = []
+    gc_35, gc_40, gc_45, gc_50, gc_55 = [], [], [], [], []

    for depth_value, gc_value in zip(depth_results, gc_results):
-        if round(gc_value) == 35:
+        gc_value_rounded = round(gc_value)
+        if gc_value_rounded == 35:
            gc_35.append(depth_value)
-        elif round(gc_value) == 40:
+        elif gc_value_rounded == 40:
            gc_40.append(depth_value)
-        elif round(gc_value) == 45:
+        elif gc_value_rounded == 45:
            gc_45.append(depth_value)
-        elif round(gc_value) == 50:
+        elif gc_value_rounded == 50:
            gc_50.append(depth_value)
-        elif round(gc_value) == 55:
+        elif gc_value_rounded == 55:
            gc_55.append(depth_value)
    
-    gc_35 = np.array(gc_35)
-    gc_40 = np.array(gc_40)
-    gc_45 = np.array(gc_45)
-    gc_50 = np.array(gc_50
-    gc_55 = np.array(gc_55)
-    
+    gc_35, gc_40, gc_45, gc_50, gc_55 = map(np.array, [gc_35, gc_40, gc_45, gc_50, gc_55])
    
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
    
+    logging.info(f"gc_35 = {gc_35}") 
+
    return gc_35, gc_40, gc_45, gc_50, gc_55
    
-def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3):
+def filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr):
+    logging.info(f"Entering filter_read_gc_gpu for {chr}")
+    start_time = time.time()
+    
+    n = len(gc_results)
+    max_size = n + 1
+    
+    gc_35_data = np.zeros(max_size, dtype = np.float32)
+    gc_40_data = np.zeros(max_size, dtype = np.float32)
+    gc_45_data = np.zeros(max_size, dtype = np.float32)
+    gc_50_data = np.zeros(max_size, dtype = np.float32)
+    gc_55_data = np.zeros(max_size, dtype = np.float32)
+    
+    gc_35_gpu = cuda.mem_alloc(gc_35_data.nbytes)
+    gc_40_gpu = cuda.mem_alloc(gc_40_data.nbytes)
+    gc_45_gpu = cuda.mem_alloc(gc_45_data.nbytes)
+    gc_50_gpu = cuda.mem_alloc(gc_50_data.nbytes)
+    gc_55_gpu = cuda.mem_alloc(gc_55_data.nbytes)
+    
+    cuda.memcpy_htod(gc_35_gpu, gc_35_data)
+    cuda.memcpy_htod(gc_40_gpu, gc_40_data)
+    cuda.memcpy_htod(gc_45_gpu, gc_45_data)
+    cuda.memcpy_htod(gc_50_gpu, gc_50_data)
+    cuda.memcpy_htod(gc_55_gpu, gc_55_data)
+    
+    
+    filter_read_gc_kernel_cuda(d_gc_results, d_depth_results, np.int32(n), gc_35_gpu, gc_40_gpu, gc_45_gpu, gc_50_gpu, gc_55_gpu, block=(block_size, 1, 1), grid=(grid_size, 1))
+
+    # Copy back results
+    gc_35 = np.zeros(max_size, dtype=np.float32)
+    gc_40 = np.zeros(max_size, dtype=np.float32)
+    gc_45 = np.zeros(max_size, dtype=np.float32)
+    gc_50 = np.zeros(max_size, dtype=np.float32)
+    gc_55 = np.zeros(max_size, dtype=np.float32)
+    
+    cuda.memcpy_dtoh(gc_35, gc_35_gpu)
+    cuda.memcpy_dtoh(gc_40, gc_40_gpu)
+    cuda.memcpy_dtoh(gc_45, gc_45_gpu)
+    cuda.memcpy_dtoh(gc_50, gc_50_gpu)
+    cuda.memcpy_dtoh(gc_55, gc_55_gpu)
+    
+    gc_35 = gc_35[1:int(gc_35[0])+1]
+    gc_40 = gc_40[1:int(gc_40[0])+1]
+    gc_45 = gc_45[1:int(gc_45[0])+1]
+    gc_50 = gc_50[1:int(gc_50[0])+1]
+    gc_55 = gc_55[1:int(gc_55[0])+1]
+
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)") 
+
+    logging.info(f"gc_35 = {gc_35}")
+    
+    return [gc_35, gc_40, gc_45, gc_50, gc_55]
+
+def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr):
    logging.info(f"Entering calc_polynom for {chr}")
    start_time = time.time()
    
-    polynomials =[]
+    polynomials = []
    depth_arrays = [gc_35, gc_40, gc_45, gc_50, gc_55]

    for depths in depth_arrays:
        if len(depths) > 0:
-            indices = np.arange(len(depths))
-            p = Polynomial.fit(indices, depths, degree)
+            x = np.arange(len(depths))  # Créer un tableau d'indices pour les abscisses
+            coeffs = np.polyfit(x, depths, 3)  # Ajuster un polynôme de degré 3
+            p = np.poly1d(coeffs)  # Créer un polynôme à partir des coefficients
            polynomials.append(p)
        else:
            polynomials.append(None)
@@ -1163,7 +1243,7 @@ def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3):
    logging.info(f"Leaving calc_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)") 
    return polynomials

-def calc_mean_chr(depth_results, chr) 
+def calc_mean_chr(depth_results, chr):
    logging.info(f"Entering calc_mean_chr for {chr}")
    start_time = time.time()   
    
@@ -1177,30 +1257,67 @@ def calc_mean_chr(depth_results, chr)

    end_time = time.time()
    elapsed_time = end_time - start_time
-    logging.info(f"Leaving dinf_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")    
+    logging.info(f"Leaving calc_mean_chr for {chr} (Time taken: {elapsed_time:.4f} seconds)")    
    
    return mean_chr

-def find_polynom(ratio_par_window_results, chr, polynomials
+def find_polynom(ratio_par_window_results, chr, polynomials, depth_results):
+    logging.info(f"Entering find_polynom for {chr}")
+    start_time = time.time()
+    
+    # Convert ratio_par_window_results to numpy array
    ratio_par_window_results = np.array(ratio_par_window_results)
    cn = ratio_par_window_results * 2
-    cn_to_test = []
+    cn_to_test = cn[(cn >= 1.5) & (cn < 2.5)]
+    cn_unique = np.unique(cn_to_test)

-    for i in cn:
-        if i >= 1.5 and i < 2.5:
-            cn_to_test.append(i)
+    best_polynom = None
+    best_knorm_diff = float('inf')
    
-    cn_unique = np.unique(cn_to_test)
+    # Precompute k / 2 for each unique cn value
+    cn_unique_div2 = cn_unique / 2
+
+    # Iterate over each polynomial
+    for polynom in polynomials:
+        logging.info(f"polynom = {polynom}\tpolynomials = {polynomials}\tcn_unique = {cn_unique}")
+        if polynom is not None:
+            # Iterate over each depth result
+            for RCi in depth_results:
+                # Iterate over each unique cn value
+                for k, k_div2 in zip(cn_unique, cn_unique_div2):
+                    knorm = polynom(k) * k_div2
+                    knorm_diff = abs(knorm - RCi)
+                    
+                    if knorm_diff < best_knorm_diff:
+                        best_knorm_diff = knorm_diff
+                        best_polynom = polynom
+
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    logging.info(f"Leaving find_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")    
    
+    return best_polynom 
    
-    knorm = np.argmin(cn_unique) * abs(polynomial
+    return best_polynom

-def normalisation(
+def normalize(depth_results, best_polynom, map_results):
+    logging.info("Entering normalization")
+    start_time = time.time()
    
-    normalize_deth = []
+    normalize_depth_results = []
+    for i in range(len(depth_results)):
+        if depth_results[i] != 0:
+            normalized_value = depth_results[i] / (best_polynom(i) * map_results[i]) if best_polynom is not None else depth_results[i]
+            #logging.info(f"normalized_value = {normalized_value}\ti = {i}\tdepth_results[i] = {depth_results[i]}\tbest_polynom(i) = {best_polynom(i)}\tmap_results[i] = {map_results[i]}")
+            normalize_depth_results.append(normalized_value)
+        else:
+            normalize_depth_results.append(depth_results[i])
+    
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    logging.info(f"Leaving normalization (Time taken: {elapsed_time:.4f} seconds)") 
    
-    for i in depth_results:
-        normalize_depth[i] = depth_results[i] / (polynom * map_data[i])
+    return np.array(normalize_depth_results)

 def main_calcul(
    bamfile_path,
@@ -1363,14 +1480,14 @@ def main_calcul(
        "\t depth_correction_results.nbytes = %s\n" % depth_correction_results.nbytes
    )

-    d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
-    sys.stderr.write(
-        "\t d_normalize_depth_results = %s\n"
-        % d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results))
-    )
-    sys.stderr.write(
-        "\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes
-    )
+    #d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
+    #sys.stderr.write(
+     #   "\t d_normalize_depth_results = %s\n"
+      #  % d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results))
+    #)
+    #sys.stderr.write(
+     #   "\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes
+    #)

    d_ratio_par_window_results = cuda.mem_alloc(ratio_par_window_results.nbytes)
    sys.stderr.write(
@@ -1447,17 +1564,17 @@ def main_calcul(
    )
    sys.stderr.write("\t appel fonction calcul_map_kernel_cuda\n")

-    calcul_depth_correction_kernel_cuda(
-        d_depth_results,
-        d_map_results,
-        np.int32(seq_length),
-        np.int32(window_size),
-        np.int32(step_size),
-        d_depth_correction_results,
-        block=(block_size, 1, 1),
-        grid=(grid_size, 1),
-    )
-    sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n")
+    #calcul_depth_correction_kernel_cuda(
+     #   d_depth_results,
+      #  d_map_results,
+       # np.int32(seq_length),
+        #np.int32(window_size),
+        #np.int32(step_size),
+        #d_depth_correction_results,
+        #block=(block_size, 1, 1),
+        #grid=(grid_size, 1),
+    #)
+    #sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n")

    context.synchronize()

@@ -1478,21 +1595,22 @@ def main_calcul(
        "\t Copie les resultats du GPU (d_map_results) vers le CPU (map_results)\n"
    )

-    cuda.memcpy_dtoh(
-        depth_correction_results, d_depth_correction_results
-    )  # cuda.memcpy_dtoh(dest, src)
-    sys.stderr.write(
-        "\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n"
-    )
+   # cuda.memcpy_dtoh(
+    #    depth_correction_results, d_depth_correction_results
+   # )  # cuda.memcpy_dtoh(dest, src)
+    #sys.stderr.write(
+     #   "\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n"
+    #)

    ### NORMALISATION###

    #Appel fonction read_depth selon gc
    sys.stderr.write("\t appel fonctions filter_results\n")
    gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc(gc_results, depth_results, chr)
+    gc_arrays = filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr)

    #Appel fonction calcul polynomes
-    polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3)
+    polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr)
    
    #Appel fonction calcul moyenne
    mean_chr = calc_mean_chr(depth_results, chr)
@@ -1515,41 +1633,48 @@ def main_calcul(
    # Copier les resultats ratio depuis le peripherique CUDA vers l'hote
    cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
    
+    #Appel fonction meilleur polynome
+    best_polynom = find_polynom(ratio_par_window_results, chr, polynomials, depth_results)
+    
+    #Appel fonction normalisation
+    normalize_depth_results = normalize(depth_results, best_polynom, map_results)
+    
    # Appel fonctions medianes
-    sys.stderr.write("\t appel fonctions calcul medianes\n")
-    m = calcul_med_total(depth_correction_results, chr)
-    gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)
+    #sys.stderr.write("\t appel fonctions calcul medianes\n")
+    #m = calcul_med_total(depth_correction_results, chr)
+    #gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)

    # Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
-    sys.stderr.write("\t Conversion medianes en tableau numpy\n")
-    gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32)
-    for gc, median in gc_to_median.items():
-        gc_to_median_array[int(gc)] = median
+    #sys.stderr.write("\t Conversion medianes en tableau numpy\n")
+    #gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32)
+    #for gc, median in gc_to_median.items():
+     #   gc_to_median_array[int(gc)] = median

    # Allouer de la memoire pour gc_to_median sur le peripherique CUDA
-    sys.stderr.write("\t Allocation mémoire médianes GPU\n")
-    d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes)
-    cuda.memcpy_htod(d_gc_to_median, gc_to_median_array)
+ #   sys.stderr.write("\t Allocation mémoire médianes GPU\n")
+  #  d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes)
+   # cuda.memcpy_htod(d_gc_to_median, gc_to_median_array)

    # Appeler le kernel de normalisation
-    normalize_depth_kernel_cuda(
-        d_depth_correction_results,
-        d_gc_results,
-        np.float32(m),
-        d_gc_to_median,
-        np.int32(seq_length),
-        np.int32(window_size),
-        np.int32(step_size),
-        d_normalize_depth_results,
-        block=(block_size, 1, 1),
-        grid=(grid_size, 1),
-    )
-    sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n")
-
-    context.synchronize()
-
-    # Copier les resultats normalises depuis le peripherique CUDA vers l'hote
-    cuda.memcpy_dtoh(normalize_depth_results, d_normalize_depth_results)
+    #normalize_depth_kernel_cuda(
+      #  d_depth_correction_results,
+     #   d_gc_results,
+     #   np.float32(m),
+     #   d_gc_to_median,
+     #   np.int32(seq_length),
+     #   np.int32(window_size),
+     #   np.int32(step_size),
+     #   d_normalize_depth_results,
+     #   block=(block_size, 1, 1),
+     #   grid=(grid_size, 1),
+    #)
+    #sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n")
+
+    #context.synchronize()
+
+    #Copier les resultats normalises depuis l'hôte vers le peripherique CUDA
+    d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
+    cuda.memcpy_htod(d_normalize_depth_results, normalize_depth_results)
    
    ### Ratio par window###