Commit 676b420d authored by Theo Serralta's avatar Theo Serralta

Optimisation of some functions

parent c68a9914
...@@ -185,6 +185,35 @@ __global__ void calcul_map_kernel(float *map_data, int seq_length, int window_si ...@@ -185,6 +185,35 @@ __global__ void calcul_map_kernel(float *map_data, int seq_length, int window_si
} }
} }
// Kernel pour filtrer selon le gc
__global__ void filter_read_gc_kernel(float *gc_results, float *depth_results, int n, float *gc_35, float *gc_40, float *gc_45, float *gc_50, float *gc_55) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < n) {
float gc_value = gc_results[idx];
float depth_value = depth_results[idx];
int gc_value_rounded = round(gc_value);
if (gc_value_rounded == 35) {
int pos = atomicAdd((int*)&gc_35[0], 1);
gc_35[pos + 1] = depth_value;
} else if (gc_value_rounded == 40) {
int pos = atomicAdd((int*)&gc_40[0], 1);
gc_40[pos + 1] = depth_value;
} else if (gc_value_rounded == 45) {
int pos = atomicAdd((int*)&gc_45[0], 1);
gc_45[pos + 1] = depth_value;
} else if (gc_value_rounded == 50) {
int pos = atomicAdd((int*)&gc_50[0], 1);
gc_50[pos + 1] = depth_value;
} else if (gc_value_rounded == 55) {
int pos = atomicAdd((int*)&gc_55[0], 1);
gc_55[pos + 1] = depth_value;
}
}
}
// Kernel pour calculer la lecture de profondeur corrigee par la mappabilitee // Kernel pour calculer la lecture de profondeur corrigee par la mappabilitee
...@@ -218,7 +247,7 @@ __global__ void normalize_depth_kernel(float *depth_correction, float *gc_result ...@@ -218,7 +247,7 @@ __global__ void normalize_depth_kernel(float *depth_correction, float *gc_result
// Kernel pour calculer le ratio par window // Kernel pour calculer le ratio par window
__global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) { __global__ void ratio_par_window_kernel(float *depth_results, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x; int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) { if (idx < seq_length - window_size + 1) {
...@@ -227,7 +256,7 @@ __global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int se ...@@ -227,7 +256,7 @@ __global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int se
} }
} }
// Kernel pour calculer le ratio normalisé par window // Kernel pour calculer le ratio normalise par window
__global__ void ratio_par_window_norm_kernel(float *depth_normalize_val, float mean_chr_norm, int seq_length, int window_size, int step_size, float *ratio_par_window_norm_results) { __global__ void ratio_par_window_norm_kernel(float *depth_normalize_val, float mean_chr_norm, int seq_length, int window_size, int step_size, float *ratio_par_window_norm_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x; int idx = threadIdx.x + blockIdx.x * blockDim.x;
...@@ -267,6 +296,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio_norm, float mean_ratio, ...@@ -267,6 +296,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio_norm, float mean_ratio,
calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel") calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel") calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel") calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")
filter_read_gc_kernel_cuda = mod.get_function("filter_read_gc_kernel")
calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel") calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel")
normalize_depth_kernel_cuda = mod.get_function("normalize_depth_kernel") normalize_depth_kernel_cuda = mod.get_function("normalize_depth_kernel")
ratio_par_window_kernel_cuda = mod.get_function("ratio_par_window_kernel") ratio_par_window_kernel_cuda = mod.get_function("ratio_par_window_kernel")
...@@ -1112,48 +1142,98 @@ def filter_read_gc(gc_results, depth_results, chr): ...@@ -1112,48 +1142,98 @@ def filter_read_gc(gc_results, depth_results, chr):
logging.info(f"Entering filter_read_gc for {chr}") logging.info(f"Entering filter_read_gc for {chr}")
start_time = time.time() start_time = time.time()
gc_35 = [] gc_35, gc_40, gc_45, gc_50, gc_55 = [], [], [], [], []
gc_40 = []
gc_45 = []
gc_50 = []
gc_55 = []
for depth_value, gc_value in zip(depth_results, gc_results): for depth_value, gc_value in zip(depth_results, gc_results):
if round(gc_value) == 35: gc_value_rounded = round(gc_value)
if gc_value_rounded == 35:
gc_35.append(depth_value) gc_35.append(depth_value)
elif round(gc_value) == 40: elif gc_value_rounded == 40:
gc_40.append(depth_value) gc_40.append(depth_value)
elif round(gc_value) == 45: elif gc_value_rounded == 45:
gc_45.append(depth_value) gc_45.append(depth_value)
elif round(gc_value) == 50: elif gc_value_rounded == 50:
gc_50.append(depth_value) gc_50.append(depth_value)
elif round(gc_value) == 55: elif gc_value_rounded == 55:
gc_55.append(depth_value) gc_55.append(depth_value)
gc_35 = np.array(gc_35) gc_35, gc_40, gc_45, gc_50, gc_55 = map(np.array, [gc_35, gc_40, gc_45, gc_50, gc_55])
gc_40 = np.array(gc_40)
gc_45 = np.array(gc_45) end_time = time.time()
gc_50 = np.array(gc_50 elapsed_time = end_time - start_time
gc_55 = np.array(gc_55) logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
logging.info(f"gc_35 = {gc_35}")
return gc_35, gc_40, gc_45, gc_50, gc_55
def filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr):
logging.info(f"Entering filter_read_gc_gpu for {chr}")
start_time = time.time()
n = len(gc_results)
max_size = n + 1
gc_35_data = np.zeros(max_size, dtype = np.float32)
gc_40_data = np.zeros(max_size, dtype = np.float32)
gc_45_data = np.zeros(max_size, dtype = np.float32)
gc_50_data = np.zeros(max_size, dtype = np.float32)
gc_55_data = np.zeros(max_size, dtype = np.float32)
gc_35_gpu = cuda.mem_alloc(gc_35_data.nbytes)
gc_40_gpu = cuda.mem_alloc(gc_40_data.nbytes)
gc_45_gpu = cuda.mem_alloc(gc_45_data.nbytes)
gc_50_gpu = cuda.mem_alloc(gc_50_data.nbytes)
gc_55_gpu = cuda.mem_alloc(gc_55_data.nbytes)
cuda.memcpy_htod(gc_35_gpu, gc_35_data)
cuda.memcpy_htod(gc_40_gpu, gc_40_data)
cuda.memcpy_htod(gc_45_gpu, gc_45_data)
cuda.memcpy_htod(gc_50_gpu, gc_50_data)
cuda.memcpy_htod(gc_55_gpu, gc_55_data)
filter_read_gc_kernel_cuda(d_gc_results, d_depth_results, np.int32(n), gc_35_gpu, gc_40_gpu, gc_45_gpu, gc_50_gpu, gc_55_gpu, block=(block_size, 1, 1), grid=(grid_size, 1))
# Copy back results
gc_35 = np.zeros(max_size, dtype=np.float32)
gc_40 = np.zeros(max_size, dtype=np.float32)
gc_45 = np.zeros(max_size, dtype=np.float32)
gc_50 = np.zeros(max_size, dtype=np.float32)
gc_55 = np.zeros(max_size, dtype=np.float32)
cuda.memcpy_dtoh(gc_35, gc_35_gpu)
cuda.memcpy_dtoh(gc_40, gc_40_gpu)
cuda.memcpy_dtoh(gc_45, gc_45_gpu)
cuda.memcpy_dtoh(gc_50, gc_50_gpu)
cuda.memcpy_dtoh(gc_55, gc_55_gpu)
gc_35 = gc_35[1:int(gc_35[0])+1]
gc_40 = gc_40[1:int(gc_40[0])+1]
gc_45 = gc_45[1:int(gc_45[0])+1]
gc_50 = gc_50[1:int(gc_50[0])+1]
gc_55 = gc_55[1:int(gc_55[0])+1]
end_time = time.time() end_time = time.time()
elapsed_time = end_time - start_time elapsed_time = end_time - start_time
logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)") logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return gc_35, gc_40, gc_45, gc_50, gc_55 logging.info(f"gc_35 = {gc_35}")
return [gc_35, gc_40, gc_45, gc_50, gc_55]
def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3): def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr):
logging.info(f"Entering calc_polynom for {chr}") logging.info(f"Entering calc_polynom for {chr}")
start_time = time.time() start_time = time.time()
polynomials =[] polynomials = []
depth_arrays = [gc_35, gc_40, gc_45, gc_50, gc_55] depth_arrays = [gc_35, gc_40, gc_45, gc_50, gc_55]
for depths in depth_arrays: for depths in depth_arrays:
if len(depths) > 0: if len(depths) > 0:
indices = np.arange(len(depths)) x = np.arange(len(depths)) # Créer un tableau d'indices pour les abscisses
p = Polynomial.fit(indices, depths, degree) coeffs = np.polyfit(x, depths, 3) # Ajuster un polynôme de degré 3
p = np.poly1d(coeffs) # Créer un polynôme à partir des coefficients
polynomials.append(p) polynomials.append(p)
else: else:
polynomials.append(None) polynomials.append(None)
...@@ -1163,7 +1243,7 @@ def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3): ...@@ -1163,7 +1243,7 @@ def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3):
logging.info(f"Leaving calc_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)") logging.info(f"Leaving calc_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return polynomials return polynomials
def calc_mean_chr(depth_results, chr) def calc_mean_chr(depth_results, chr):
logging.info(f"Entering calc_mean_chr for {chr}") logging.info(f"Entering calc_mean_chr for {chr}")
start_time = time.time() start_time = time.time()
...@@ -1177,31 +1257,68 @@ def calc_mean_chr(depth_results, chr) ...@@ -1177,31 +1257,68 @@ def calc_mean_chr(depth_results, chr)
end_time = time.time() end_time = time.time()
elapsed_time = end_time - start_time elapsed_time = end_time - start_time
logging.info(f"Leaving dinf_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)") logging.info(f"Leaving calc_mean_chr for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return mean_chr return mean_chr
def find_polynom(ratio_par_window_results, chr, polynomials def find_polynom(ratio_par_window_results, chr, polynomials, depth_results):
logging.info(f"Entering find_polynom for {chr}")
start_time = time.time()
# Convert ratio_par_window_results to numpy array
ratio_par_window_results = np.array(ratio_par_window_results) ratio_par_window_results = np.array(ratio_par_window_results)
cn = ratio_par_window_results * 2 cn = ratio_par_window_results * 2
cn_to_test = [] cn_to_test = cn[(cn >= 1.5) & (cn < 2.5)]
for i in cn:
if i >= 1.5 and i < 2.5:
cn_to_test.append(i)
cn_unique = np.unique(cn_to_test) cn_unique = np.unique(cn_to_test)
best_polynom = None
best_knorm_diff = float('inf')
# Precompute k / 2 for each unique cn value
knorm = np.argmin(cn_unique) * abs(polynomial cn_unique_div2 = cn_unique / 2
# Iterate over each polynomial
for polynom in polynomials:
logging.info(f"polynom = {polynom}\tpolynomials = {polynomials}\tcn_unique = {cn_unique}")
if polynom is not None:
# Iterate over each depth result
for RCi in depth_results:
# Iterate over each unique cn value
for k, k_div2 in zip(cn_unique, cn_unique_div2):
knorm = polynom(k) * k_div2
knorm_diff = abs(knorm - RCi)
if knorm_diff < best_knorm_diff:
best_knorm_diff = knorm_diff
best_polynom = polynom
def normalisation( end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving find_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return best_polynom
return best_polynom
normalize_deth = [] def normalize(depth_results, best_polynom, map_results):
logging.info("Entering normalization")
start_time = time.time()
normalize_depth_results = []
for i in range(len(depth_results)):
if depth_results[i] != 0:
normalized_value = depth_results[i] / (best_polynom(i) * map_results[i]) if best_polynom is not None else depth_results[i]
#logging.info(f"normalized_value = {normalized_value}\ti = {i}\tdepth_results[i] = {depth_results[i]}\tbest_polynom(i) = {best_polynom(i)}\tmap_results[i] = {map_results[i]}")
normalize_depth_results.append(normalized_value)
else:
normalize_depth_results.append(depth_results[i])
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving normalization (Time taken: {elapsed_time:.4f} seconds)")
return np.array(normalize_depth_results)
for i in depth_results:
normalize_depth[i] = depth_results[i] / (polynom * map_data[i])
def main_calcul( def main_calcul(
bamfile_path, bamfile_path,
chr, chr,
...@@ -1363,14 +1480,14 @@ def main_calcul( ...@@ -1363,14 +1480,14 @@ def main_calcul(
"\t depth_correction_results.nbytes = %s\n" % depth_correction_results.nbytes "\t depth_correction_results.nbytes = %s\n" % depth_correction_results.nbytes
) )
d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes) #d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
sys.stderr.write( #sys.stderr.write(
"\t d_normalize_depth_results = %s\n" # "\t d_normalize_depth_results = %s\n"
% d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results)) # % d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results))
) #)
sys.stderr.write( #sys.stderr.write(
"\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes # "\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes
) #)
d_ratio_par_window_results = cuda.mem_alloc(ratio_par_window_results.nbytes) d_ratio_par_window_results = cuda.mem_alloc(ratio_par_window_results.nbytes)
sys.stderr.write( sys.stderr.write(
...@@ -1447,17 +1564,17 @@ def main_calcul( ...@@ -1447,17 +1564,17 @@ def main_calcul(
) )
sys.stderr.write("\t appel fonction calcul_map_kernel_cuda\n") sys.stderr.write("\t appel fonction calcul_map_kernel_cuda\n")
calcul_depth_correction_kernel_cuda( #calcul_depth_correction_kernel_cuda(
d_depth_results, # d_depth_results,
d_map_results, # d_map_results,
np.int32(seq_length), # np.int32(seq_length),
np.int32(window_size), #np.int32(window_size),
np.int32(step_size), #np.int32(step_size),
d_depth_correction_results, #d_depth_correction_results,
block=(block_size, 1, 1), #block=(block_size, 1, 1),
grid=(grid_size, 1), #grid=(grid_size, 1),
) #)
sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n") #sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n")
context.synchronize() context.synchronize()
...@@ -1478,21 +1595,22 @@ def main_calcul( ...@@ -1478,21 +1595,22 @@ def main_calcul(
"\t Copie les resultats du GPU (d_map_results) vers le CPU (map_results)\n" "\t Copie les resultats du GPU (d_map_results) vers le CPU (map_results)\n"
) )
cuda.memcpy_dtoh( # cuda.memcpy_dtoh(
depth_correction_results, d_depth_correction_results # depth_correction_results, d_depth_correction_results
) # cuda.memcpy_dtoh(dest, src) # ) # cuda.memcpy_dtoh(dest, src)
sys.stderr.write( #sys.stderr.write(
"\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n" # "\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n"
) #)
### NORMALISATION### ### NORMALISATION###
#Appel fonction read_depth selon gc #Appel fonction read_depth selon gc
sys.stderr.write("\t appel fonctions filter_results\n") sys.stderr.write("\t appel fonctions filter_results\n")
gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc(gc_results, depth_results, chr) gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc(gc_results, depth_results, chr)
gc_arrays = filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr)
#Appel fonction calcul polynomes #Appel fonction calcul polynomes
polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3) polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr)
#Appel fonction calcul moyenne #Appel fonction calcul moyenne
mean_chr = calc_mean_chr(depth_results, chr) mean_chr = calc_mean_chr(depth_results, chr)
...@@ -1515,42 +1633,49 @@ def main_calcul( ...@@ -1515,42 +1633,49 @@ def main_calcul(
# Copier les resultats ratio depuis le peripherique CUDA vers l'hote # Copier les resultats ratio depuis le peripherique CUDA vers l'hote
cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results) cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
#Appel fonction meilleur polynome
best_polynom = find_polynom(ratio_par_window_results, chr, polynomials, depth_results)
#Appel fonction normalisation
normalize_depth_results = normalize(depth_results, best_polynom, map_results)
# Appel fonctions medianes # Appel fonctions medianes
sys.stderr.write("\t appel fonctions calcul medianes\n") #sys.stderr.write("\t appel fonctions calcul medianes\n")
m = calcul_med_total(depth_correction_results, chr) #m = calcul_med_total(depth_correction_results, chr)
gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr) #gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)
# Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA # Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
sys.stderr.write("\t Conversion medianes en tableau numpy\n") #sys.stderr.write("\t Conversion medianes en tableau numpy\n")
gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32) #gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32)
for gc, median in gc_to_median.items(): #for gc, median in gc_to_median.items():
gc_to_median_array[int(gc)] = median # gc_to_median_array[int(gc)] = median
# Allouer de la memoire pour gc_to_median sur le peripherique CUDA # Allouer de la memoire pour gc_to_median sur le peripherique CUDA
sys.stderr.write("\t Allocation mémoire médianes GPU\n") # sys.stderr.write("\t Allocation mémoire médianes GPU\n")
d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes) # d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes)
cuda.memcpy_htod(d_gc_to_median, gc_to_median_array) # cuda.memcpy_htod(d_gc_to_median, gc_to_median_array)
# Appeler le kernel de normalisation # Appeler le kernel de normalisation
normalize_depth_kernel_cuda( #normalize_depth_kernel_cuda(
d_depth_correction_results, # d_depth_correction_results,
d_gc_results, # d_gc_results,
np.float32(m), # np.float32(m),
d_gc_to_median, # d_gc_to_median,
np.int32(seq_length), # np.int32(seq_length),
np.int32(window_size), # np.int32(window_size),
np.int32(step_size), # np.int32(step_size),
d_normalize_depth_results, # d_normalize_depth_results,
block=(block_size, 1, 1), # block=(block_size, 1, 1),
grid=(grid_size, 1), # grid=(grid_size, 1),
) #)
sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n") #sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n")
context.synchronize() #context.synchronize()
# Copier les resultats normalises depuis le peripherique CUDA vers l'hote #Copier les resultats normalises depuis l'hôte vers le peripherique CUDA
cuda.memcpy_dtoh(normalize_depth_results, d_normalize_depth_results) d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
cuda.memcpy_htod(d_normalize_depth_results, normalize_depth_results)
### Ratio par window### ### Ratio par window###
# Appel fonction moyenne # Appel fonction moyenne
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment