Commit 676b420d authored by Theo Serralta's avatar Theo Serralta

Optimisation of some functions

parent c68a9914
......@@ -185,6 +185,35 @@ __global__ void calcul_map_kernel(float *map_data, int seq_length, int window_si
}
}
// Kernel pour filtrer selon le gc
__global__ void filter_read_gc_kernel(float *gc_results, float *depth_results, int n, float *gc_35, float *gc_40, float *gc_45, float *gc_50, float *gc_55) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < n) {
float gc_value = gc_results[idx];
float depth_value = depth_results[idx];
int gc_value_rounded = round(gc_value);
if (gc_value_rounded == 35) {
int pos = atomicAdd((int*)&gc_35[0], 1);
gc_35[pos + 1] = depth_value;
} else if (gc_value_rounded == 40) {
int pos = atomicAdd((int*)&gc_40[0], 1);
gc_40[pos + 1] = depth_value;
} else if (gc_value_rounded == 45) {
int pos = atomicAdd((int*)&gc_45[0], 1);
gc_45[pos + 1] = depth_value;
} else if (gc_value_rounded == 50) {
int pos = atomicAdd((int*)&gc_50[0], 1);
gc_50[pos + 1] = depth_value;
} else if (gc_value_rounded == 55) {
int pos = atomicAdd((int*)&gc_55[0], 1);
gc_55[pos + 1] = depth_value;
}
}
}
// Kernel pour calculer la lecture de profondeur corrigee par la mappabilitee
......@@ -218,7 +247,7 @@ __global__ void normalize_depth_kernel(float *depth_correction, float *gc_result
// Kernel pour calculer le ratio par window
__global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) {
__global__ void ratio_par_window_kernel(float *depth_results, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
......@@ -227,7 +256,7 @@ __global__ void ratio_par_window_kernel(float *avg_depth, float mean_chr, int se
}
}
// Kernel pour calculer le ratio normalisé par window
// Kernel pour calculer le ratio normalise par window
__global__ void ratio_par_window_norm_kernel(float *depth_normalize_val, float mean_chr_norm, int seq_length, int window_size, int step_size, float *ratio_par_window_norm_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
......@@ -267,6 +296,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio_norm, float mean_ratio,
calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")
filter_read_gc_kernel_cuda = mod.get_function("filter_read_gc_kernel")
calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel")
normalize_depth_kernel_cuda = mod.get_function("normalize_depth_kernel")
ratio_par_window_kernel_cuda = mod.get_function("ratio_par_window_kernel")
......@@ -1112,48 +1142,98 @@ def filter_read_gc(gc_results, depth_results, chr):
logging.info(f"Entering filter_read_gc for {chr}")
start_time = time.time()
gc_35 = []
gc_40 = []
gc_45 = []
gc_50 = []
gc_55 = []
gc_35, gc_40, gc_45, gc_50, gc_55 = [], [], [], [], []
for depth_value, gc_value in zip(depth_results, gc_results):
if round(gc_value) == 35:
gc_value_rounded = round(gc_value)
if gc_value_rounded == 35:
gc_35.append(depth_value)
elif round(gc_value) == 40:
elif gc_value_rounded == 40:
gc_40.append(depth_value)
elif round(gc_value) == 45:
elif gc_value_rounded == 45:
gc_45.append(depth_value)
elif round(gc_value) == 50:
elif gc_value_rounded == 50:
gc_50.append(depth_value)
elif round(gc_value) == 55:
elif gc_value_rounded == 55:
gc_55.append(depth_value)
gc_35 = np.array(gc_35)
gc_40 = np.array(gc_40)
gc_45 = np.array(gc_45)
gc_50 = np.array(gc_50
gc_55 = np.array(gc_55)
gc_35, gc_40, gc_45, gc_50, gc_55 = map(np.array, [gc_35, gc_40, gc_45, gc_50, gc_55])
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
logging.info(f"gc_35 = {gc_35}")
return gc_35, gc_40, gc_45, gc_50, gc_55
def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3):
def filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr):
logging.info(f"Entering filter_read_gc_gpu for {chr}")
start_time = time.time()
n = len(gc_results)
max_size = n + 1
gc_35_data = np.zeros(max_size, dtype = np.float32)
gc_40_data = np.zeros(max_size, dtype = np.float32)
gc_45_data = np.zeros(max_size, dtype = np.float32)
gc_50_data = np.zeros(max_size, dtype = np.float32)
gc_55_data = np.zeros(max_size, dtype = np.float32)
gc_35_gpu = cuda.mem_alloc(gc_35_data.nbytes)
gc_40_gpu = cuda.mem_alloc(gc_40_data.nbytes)
gc_45_gpu = cuda.mem_alloc(gc_45_data.nbytes)
gc_50_gpu = cuda.mem_alloc(gc_50_data.nbytes)
gc_55_gpu = cuda.mem_alloc(gc_55_data.nbytes)
cuda.memcpy_htod(gc_35_gpu, gc_35_data)
cuda.memcpy_htod(gc_40_gpu, gc_40_data)
cuda.memcpy_htod(gc_45_gpu, gc_45_data)
cuda.memcpy_htod(gc_50_gpu, gc_50_data)
cuda.memcpy_htod(gc_55_gpu, gc_55_data)
filter_read_gc_kernel_cuda(d_gc_results, d_depth_results, np.int32(n), gc_35_gpu, gc_40_gpu, gc_45_gpu, gc_50_gpu, gc_55_gpu, block=(block_size, 1, 1), grid=(grid_size, 1))
# Copy back results
gc_35 = np.zeros(max_size, dtype=np.float32)
gc_40 = np.zeros(max_size, dtype=np.float32)
gc_45 = np.zeros(max_size, dtype=np.float32)
gc_50 = np.zeros(max_size, dtype=np.float32)
gc_55 = np.zeros(max_size, dtype=np.float32)
cuda.memcpy_dtoh(gc_35, gc_35_gpu)
cuda.memcpy_dtoh(gc_40, gc_40_gpu)
cuda.memcpy_dtoh(gc_45, gc_45_gpu)
cuda.memcpy_dtoh(gc_50, gc_50_gpu)
cuda.memcpy_dtoh(gc_55, gc_55_gpu)
gc_35 = gc_35[1:int(gc_35[0])+1]
gc_40 = gc_40[1:int(gc_40[0])+1]
gc_45 = gc_45[1:int(gc_45[0])+1]
gc_50 = gc_50[1:int(gc_50[0])+1]
gc_55 = gc_55[1:int(gc_55[0])+1]
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
logging.info(f"gc_35 = {gc_35}")
return [gc_35, gc_40, gc_45, gc_50, gc_55]
def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr):
logging.info(f"Entering calc_polynom for {chr}")
start_time = time.time()
polynomials =[]
polynomials = []
depth_arrays = [gc_35, gc_40, gc_45, gc_50, gc_55]
for depths in depth_arrays:
if len(depths) > 0:
indices = np.arange(len(depths))
p = Polynomial.fit(indices, depths, degree)
x = np.arange(len(depths)) # Créer un tableau d'indices pour les abscisses
coeffs = np.polyfit(x, depths, 3) # Ajuster un polynôme de degré 3
p = np.poly1d(coeffs) # Créer un polynôme à partir des coefficients
polynomials.append(p)
else:
polynomials.append(None)
......@@ -1163,7 +1243,7 @@ def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3):
logging.info(f"Leaving calc_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return polynomials
def calc_mean_chr(depth_results, chr)
def calc_mean_chr(depth_results, chr):
logging.info(f"Entering calc_mean_chr for {chr}")
start_time = time.time()
......@@ -1177,30 +1257,67 @@ def calc_mean_chr(depth_results, chr)
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving dinf_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")
logging.info(f"Leaving calc_mean_chr for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return mean_chr
def find_polynom(ratio_par_window_results, chr, polynomials
def find_polynom(ratio_par_window_results, chr, polynomials, depth_results):
logging.info(f"Entering find_polynom for {chr}")
start_time = time.time()
# Convert ratio_par_window_results to numpy array
ratio_par_window_results = np.array(ratio_par_window_results)
cn = ratio_par_window_results * 2
cn_to_test = []
cn_to_test = cn[(cn >= 1.5) & (cn < 2.5)]
cn_unique = np.unique(cn_to_test)
for i in cn:
if i >= 1.5 and i < 2.5:
cn_to_test.append(i)
best_polynom = None
best_knorm_diff = float('inf')
cn_unique = np.unique(cn_to_test)
# Precompute k / 2 for each unique cn value
cn_unique_div2 = cn_unique / 2
# Iterate over each polynomial
for polynom in polynomials:
logging.info(f"polynom = {polynom}\tpolynomials = {polynomials}\tcn_unique = {cn_unique}")
if polynom is not None:
# Iterate over each depth result
for RCi in depth_results:
# Iterate over each unique cn value
for k, k_div2 in zip(cn_unique, cn_unique_div2):
knorm = polynom(k) * k_div2
knorm_diff = abs(knorm - RCi)
if knorm_diff < best_knorm_diff:
best_knorm_diff = knorm_diff
best_polynom = polynom
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving find_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return best_polynom
knorm = np.argmin(cn_unique) * abs(polynomial
return best_polynom
def normalisation(
def normalize(depth_results, best_polynom, map_results):
logging.info("Entering normalization")
start_time = time.time()
normalize_deth = []
normalize_depth_results = []
for i in range(len(depth_results)):
if depth_results[i] != 0:
normalized_value = depth_results[i] / (best_polynom(i) * map_results[i]) if best_polynom is not None else depth_results[i]
#logging.info(f"normalized_value = {normalized_value}\ti = {i}\tdepth_results[i] = {depth_results[i]}\tbest_polynom(i) = {best_polynom(i)}\tmap_results[i] = {map_results[i]}")
normalize_depth_results.append(normalized_value)
else:
normalize_depth_results.append(depth_results[i])
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving normalization (Time taken: {elapsed_time:.4f} seconds)")
for i in depth_results:
normalize_depth[i] = depth_results[i] / (polynom * map_data[i])
return np.array(normalize_depth_results)
def main_calcul(
bamfile_path,
......@@ -1363,14 +1480,14 @@ def main_calcul(
"\t depth_correction_results.nbytes = %s\n" % depth_correction_results.nbytes
)
d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
sys.stderr.write(
"\t d_normalize_depth_results = %s\n"
% d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results))
)
sys.stderr.write(
"\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes
)
#d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
#sys.stderr.write(
# "\t d_normalize_depth_results = %s\n"
# % d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results))
#)
#sys.stderr.write(
# "\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes
#)
d_ratio_par_window_results = cuda.mem_alloc(ratio_par_window_results.nbytes)
sys.stderr.write(
......@@ -1447,17 +1564,17 @@ def main_calcul(
)
sys.stderr.write("\t appel fonction calcul_map_kernel_cuda\n")
calcul_depth_correction_kernel_cuda(
d_depth_results,
d_map_results,
np.int32(seq_length),
np.int32(window_size),
np.int32(step_size),
d_depth_correction_results,
block=(block_size, 1, 1),
grid=(grid_size, 1),
)
sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n")
#calcul_depth_correction_kernel_cuda(
# d_depth_results,
# d_map_results,
# np.int32(seq_length),
#np.int32(window_size),
#np.int32(step_size),
#d_depth_correction_results,
#block=(block_size, 1, 1),
#grid=(grid_size, 1),
#)
#sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n")
context.synchronize()
......@@ -1478,21 +1595,22 @@ def main_calcul(
"\t Copie les resultats du GPU (d_map_results) vers le CPU (map_results)\n"
)
cuda.memcpy_dtoh(
depth_correction_results, d_depth_correction_results
) # cuda.memcpy_dtoh(dest, src)
sys.stderr.write(
"\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n"
)
# cuda.memcpy_dtoh(
# depth_correction_results, d_depth_correction_results
# ) # cuda.memcpy_dtoh(dest, src)
#sys.stderr.write(
# "\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n"
#)
### NORMALISATION###
#Appel fonction read_depth selon gc
sys.stderr.write("\t appel fonctions filter_results\n")
gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc(gc_results, depth_results, chr)
gc_arrays = filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr)
#Appel fonction calcul polynomes
polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr, degree = 3)
polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr)
#Appel fonction calcul moyenne
mean_chr = calc_mean_chr(depth_results, chr)
......@@ -1515,41 +1633,48 @@ def main_calcul(
# Copier les resultats ratio depuis le peripherique CUDA vers l'hote
cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
#Appel fonction meilleur polynome
best_polynom = find_polynom(ratio_par_window_results, chr, polynomials, depth_results)
#Appel fonction normalisation
normalize_depth_results = normalize(depth_results, best_polynom, map_results)
# Appel fonctions medianes
sys.stderr.write("\t appel fonctions calcul medianes\n")
m = calcul_med_total(depth_correction_results, chr)
gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)
#sys.stderr.write("\t appel fonctions calcul medianes\n")
#m = calcul_med_total(depth_correction_results, chr)
#gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)
# Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
sys.stderr.write("\t Conversion medianes en tableau numpy\n")
gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32)
for gc, median in gc_to_median.items():
gc_to_median_array[int(gc)] = median
#sys.stderr.write("\t Conversion medianes en tableau numpy\n")
#gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32)
#for gc, median in gc_to_median.items():
# gc_to_median_array[int(gc)] = median
# Allouer de la memoire pour gc_to_median sur le peripherique CUDA
sys.stderr.write("\t Allocation mémoire médianes GPU\n")
d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes)
cuda.memcpy_htod(d_gc_to_median, gc_to_median_array)
# sys.stderr.write("\t Allocation mémoire médianes GPU\n")
# d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes)
# cuda.memcpy_htod(d_gc_to_median, gc_to_median_array)
# Appeler le kernel de normalisation
normalize_depth_kernel_cuda(
d_depth_correction_results,
d_gc_results,
np.float32(m),
d_gc_to_median,
np.int32(seq_length),
np.int32(window_size),
np.int32(step_size),
d_normalize_depth_results,
block=(block_size, 1, 1),
grid=(grid_size, 1),
)
sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n")
context.synchronize()
# Copier les resultats normalises depuis le peripherique CUDA vers l'hote
cuda.memcpy_dtoh(normalize_depth_results, d_normalize_depth_results)
#normalize_depth_kernel_cuda(
# d_depth_correction_results,
# d_gc_results,
# np.float32(m),
# d_gc_to_median,
# np.int32(seq_length),
# np.int32(window_size),
# np.int32(step_size),
# d_normalize_depth_results,
# block=(block_size, 1, 1),
# grid=(grid_size, 1),
#)
#sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n")
#context.synchronize()
#Copier les resultats normalises depuis l'hôte vers le peripherique CUDA
d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
cuda.memcpy_htod(d_normalize_depth_results, normalize_depth_results)
### Ratio par window###
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment