Commit 2d7ecfcf authored by Theo Serralta's avatar Theo Serralta

Add new normalisation method

parent a0f29fab
......@@ -196,6 +196,7 @@ __global__ void filter_read_gc_kernel(float *gc_results, float *depth_results, i
int gc_value_rounded = round(gc_value);
if (gc_value_rounded == 35) {
//printf(gc_value_rounded)
int pos = atomicAdd((int*)&gc_35[0], 1);
gc_35[pos + 1] = depth_value;
} else if (gc_value_rounded == 40) {
......@@ -258,22 +259,30 @@ __global__ void ratio_par_window_kernel(float *depth_results, float mean_chr, in
// Kernel pour calculer le ratio normalise par window
__global__ void ratio_par_window_norm_kernel(float *depth_normalize_val, float mean_chr_norm, int seq_length, int window_size, int step_size, float *ratio_par_window_norm_results) {
__global__ void ratio_par_window_norm_kernel(float *normalize_depth_results, float mean_chr_norm, int seq_length, int window_size, int step_size, float *ratio_par_window_norm_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float ratio_norm = depth_normalize_val[idx] / mean_chr_norm;
float ratio_norm = normalize_depth_results[idx] / mean_chr_norm;
ratio_par_window_norm_results[idx] = ratio_norm;
}
}
// Kernel pour calculer le z_score par window sur le ratio
// Kernel pour calculer le z_score
__global__ void z_score_kernel(float *ratio_norm, float mean_ratio, float std_ratio, int seq_length, int window_size, int step_size, float *z_score_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float z_score = (ratio_norm[idx] - mean_ratio) / std_ratio;
float value = ratio_norm[idx];
// Verification si la valeur est inf ou NaN, et remplacer par 0 si c est le cas
if (isinf(value) || isnan(value)) {
value = 0.0f;
}
// Calcul du Z-score
float z_score = (value - mean_ratio) / std_ratio;
z_score_results[idx] = z_score;
}
}
......@@ -284,7 +293,15 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio_norm, float mean_ratio,
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float ratio_mean_ratio = ratio_norm[idx] / mean_ratio;
float value = ratio_norm[idx];
// Verification si la valeur est inf ou NaN et remplacement par 0 si c est le cas
if (isinf(value) || isnan(value)) {
value = 0.0f;
}
// Calcul du ratio par rapport au ratio moyen
float ratio_mean_ratio = value / mean_ratio;
ratio_par_mean_ratio_results[idx] = ratio_mean_ratio;
}
}
......@@ -746,7 +763,7 @@ def calcul_moy_totale(normalize_depth_results, chr):
# Calculate the mean of non-zero results
mean_chr_norm = np.mean(non_zero_results) if non_zero_results.size > 0 else 0
sys.stderr.write("Chromosome : %s, mean_chr_norm : %s\n" % (chr, mean_chr_norm))
logging.info(f"Mean chr_norm_no_zero = {mean_chr_norm}")
end_time = time.time()
elapsed_time = end_time - start_time
......@@ -790,7 +807,7 @@ def calcul_std(normalize_depth_results, chr):
return std_chr
def compute_mean_std_med(ratio_par_window_norm_results, chr):
def compute_mean_std_med(ratio_par_window_norm_results, chr, normalize_depth_results):
"""
Compute the mean, standard deviation, and median of non-zero ratio results per window.
......@@ -812,6 +829,7 @@ def compute_mean_std_med(ratio_par_window_norm_results, chr):
# Filter results to remove zero and -1 values
ratio_par_window_norm_results = np.array(ratio_par_window_norm_results)
non_zero_results = ratio_par_window_norm_results[ratio_par_window_norm_results != 0]
non_zero_results = non_zero_results[np.isfinite(non_zero_results)]
# Initialize list for stats computation
table = []
......@@ -1163,7 +1181,12 @@ def filter_read_gc(gc_results, depth_results, chr):
elapsed_time = end_time - start_time
logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
logging.info(f"gc_35 = {gc_35}")
gc_35_non_zero = gc_35[gc_35 != 0]
np_mean_gc_35 = np.mean(gc_35_non_zero)
len_gc_35 = len(gc_35_non_zero)
logging.info(f"np_mean_gc_35 = {np_mean_gc_35}, len_gc_35_non_zero = {len_gc_35}, len_gc_35 = {len(gc_35)}")
return gc_35, gc_40, gc_45, gc_50, gc_55
......@@ -1196,11 +1219,11 @@ def filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, blo
filter_read_gc_kernel_cuda(d_gc_results, d_depth_results, np.int32(n), gc_35_gpu, gc_40_gpu, gc_45_gpu, gc_50_gpu, gc_55_gpu, block=(block_size, 1, 1), grid=(grid_size, 1))
# Copy back results
gc_35 = np.zeros(max_size, dtype=np.float32)
gc_40 = np.zeros(max_size, dtype=np.float32)
gc_45 = np.zeros(max_size, dtype=np.float32)
gc_50 = np.zeros(max_size, dtype=np.float32)
gc_55 = np.zeros(max_size, dtype=np.float32)
gc_35 = np.zeros(max_size, dtype = np.float32)
gc_40 = np.zeros(max_size, dtype = np.float32)
gc_45 = np.zeros(max_size, dtype = np.float32)
gc_50 = np.zeros(max_size, dtype = np.float32)
gc_55 = np.zeros(max_size, dtype = np.float32)
cuda.memcpy_dtoh(gc_35, gc_35_gpu)
cuda.memcpy_dtoh(gc_40, gc_40_gpu)
......@@ -1208,19 +1231,24 @@ def filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, blo
cuda.memcpy_dtoh(gc_50, gc_50_gpu)
cuda.memcpy_dtoh(gc_55, gc_55_gpu)
gc_35 = gc_35[1:int(gc_35[0])+1]
gc_40 = gc_40[1:int(gc_40[0])+1]
gc_45 = gc_45[1:int(gc_45[0])+1]
gc_50 = gc_50[1:int(gc_50[0])+1]
gc_55 = gc_55[1:int(gc_55[0])+1]
gc_35_non_zero = gc_35[gc_35 != 0] # Filtre les valeurs égales à 0
np_mean_gc_35 = np.mean(gc_35_non_zero)
len_gc_35 = len(gc_35_non_zero)
logging.info(f"np_mean_gc_35 = {np_mean_gc_35}, len_gc_35_non_zero = {len_gc_35}, len_gc_35 = {len(gc_35)}")
gc_35 = gc_35[gc_35 != 0]
gc_40 = gc_40[gc_40 != 0]
gc_45 = gc_40[gc_40 != 0]
gc_50 = gc_40[gc_40 != 0]
gc_55 = gc_40[gc_40 != 0]
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving filter_read_gc for {chr} (Time taken: {elapsed_time:.4f} seconds)")
logging.info(f"gc_35 = {gc_35}")
return [gc_35, gc_40, gc_45, gc_50, gc_55]
return gc_35, gc_40, gc_45, gc_50, gc_55
def calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr):
logging.info(f"Entering calc_polynom for {chr}")
......@@ -1274,6 +1302,11 @@ def find_polynom(ratio_par_window_results, chr, polynomials, depth_results):
best_polynom = None
best_knorm_diff = float('inf')
depth_results_non_zero = depth_results[depth_results != 0]
depth_results_filter = np.unique(depth_results_non_zero)
logging.info(f"depth_results_len = {len(depth_results)}, depth_results_non_zero_len = {len(depth_results_non_zero)}, depth_results_filter_len = {len(depth_results_filter)}")
# Precompute k / 2 for each unique cn value
cn_unique_div2 = cn_unique / 2
......@@ -1281,23 +1314,21 @@ def find_polynom(ratio_par_window_results, chr, polynomials, depth_results):
for polynom in polynomials:
logging.info(f"polynom = {polynom}\tpolynomials = {polynomials}\tcn_unique = {cn_unique}")
if polynom is not None:
# Iterate over each depth result
for RCi in depth_results:
# Iterate over each unique cn value
for k, k_div2 in zip(cn_unique, cn_unique_div2):
knorm = polynom(k) * k_div2
knorm_diff = abs(knorm - RCi)
if knorm_diff < best_knorm_diff:
best_knorm_diff = knorm_diff
# Calculate knorm for all cn_unique values at once
knorms = polynom(cn_unique) * cn_unique_div2[:, None]
# Calculate the differences between knorms and each RCi, then find the minimum difference
for RCi in depth_results_filter:
knorm_diffs = np.abs(knorms - RCi)
min_knorm_diff = np.min(knorm_diffs)
if min_knorm_diff < best_knorm_diff:
best_knorm_diff = min_knorm_diff
best_polynom = polynom
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Leaving find_polynom for {chr} (Time taken: {elapsed_time:.4f} seconds)")
return best_polynom
return best_polynom
def normalize(depth_results, best_polynom, map_results):
......@@ -1306,12 +1337,24 @@ def normalize(depth_results, best_polynom, map_results):
normalize_depth_results = []
for i in range(len(depth_results)):
if depth_results[i] != 0:
if depth_results[i] != 0 and map_results[i] != 0:
normalized_value = depth_results[i] / (best_polynom(i) * map_results[i]) if best_polynom is not None else depth_results[i]
#logging.info(f"normalized_value = {normalized_value}\ti = {i}\tdepth_results[i] = {depth_results[i]}\tbest_polynom(i) = {best_polynom(i)}\tmap_results[i] = {map_results[i]}")
normalize_depth_results.append(normalized_value)
else:
normalize_depth_results.append(depth_results[i])
normalize_depth_results.append(0)
#normalize_depth_results_unique = np.unique(normalize_depth_results)
#logging.info(f"normalize_depth_results_unique = {normalize_depth_results_unique}")
has_nan = np.isnan(normalize_depth_results).any()
has_inf = np.isinf(normalize_depth_results).any()
if has_nan:
logging.warning(f"{chr} contient des valeurs NaN.")
if has_inf:
logging.warning(f"{chr} contient des valeurs Inf.")
end_time = time.time()
elapsed_time = end_time - start_time
......@@ -1606,16 +1649,19 @@ def main_calcul(
#Appel fonction read_depth selon gc
sys.stderr.write("\t appel fonctions filter_results\n")
gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc(gc_results, depth_results, chr)
gc_arrays = filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr)
#gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc(gc_results, depth_results, chr)
gc_35, gc_40, gc_45, gc_50, gc_55 = filter_read_gc_gpu(gc_results, d_gc_results, d_depth_results, grid_size, block_size, chr)
#Appel fonction calcul polynomes
sys.stderr.write("\t appel fonctions calc_polynom\n")
polynomials = calc_polynom(gc_35, gc_40, gc_45, gc_50, gc_55, chr)
#Appel fonction calcul moyenne
sys.stderr.write("\t appel fonctions calc_mean_chr\n")
mean_chr = calc_mean_chr(depth_results, chr)
# Appeler le kernel de ratio
sys.stderr.write("\t appel fonction ratio_par_window_kernel_cuda\n")
ratio_par_window_kernel_cuda(
d_depth_results,
np.float32(mean_chr),
......@@ -1626,17 +1672,20 @@ def main_calcul(
block=(block_size, 1, 1),
grid=(grid_size, 1),
)
sys.stderr.write("\t appel fonction ratio_par_window_kernel_cuda\n")
context.synchronize()
# Copier les resultats ratio depuis le peripherique CUDA vers l'hote
sys.stderr.write("\tCopie des resultats ratio depuis le peripherique CUDA vers l'hote\n")
cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
#Appel fonction meilleur polynome
sys.stderr.write("\t appel fonction find_polynom\n")
best_polynom = find_polynom(ratio_par_window_results, chr, polynomials, depth_results)
#Appel fonction normalisation
sys.stderr.write("\t appel fonction normalize\n")
normalize_depth_results = normalize(depth_results, best_polynom, map_results)
# Appel fonctions medianes
......@@ -1673,6 +1722,7 @@ def main_calcul(
#context.synchronize()
#Copier les resultats normalises depuis l'hôte vers le peripherique CUDA
sys.stderr.write("\t Copie des resultats normalises depuis l'hôte vers le peripherique CUDA\n")
d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
cuda.memcpy_htod(d_normalize_depth_results, normalize_depth_results)
......@@ -1698,10 +1748,12 @@ def main_calcul(
context.synchronize()
# Copier les resultats ratio depuis le peripherique CUDA vers l'hote
sys.stderr.write("\t Copie des resultats ratio depuis le peripherique CUDA vers l'hote\n")
cuda.memcpy_dtoh(ratio_par_window_norm_results, d_ratio_par_window_norm_results)
# Création table à partir du ratio
mean_ratio, std_ratio, med_ratio = compute_mean_std_med(ratio_par_window_norm_results, chr)
sys.stderr.write("\t Appel fonction compute_mean_std_med\n")
mean_ratio, std_ratio, med_ratio = compute_mean_std_med(ratio_par_window_norm_results, chr, normalize_depth_results)
# Appeler le kernel de calcule du ratio divisé par le ratio moyen
ratio_par_mean_ratio_kernel_cuda(
......@@ -1753,15 +1805,15 @@ def main_calcul(
segmentation(events, segment, chr)
# Appel fonction display_results_vcf
display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr)
#display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr)
#Ecrire les résultats dans le fichier de sortie
#with open(output_file, 'a') as f:
# sys.stderr.write("\t ecriture des fichiers\n")
# for i, (depth_data, depth_correction, depth_normalize_val, gc_data, map_data, ratio_norm, ratio_mean_ratio, z_score) in enumerate(zip(depth_results, depth_correction_results, normalize_depth_results, gc_results, map_results, ratio_par_window_norm_results, ratio_par_mean_ratio_results, z_score_results)):
# pos_start = (i * step_size) + 1
# pos_end = pos_start + window_size
# f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_data}\t{depth_correction}\t{depth_normalize_val}\t{gc_data}\t{map_data}\t{ratio_norm}\t{ratio_mean_ratio}\t{z_score}\n")
with open(output_file, 'a') as f:
sys.stderr.write("\t ecriture des fichiers\n")
for i, (depth_data, depth_normalize_val, gc_data, map_data, ratio_norm, ratio_mean_ratio, z_score) in enumerate(zip(depth_results, normalize_depth_results, gc_results, map_results, ratio_par_window_norm_results, ratio_par_mean_ratio_results, z_score_results)):
pos_start = (i * step_size) + 1
pos_end = pos_start + window_size
f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_data}\t{depth_normalize_val}\t{gc_data}\t{map_data}\t{ratio_norm}\t{ratio_mean_ratio}\t{z_score}\n")
# Programme principal
# Calcul nombre de coeurs max pour le GPU
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment