Commit 74f8d0ae authored by Theo Serralta's avatar Theo Serralta

Add some functions

parent c832a6ba
......@@ -9,14 +9,14 @@ from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
from pycuda.autoinit import context
import multiprocessing
from multiprocessing import Process
from multiprocessing import Process, Queue
# Options
def parse_arguments():
try:
opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:')
bamfile_path, window_size, step_size, output_file, logfile = None, None, None, None, None
opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:z:t:o:e:')
bamfile_path, window_size, step_size, zscore_threshold, output_file, logfile = None, None, None, None, None, None
for opt, arg in opts:
if opt in ("-b"):
bamfile_path = arg
......@@ -24,17 +24,19 @@ def parse_arguments():
window_size = int(arg)
if opt in ("-s"):
step_size = int(arg)
if opt in ("-z"):
zscore_threshold = float(arg)
if opt in ("-o"):
output_file = arg
if opt in ("-e"):
logfile = arg
return bamfile_path, window_size, step_size, output_file, logfile
return bamfile_path, window_size, step_size, zscore_threshold, output_file, logfile
except getopt.GetoptError:
print('Invalid argument')
sys.exit(1)
if __name__ == '__main__':
bamfile_path, window_size, step_size, output_file, logfile = parse_arguments()
bamfile_path, window_size, step_size, zscore_threshold, output_file, logfile = parse_arguments()
logging.basicConfig(filename='%s' % (logfile), filemode='a', level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s')
logging.info('start')
global seq
......@@ -109,12 +111,68 @@ __global__ void calcul_map_kernel(float *map_data, int seq_length, int window_si
}
}
// Kernel pour calculer la lecture de profondeur corrigee par la mappabilitee
__global__ void calcul_depth_correction_kernel(float *depth_results, float *map_results, int seq_length, int window_size, int step_size, float *depth_correction_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float avg_depth = depth_results[idx];
float avg_map = map_results[idx];
// Verification si avg_map est egal a 0 pour eviter la division par 0
float depth_correction = (avg_map != 0.0f) ? (avg_depth / avg_map) : 0.0f;
depth_correction_results[idx] = depth_correction;
}
}
// Kernel pour normaliser la profondeur corrigee
__global__ void normalize_depth_kernel(float *depth_correction, float *gc_results, float m, float *gc_to_median, int seq_length, int window_size, int step_size, float *depth_normalize) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float mGC = gc_to_median[(int)gc_results[idx]];
// Verification si mGC est egal a 0 pour eviter la division par 0
float depth_normalize_val = (mGC != 0.0f) ? (depth_correction[idx] * m / mGC) : 0.0f;
depth_normalize[idx] = depth_normalize_val;
}
}
// Kernel pour calculer le ratio par window
__global__ void ratio_par_window_kernel(float *depth_normalize_val, float mean_chr, int seq_length, int window_size, int step_size, float *ratio_par_window_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float ratio = depth_normalize_val[idx] / mean_chr;
ratio_par_window_results[idx] = ratio;
}
}
// Kernel pour calculer le z_score par window
__global__ void z_score_kernel(float *depth_normalize_val, float mean_chr, float std_chr, int seq_length, int window_size, int step_size, float *z_score_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float z_score = (depth_normalize_val[idx] - mean_chr) / std_chr;
z_score_results[idx] = z_score;
}
}
""")
# Obtention de la fonction de kernel compilée
calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")
calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel")
normalize_depth_kernel_cuda = mod.get_function("normalize_depth_kernel")
ratio_par_window_kernel_cuda = mod.get_function("ratio_par_window_kernel")
z_score_kernel_cuda = mod.get_function("z_score_kernel")
#############################################
######<---Fonctions mappability--->#########
......@@ -230,30 +288,84 @@ def calcul_depth_seq(seq_length, bamfile_path, chr):
sys.stderr.write("\t Leaving calcul_depth_seq\n")
return depth_data
#############################################
######<---Fonctions calcul medianes--->######
#############################################
def calcul_med_total(depth_correction_results):
sys.stderr.write("\t entering calcul_med_total\n")
depth_correction_results = np.array(depth_correction_results)
# Filtrer les résultats pour enlever les valeurs égales à 0
non_zero_results = depth_correction_results[depth_correction_results != 0]
# Calculer la médiane des résultats non nuls
m = np.median(non_zero_results) if non_zero_results.size > 0 else 0
sys.stderr.write("\t Leaving calcul_med_total\n")
return m
def calcul_med_same_gc(gc_results, depth_correction_results):
sys.stderr.write("\t entering calcul_med_same_gc\n")
mGC = []
depth_correction_results_array = np.array(depth_correction_results)
unique_gc_values = np.unique(gc_results)
for gc in unique_gc_values:
indices = np.where(gc_results == gc) # Donne les positions où se trouve chaque valeur de unique_gc_values dans le tableau gc_results
# Filtrer les résultats de depth_correction pour enlever les valeurs égales à 0
filtered_depths = depth_correction_results_array[indices][depth_correction_results_array[indices] != 0]
if filtered_depths.size > 0: # Calculer la médiane seulement si les résultats filtrés ne sont pas vides
median_gc = np.median(filtered_depths)
else:
median_gc = 0 # Ou une autre valeur par défaut si tous les résultats sont 0
#print(f'valeur {gc} position {indices}, filtered_depths = {filtered_depths} et median_gc = {median_gc}')
mGC.append((gc, median_gc))
gc_to_median = dict(mGC)
sys.stderr.write("\t Leaving calcul_med_same_gc\n")
#print(gc_to_median)
return gc_to_median
###########################################
######<---Fonction calcul moyenne--->######
###########################################
def calcul_moy_totale(normalize_depth_results):
sys.stderr.write("\t entering calcul_moy_totale\n")
normalize_depth_results = np.array(normalize_depth_results)
# Filtrer les résultats pour enlever les valeurs égales à 0
non_zero_results = normalize_depth_results[normalize_depth_results != 0]
# Calculer la mpyenne des résultats non nuls
mean_chr = np.mean(non_zero_results) if non_zero_results.size > 0 else 0
print(mean_chr)
sys.stderr.write("\t Leaving calcul_moy_totale\n")
return mean_chr
#############################################
######<---Fonction calcul std--->#####
#############################################
def calcul_std(normalize_depth_results):
sys.stderr.write("\t entering calcul_std\n")
normalize_depth_results = np.array(normalize_depth_results)
# Filtrer les résultats pour enlever les valeurs égales à 0
non_zero_results = normalize_depth_results[normalize_depth_results != 0]
# Calculer le std des résultats non nuls
std_chr = np.std(non_zero_results) if non_zero_results.size > 0 else 0
print(std_chr)
sys.stderr.write("\t Leaving calcul_std\n")
return std_chr
#################################
######<---Fonction main--->######
#################################
def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_file):
def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, zscore_threshold, output_file):
sys.stderr.write("\t entering main_calcul\n")
global seq
# Calcul mappability
map_data = Process(target = calcul_mappability, args=(seq_length, mappability, chr))
# Calcul GC
gc_data = Process(target = calcul_gc_content, args = (seq_length, chr, seq))
# Calcul depth seq
depth_data = Process(target = calcul_depth_seq, args = (seq_length, bamfile_path, chr))
map_data.start()
gc_data.start()
depth_data.start()
map_data.join()
gc_data.join()
depth_data.join()
# Appeler les différentes fonctions
map_data = calcul_mappability(seq_length, mappability, chr)
gc_data = calcul_gc_content(seq_length, chr, seq)
depth_data = calcul_depth_seq(seq_length, bamfile_path, chr)
# Transférer le tableau NumPy vers CUDA
d_depth_data = cuda.mem_alloc(depth_data.nbytes)
......@@ -274,7 +386,7 @@ def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_fi
grid_size = int((int((seq_length - window_size) / step_size) + 1) / block_size)+1
sys.stderr.write("\t grid_size = \n")
# Initialiser le tableau pour stocker les résultats de la profondeur moyenne
# Initialiser les tableaux pour stocker les résultats
depth_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
sys.stderr.write("\t Definition de depth_results\n")
......@@ -284,6 +396,18 @@ def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_fi
map_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
sys.stderr.write("\t Definition de map_results\n")
depth_correction_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
sys.stderr.write("\t Definition de depth_correction_results\n")
normalize_depth_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
sys.stderr.write("\t Definition de normalize_depth_results\n")
ratio_par_window_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
sys.stderr.write("\t Definition de ratio_par_window\n")
z_score_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
sys.stderr.write("\t Definition de z_score_results\n")
# Allouer de la mémoire pour les résultats sur le périphérique CUDA
d_depth_results = cuda.mem_alloc(depth_results.nbytes)
sys.stderr.write("\t d_depth_results = %s\n" % d_depth_results.as_buffer(sys.getsizeof(d_depth_results)))
......@@ -297,15 +421,34 @@ def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_fi
sys.stderr.write("\t d_map_results = %s\n" % d_map_results.as_buffer(sys.getsizeof(d_map_results)))
sys.stderr.write("\t map_results.nbytes = %s\n" % map_results.nbytes)
d_depth_correction_results = cuda.mem_alloc(depth_correction_results.nbytes)
sys.stderr.write("\t d_depth_correction_results = %s\n" % d_depth_correction_results.as_buffer(sys.getsizeof(d_depth_correction_results)))
sys.stderr.write("\t depth_correction_results.nbytes = %s\n" % depth_correction_results.nbytes)
d_normalize_depth_results = cuda.mem_alloc(normalize_depth_results.nbytes)
sys.stderr.write("\t d_normalize_depth_results = %s\n" % d_normalize_depth_results.as_buffer(sys.getsizeof(d_normalize_depth_results)))
sys.stderr.write("\t normalize_depth_results.nbytes = %s\n" % normalize_depth_results.nbytes)
d_ratio_par_window_results = cuda.mem_alloc(ratio_par_window_results.nbytes)
sys.stderr.write("\t d_ratio_par_window_results = %s\n" % d_ratio_par_window_results.as_buffer(sys.getsizeof(d_ratio_par_window_results)))
sys.stderr.write("\t ratio_par_window_results.nbytes = %s\n" % ratio_par_window_results.nbytes)
d_z_score_results = cuda.mem_alloc(z_score_results.nbytes)
sys.stderr.write("\t d_z_score_results = %s\n" % d_z_score_results.as_buffer(sys.getsizeof(d_z_score_results)))
sys.stderr.write("\t z_score_results.nbytes = %s\n" % z_score_results.nbytes)
# Appeler la fonction de calcul de profondeur avec CUDA
calcul_depth_kernel_cuda(d_depth_data, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_depth_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction calc_depth_kernel_cuda\n")
sys.stderr.write("\t appel fonction calcul_depth_kernel_cuda\n")
calcul_gc_kernel_cuda(d_gc_data, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_gc_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction calc_gc_kernel_cuda\n")
sys.stderr.write("\t appel fonction calcul_gc_kernel_cuda\n")
calcul_map_kernel_cuda(d_map_data, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_map_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction calc_map_kernel_cuda\n")
sys.stderr.write("\t appel fonction calcul_map_kernel_cuda\n")
calcul_depth_correction_kernel_cuda(d_depth_results, d_map_results, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_depth_correction_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction calcul_depth_correction_kernel_cuda\n")
context.synchronize()
......@@ -319,18 +462,109 @@ def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_fi
cuda.memcpy_dtoh(map_results, d_map_results) #cuda.memcpy_dtoh(dest, src)
sys.stderr.write("\t Copie les resultats du GPU (d_map_results) vers le CPU (map_results)\n")
cuda.memcpy_dtoh(depth_correction_results, d_depth_correction_results) #cuda.memcpy_dtoh(dest, src)
sys.stderr.write("\t Copie les resultats du GPU (d_depth_correction_results) vers le CPU (depth_correction_results)\n")
###NORMALISATION###
#Appel fonctions medianes
sys.stderr.write("\t appel fonctions calcul medianes\n")
m = calcul_med_total(depth_correction_results)
gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results)
# Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
sys.stderr.write("\t Conversion medianes en tableau numpy\n")
gc_to_median_array = np.zeros(int(max(gc_results)) + 1, dtype=np.float32)
for gc, median in gc_to_median.items():
gc_to_median_array[int(gc)] = median
# Allouer de la memoire pour gc_to_median sur le peripherique CUDA
sys.stderr.write("\t Allocation mémoire médianes GPU\n")
d_gc_to_median = cuda.mem_alloc(gc_to_median_array.nbytes)
cuda.memcpy_htod(d_gc_to_median, gc_to_median_array)
# Appeler le kernel de normalisation
normalize_depth_kernel_cuda(d_depth_correction_results, d_gc_results, np.float32(m), d_gc_to_median, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_normalize_depth_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction normalize_depth_kernel_cuda\n")
context.synchronize()
# Copier les resultats normalises depuis le peripherique CUDA vers l'hote
cuda.memcpy_dtoh(normalize_depth_results, d_normalize_depth_results)
###Ratio par window###
#Appel fonction moyenne
sys.stderr.write("\t appel fonction calcul moyenne\n")
mean_chr = calcul_moy_totale(normalize_depth_results)
# Appeler le kernel de normalisation
ratio_par_window_kernel_cuda(d_normalize_depth_results, np.float32(mean_chr), np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_ratio_par_window_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction ratio_par_window_kernel_cuda\n")
context.synchronize()
# Copier les resultats ratio depuis le peripherique CUDA vers l'hote
cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
###Z-score###
#Appel fonction ecart-type
sys.stderr.write("\t appel fonction ecart-type\n")
std_chr = calcul_std(normalize_depth_results)
# Appeler le kernel de normalisation
z_score_kernel_cuda(d_normalize_depth_results, np.float32(mean_chr), np.float32(std_chr), np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_z_score_results, block=(block_size, 1, 1), grid=(grid_size, 1))
sys.stderr.write("\t appel fonction z_score_kernel_cuda\n")
context.synchronize()
# Copier les resultats ratio depuis le peripherique CUDA vers l'hote
cuda.memcpy_dtoh(z_score_results, d_z_score_results)
# Ecrire les résultats dans le fichier de sortie
with open(output_file, 'a') as f:
sys.stderr.write("\t ecriture des fichiers\n")
for i, (avg_depth, avg_gc, avg_map) in enumerate(zip(depth_results, gc_results, map_results)):
# with open(output_file, 'a') as f:
# sys.stderr.write("\t ecriture des fichiers\n")
# for i, (avg_depth, avg_gc, avg_map, depth_correction, depth_normalize_val) in enumerate(zip(depth_results, gc_results, map_results, depth_correction_results, normalize_depth_results)):
# pos_start = (i * step_size) + 1
# pos_end = pos_start + window_size
# f.write(f"{chr}\t{pos_start}\t{pos_end}\t{avg_depth}\t{avg_gc}\t{avg_map}\t{depth_correction}\t{depth_normalize_val}\n")
#
# with open(output_file, 'a') as f:
# sys.stderr.write("\t ecriture des fichiers\n")
# for i, (depth_normalize_val, ratio, z_score) in enumerate(zip(normalize_depth_results, ratio_par_window_results, z_score_results)):
# pos_start = (i * step_size) + 1
# pos_end = pos_start + window_size
# f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_normalize_val}\t{ratio}\t{z_score}\n")
# Filtrer les résultats selon le z-score et stocker dans un tableau NumPy
max_windows = int((seq_length - window_size) / step_size) + 1
filtered_windows = np.zeros((max_windows, 6), dtype=object)
count = 0
sys.stderr.write("\t stockage des zscore selon le threshold\n")
for i, (depth_normalize_val, ratio, z_score) in enumerate(zip(normalize_depth_results, ratio_par_window_results, z_score_results)):
if z_score <= -zscore_threshold or z_score >= zscore_threshold:
pos_start = (i * step_size) + 1
pos_end = pos_start + window_size
f.write(f"{chr}\t{pos_start}\t{pos_end}\t{avg_depth}\t{avg_gc}\t{avg_map}\n")
filtered_windows[count] = [chr, pos_start, pos_end, depth_normalize_val, ratio, z_score]
count += 1
# Redimensionner le tableau pour enlever les lignes inutilisées
filtered_windows = filtered_windows[:count]
# Écrire les résultats filtrés dans le fichier de sortie
with open(output_file, 'a') as f:
sys.stderr.write("\t ecriture des fichiers\n")
for window in filtered_windows:
f.write(f"{window[0]}\t{window[1]}\t{window[2]}\t{window[3]}\t{window[4]}\t{window[5]}\n")
# Programme principal
#Calcul nombre de coeurs max pour le GPU
device = cuda.Device(0)
attributes = device.get_attributes()
num_cores = attributes[1]
......@@ -348,7 +582,7 @@ with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))
# Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
main_calcul(bamfile_handle, chr, seq_length, window_size, step_size, output_file)
main_calcul(bamfile_handle, chr, seq_length, window_size, step_size, zscore_threshold, output_file)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment