diff --git a/CNV/test_gpu_mean_depth.py b/CNV/test_gpu_mean_depth.py index f5902bb997a65e9796f51f553804ae9a2fb9f224..f58aac1a5e62fa0613931dc9f36e22ff602eb625 100644 --- a/CNV/test_gpu_mean_depth.py +++ b/CNV/test_gpu_mean_depth.py @@ -47,19 +47,22 @@ def parse_arguments(): Each element can be None if the corresponding argument was not provided. """ try: - opts, args = getopt.getopt(sys.argv[1:], "b:w:s:z:l:t:o:e:") + opts, args = getopt.getopt(sys.argv[1:], "b:c:w:s:z:l:t:o:e:") ( bamfile_path, + num_chr, window_size, step_size, zscore_threshold, lengthFilter, output_file, logfile, - ) = (None, None, None, None, None, None, None) + ) = (None, None, None, None, None, None, None, None) for opt, arg in opts: if opt in ("-b"): bamfile_path = arg + if opt in ("-c"): + num_chr = arg if opt in ("-w"): window_size = int(arg) if opt in ("-s"): @@ -74,6 +77,7 @@ def parse_arguments(): logfile = arg return ( bamfile_path, + num_chr, window_size, step_size, zscore_threshold, @@ -89,6 +93,7 @@ def parse_arguments(): if __name__ == "__main__": ( bamfile_path, + num_chr, window_size, step_size, zscore_threshold, @@ -128,6 +133,24 @@ __global__ void calcul_depth_kernel(int *depth_data, int seq_length, int window_ } } +__global__ void calcul_depth_count_kernel(int *depth_data_count, int seq_length, int window_size, int step_size, float *depth_results_count) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx < seq_length - window_size + 1) { + int pos_start = (idx * step_size) + 1; + int pos_end = pos_start + window_size; + int count_reads = 0; + + + for (int i = pos_start; i < pos_end; i++) { + count_reads += depth_data_count[i]; + } + + float avg_depth_count = (float)count_reads / window_size; + depth_results_count[idx] = avg_depth_count; + } +} + //Kernel pour calculer le GC content __global__ void calcul_gc_kernel(char *gc_data, int seq_length, int window_size, int step_size, float *gc_results) { @@ -245,6 +268,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio, float mean_ratio, int # Obtention de la fonction de kernel compilée calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel") +calcul_depth_count_kernel_cuda = mod.get_function("calcul_depth_count_kernel") calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel") calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel") calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel") @@ -280,6 +304,7 @@ def merge_intervals(intervals): merged.append((start, end, score)) start, end, score = interval merged.append((start, end, score)) + return merged @@ -301,9 +326,12 @@ def dico_mappabilite(mappability_file): A dictionary where keys are chromosome names and values are another dictionary with start positions as keys and mappability scores as values. """ - sys.stderr.write("\t Entering dico_mappabilite\n") + logging.info("Entering dico_mappability") + mappability_dico = {} + logging.info(" In dico_mappability : Open file and create the dico") + with open(mappability_file, "r") as f: for line in f: fields = line.strip().split("\t") @@ -320,19 +348,29 @@ def dico_mappabilite(mappability_file): mappability_dico[chromosome].append((start_pos, end_pos, score)) + logging.info(f"In dico_mappability : Leaving file and create the dico") + # Add position 0 for each chromosome + logging.info(" In dico_mappability : Add position 0 for each chromosome") + for chromosome, intervals in mappability_dico.items(): if intervals[0][0] != 0: mappability_dico[chromosome].insert(0, (0, intervals[0][0], 0)) + logging.info(f"In dico_mappability : Ending add position 0 for each chromosome") + # Merge intervals with the same score + logging.info(" In dico_mappability : Merge intervals with the same score") + for chromosome, intervals in mappability_dico.items(): merged_intervals = merge_intervals(intervals) mappability_dico[chromosome] = { start: score for start, _, score in merged_intervals } + + logging.info(f"In dico_mappability : Ending merge intervals with the same score") + logging.info(f"Leaving dico_mappability") - sys.stderr.write("\t Leaving dico_mappabilite\n") return mappability_dico @@ -357,24 +395,33 @@ def calcul_mappability(seq_length, mappability, chr): numpy.ndarray An array of mappability scores for the given sequence length. """ - sys.stderr.write("\t Entering calcul_mappability \n") + logging.info(f"Entering calcul_mappability for {chr}") + map_data = np.zeros(seq_length, dtype=np.float32) sorted_keys = sorted(mappability[chr].keys()) prev_bound = 0 prev_mappability = 0 + logging.info(f"In calcul_mappability : Entering for bound in sorted_keys for {chr}") + for bound in sorted_keys: for i in range(prev_bound, min(seq_length, bound)): map_data[i] = prev_mappability prev_bound = bound prev_mappability = mappability[chr][bound] + + logging.info(f"In calcul_mappability : Leaving for bound in sorted_keys for {chr}") # Fill remaining positions if sequence length exceeds last bound + logging.info(f"In calcul_mappability : Entering for i in range(prev_bound, seq_length) for {chr}") + for i in range(prev_bound, seq_length): map_data[i] = prev_mappability - sys.stderr.write("\t Leaving calcul_mappability \n") + logging.info(f"In calcul_mappability : Leaving for i in range(prev_bound, seq_length) for {chr}") + + logging.info(f"Leaving dico_mappability for {chr}") return map_data @@ -394,7 +441,8 @@ def parse_fasta(gc_file): dict A dictionary where keys are sequence headers and values are the corresponding sequences. """ - sys.stderr.write("\t Entering parse_fasta\n") + logging.info("Entering parse_fasta") + sequences = {} with open(gc_file, "r") as f: data = f.read().split(">") @@ -404,7 +452,8 @@ def parse_fasta(gc_file): sequence = "".join(lines[1:]) sequences[header] = sequence - sys.stderr.write("\t Leaving parse_fasta\n") + logging.info(f"Leaving parse_fasta") + return sequences @@ -428,12 +477,14 @@ def calcul_gc_content(seq_length, chr, seq): numpy.ndarray An array of bytes ('S' dtype) representing the GC content for the given sequence length. """ - sys.stderr.write("\t Entering calcul_gc_content\n") + logging.info(f"Entering calcul_gc_content for {chr}") + gc_data = np.zeros(seq_length, dtype="S") for i in range(len(seq[chr])): gc_data[i] = seq[chr][i] - sys.stderr.write("\t Leaving calcul_gc_content\n") + logging.info(f"Leaving calcul_gc_content for {chr}") + return gc_data @@ -457,18 +508,103 @@ def calcul_depth_seq(seq_length, bamfile_path, chr): numpy.ndarray An array of integers representing the sequencing depth for the given sequence length. """ - sys.stderr.write("\t Entering calcul_depth_seq\n") + logging.info(f"Entering calcul_depth_seq for {chr}") + depth_data = np.zeros(seq_length, dtype=np.int32) - for pileupcolumn in bamfile_path.pileup(): - if pileupcolumn.reference_pos >= seq_length: + for pileupcolumn in bamfile_path.pileup(reference = chr): + pos = pileupcolumn.reference_pos + if pos >= seq_length: break - depth_data[pileupcolumn.reference_pos] = pileupcolumn.nsegments + depth_data[pos] = pileupcolumn.nsegments + + #depth_data = bamfile_path.pileup().nsegments + + logging.info(f"Leaving calcul_depth_seq for {chr}") + + return depth_data + + +def calcul_depth_seq_copy(seq_length, bamfile_path, chr): + """ + Calculate the sequencing depth for a given chromosome. + + This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file. + + Parameters + ---------- + seq_length : int + The length of the sequence. + bamfile_path : pysam.AlignmentFile + The path to the BAM file opened with pysam.AlignmentFile. + chr : str + The chromosome for which the depth is calculated. + + Returns + ------- + numpy.ndarray + An array of integers representing the sequencing depth for the given sequence length. + """ + logging.info(f"Entering calcul_depth_seq for {chr}") + + depth_data = np.zeros(seq_length, dtype=np.int32) + + i = 0 + it = bamfile_path.pileup(reference = chr) + while i < (seq_length - 1): + t = next(it) + pos = t.reference_pos + depth_data[pos] = t.nsegments + print(i) + i += 1 + + + #for pileupcolumn in bamfile_path.pileup(): + # pos = pileupcolumn.reference_pos + # if pos >= seq_length: + # break + #depth_data[pos] = pileupcolumn.nsegments + + #print(bamfile_path.pileup()) print("########################") +# depth_data = np.array(bamfile_path.pileup().nsegments) + + + logging.info(f"Leaving calcul_depth_seq for {chr}") - sys.stderr.write("\t Leaving calcul_depth_seq\n") return depth_data + + +def calcul_depth_seq_count(seq_length, bamfile_path, chr): + """ + Calculate the sequencing depth for a given chromosome. + + This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file. + + Parameters + ---------- + seq_length : int + The length of the sequence. + bamfile_path : pysam.AlignmentFile + The path to the BAM file opened with pysam.AlignmentFile. + chr : str + The chromosome for which the depth is calculated. + + Returns + ------- + numpy.ndarray + An array of integers representing the sequencing depth for the given sequence length. + """ + logging.info(f"Entering calcul_depth_seq for {chr}") + + depth_data_count = np.zeros(seq_length, dtype=np.int32) + for i in range(seq_length): + depth_data_count[i] = bamfile_path.count(contig = chr, start = i, stop = i+1) + + logging.info(f"Leaving calcul_depth_seq for {chr}") + return depth_data_count -def calcul_med_total(depth_correction_results): + +def calcul_med_total(depth_correction_results, chr): """ Calculate the median of non-zero depth correction results. @@ -484,17 +620,22 @@ def calcul_med_total(depth_correction_results): float The median of the non-zero depth correction values, or 0 if no non-zero values are present. """ - sys.stderr.write("\t entering calcul_med_total\n") + logging.info(f"Entering calcul_med_total for {chr}") + depth_correction_results = np.array(depth_correction_results) # Filter results to remove zero values non_zero_results = depth_correction_results[depth_correction_results != 0] # Calculate the median of non-zero results m = np.median(non_zero_results) if non_zero_results.size > 0 else 0 - sys.stderr.write("\t Leaving calcul_med_total\n") + + sys.stderr.write("Chromosome : %s, med_chr : %s\n" % (chr, m)) + + logging.info(f"Leaving calcul_med_total for {chr}") + return m -def calcul_med_same_gc(gc_results, depth_correction_results): +def calcul_med_same_gc(gc_results, depth_correction_results, chr): """ Calculate the median depth correction for each unique GC content value. @@ -512,7 +653,8 @@ def calcul_med_same_gc(gc_results, depth_correction_results): dict A dictionary where keys are unique GC content values and values are the median depth correction for those GC values. """ - sys.stderr.write("\t entering calcul_med_same_gc\n") + logging.info(f"Entering calcul_med_same_gc for {chr}") + mGC = [] depth_correction_results_array = np.array(depth_correction_results) unique_gc_values = np.unique(gc_results) @@ -537,11 +679,12 @@ def calcul_med_same_gc(gc_results, depth_correction_results): gc_to_median = dict(mGC) - sys.stderr.write("\t Leaving calcul_med_same_gc\n") + logging.info(f"Leaving calcul_med_same_gc for {chr}") + return gc_to_median -def calcul_moy_totale(normalize_depth_results): +def calcul_moy_totale(normalize_depth_results, chr): """ Calculate the mean of non-zero normalized depth results. @@ -557,45 +700,23 @@ def calcul_moy_totale(normalize_depth_results): float The mean of the non-zero normalized depth values, or 0 if no non-zero values are present. """ - sys.stderr.write("\t entering calcul_moy_totale\n") + logging.info(f"Entering calcul_moy_totale for {chr}") + normalize_depth_results = np.array(normalize_depth_results) # Filter results to remove zero values non_zero_results = normalize_depth_results[normalize_depth_results != 0] # Calculate the mean of non-zero results mean_chr = np.mean(non_zero_results) if non_zero_results.size > 0 else 0 - print(mean_chr) - sys.stderr.write("\t Leaving calcul_moy_totale\n") - return mean_chr - -def calcul_moy_totale_ratio(ratio_par_window_results): - """ - Calculate the mean of non-zero ratio results per window. - - This function filters out zero values from the ratio results per window and computes the mean of the remaining values. + sys.stderr.write("Chromosome : %s, mean_chr : %s\n" % (chr, mean_chr)) - Parameters - ---------- - ratio_par_window_results : list or numpy.ndarray - A list or array of ratio values per window. + logging.info(f"Leaving calcul_moy_totale for {chr}") + + return mean_chr - Returns - ------- - float - The mean of the non-zero ratio values per window, or 0 if no non-zero values are present. - """ - sys.stderr.write("\t entering calcul_moy_totale_ratio\n") - ratio_par_window_results = np.array(ratio_par_window_results) - # Filter results to remove zero values - non_zero_results = ratio_par_window_results[ratio_par_window_results != 0] - # Calculate the mean of non-zero results - mean_ratio = np.mean(non_zero_results) if non_zero_results.size > 0 else 0 - print(mean_ratio) - sys.stderr.write("\t Leaving calcul_moy_totale_ratio\n") - return mean_ratio -def calcul_std(normalize_depth_results): +def calcul_std(normalize_depth_results, chr): """ Calculate the standard deviation of non-zero normalized depth results. @@ -611,45 +732,23 @@ def calcul_std(normalize_depth_results): float The standard deviation of the non-zero normalized depth values, or 0 if no non-zero values are present. """ - sys.stderr.write("\t entering calcul_std\n") + logging.info(f"Entering calcul_std for {chr}") + normalize_depth_results = np.array(normalize_depth_results) # Filter results to remove zero values non_zero_results = normalize_depth_results[normalize_depth_results != 0] # Calculate the standard deviation of non-zero results std_chr = np.std(non_zero_results) if non_zero_results.size > 0 else 0 - print(std_chr) - sys.stderr.write("\t Leaving calcul_std\n") - return std_chr - - -def calcul_std_ratio(ratio_par_window_results): - """ - Calculate the standard deviation of non-zero ratio results per window. - - This function filters out zero values from the ratio results per window and computes the standard deviation of the remaining values. - Parameters - ---------- - ratio_par_window_results : list or numpy.ndarray - A list or array of ratio values per window. - Returns - ------- - float - The standard deviation of the non-zero ratio values per window, or 0 if no non-zero values are present. - """ - sys.stderr.write("\t entering calcul_std_ratio\n") - ratio_par_window_results = np.array(ratio_par_window_results) - # Filter results to remove zero values - non_zero_results = ratio_par_window_results[ratio_par_window_results != 0] - # Calculate the standard deviation of non-zero results - std_ratio = np.std(non_zero_results) if non_zero_results.size > 0 else 0 - print(std_ratio) - sys.stderr.write("\t Leaving calcul_std_ratio\n") - return std_ratio + sys.stderr.write("Chromosome : %s, std_chr : %s\n" % (chr, std_chr)) + + logging.info(f"Leaving calcul_std for {chr}") + + return std_chr -def compute_mean_and_std(ratio_par_window_results): +def compute_mean_std_med(ratio_par_window_results, chr): """ Compute the mean, standard deviation, and median of non-zero ratio results per window. @@ -665,7 +764,7 @@ def compute_mean_and_std(ratio_par_window_results): tuple A tuple containing the mean, standard deviation, and median of the filtered ratio values. """ - sys.stderr.write("Computing stats : \n") + logging.info(f"Entering compute_mean_std_med for {chr}") # Filter results to remove zero and -1 values ratio_par_window_results = np.array(ratio_par_window_results) @@ -685,15 +784,14 @@ def compute_mean_and_std(ratio_par_window_results): std_ratio = np.std(table) if table else 0 med_ratio = np.median(table) if table else 0 - # Display results - print(mean_ratio, std_ratio, med_ratio) - sys.stderr.write("Computation done\n") - + sys.stderr.write("Chromosome : %s, mean_ratio : %s, std_ratio : %s, med_ratio : %s\n" % (chr, mean_ratio, std_ratio, med_ratio)) + + logging.info(f"Leaving compute_mean_std_med for {chr}") # Return results return mean_ratio, std_ratio, med_ratio -def cn_level(x): +def cn_level(x, chr): """ Determine the copy number level based on the given value. @@ -744,10 +842,15 @@ def get_sample_name(bamfile_path): str The sample name extracted from the BAM file. If no sample name is found, returns "UnknownSample". """ + + logging.info(f"Entering get_sample_name") + with pysam.AlignmentFile(bamfile_path, "rb") as bamfile: for read_group in bamfile.header.get("RG", []): if "SM" in read_group: return read_group["SM"] + + logging.info(f"Leaving get_sample_name") return "UnknownSample" @@ -773,12 +876,15 @@ def create_signal(signal, chr, z_score_results, step_size): None The function modifies the signal dictionary in place. """ + logging.info(f"Entering create_signal for {chr} (include copy_number_level)") + if chr not in signal: signal[chr] = {} for i in range(len(z_score_results)): pos = (i * step_size) + 1 signal[chr][pos] = z_score_results[i] + logging.info(f"Leaving create_signal for {chr} (include copy_number_level)") def detect_events( z_score_results, @@ -813,7 +919,8 @@ def detect_events( None The function modifies the events dictionary in place. """ - sys.stderr.write("\t starting detect_events\n") + logging.info(f"Entering detect_events for {chr}") + c = 0 for i, z_score in enumerate(z_score_results): if (z_score <= -zscore_threshold) or (z_score >= zscore_threshold): @@ -822,7 +929,7 @@ def detect_events( if med_ratio == 0: c = 0 else: - c = cn_level(float(ratio_par_mean_ratio_results[i])) + c = cn_level(float(ratio_par_mean_ratio_results[i]), chr) if z_score >= zscore_threshold: c = 3 @@ -832,10 +939,10 @@ def detect_events( pos_end = pos_start + window_size events[chr][(pos_start, pos_end)] = c - sys.stderr.write("\t ending detect_events\n") + logging.info(f"Leaving detect_events for {chr}") -def segmentation(events, segment): +def segmentation(events, segment, chr): """ Segment the detected events into contiguous regions with the same copy number level. @@ -853,12 +960,11 @@ def segmentation(events, segment): None The function modifies the segment dictionary in place. """ - sys.stderr.write("starting segmentation : \n") + logging.info(f"Entering segmentation for {chr}") + for k in events.keys(): sys.stderr.write("\tfor chromosome %s\n" % k) - starts = 0 - oldPos = 0 - oldLevel = -1 + starts, oldPos, oldLevel = 0, 0, -1 for p in sorted(events[k].keys()): level = events[k][p] # new coordinates @@ -871,9 +977,7 @@ def segmentation(events, segment): segment[k][index]["start"] = starts segment[k][index]["end"] = oldPos + window_size segment[k][index]["cn"] = oldLevel - oldPos = p[0] - starts = p[0] - oldLevel = level + oldPos, starts, oldLevel = p[0], p[0], level continue else: starts = p[0] @@ -887,18 +991,15 @@ def segmentation(events, segment): segment[k][index]["start"] = starts segment[k][index]["end"] = oldPos segment[k][index]["cn"] = oldLevel - oldPos = p[0] - starts = p[0] - oldLevel = level + oldPos, starts, oldLevel = p[0], p[0], level continue else: oldLevel = level - oldPos = p[0] - oldLevel = level - sys.stderr.write("segmentation done\n") + oldPos, oldLevel = p[0], level + logging.info(f"Leaving segmentation for {chr}") -def display_results_vcf(sample, segment, signal, lengthFilter, output_file): +def display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr): """ Generate a VCF file containing structural variant calls based on segmented regions and signal data. @@ -923,7 +1024,7 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file): This function writes the structural variant calls to the specified output file in VCF format. """ global header_written - sys.stderr.write("starting display results\n") + logging.info(f"Entering display_results_vcf for {chr}") with open(output_file, "a") as f: if not header_written: @@ -973,11 +1074,10 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file): int(segment[k][elt]["cn"]), ) ) + + logging.info(f"Leaving display_results_vcf for {chr}") -################################# -###### <---Fonction main--->###### -################################# def main_calcul( bamfile_path, chr, @@ -1031,7 +1131,9 @@ def main_calcul( map_data = calcul_mappability(seq_length, mappability, chr) gc_data = calcul_gc_content(seq_length, chr, seq) depth_data = calcul_depth_seq(seq_length, bamfile_path, chr) - + #depth_data_count = calcul_depth_seq_count(seq_length, bamfile_path, chr) + #depth_data = calcul_depth_seq_copy(seq_length, bamfile_path, chr) + # Transférer le tableau NumPy vers CUDA d_depth_data = cuda.mem_alloc(depth_data.nbytes) cuda.memcpy_htod(d_depth_data, depth_data) @@ -1039,6 +1141,13 @@ def main_calcul( "\t d_depth_data : %s, %s\n" % (d_depth_data, d_depth_data.as_buffer(sys.getsizeof(depth_data))) ) + + #d_depth_data_count = cuda.mem_alloc(depth_data_count.nbytes) + #cuda.memcpy_htod(d_depth_data_count, depth_data_count) + #sys.stderr.write( + # "\t d_depth_data_count : %s, %s\n" + # % (d_depth_data_count, d_depth_data_count.as_buffer(sys.getsizeof(depth_data_count))) + #) d_gc_data = cuda.mem_alloc(gc_data.nbytes) cuda.memcpy_htod(d_gc_data, gc_data) @@ -1066,6 +1175,11 @@ def main_calcul( ) sys.stderr.write("\t Definition de depth_results\n") + depth_results_count = np.zeros( + int((seq_length - window_size) / step_size) + 1, dtype=np.float32 + ) + sys.stderr.write("\t Definition de depth_results_count\n") + gc_results = np.zeros( int((seq_length - window_size) / step_size) + 1, dtype=np.float32 ) @@ -1109,6 +1223,13 @@ def main_calcul( ) sys.stderr.write("\t depth_results.nbytes = %s\n" % depth_results.nbytes) + d_depth_results_count = cuda.mem_alloc(depth_results_count.nbytes) + sys.stderr.write( + "\t d_depth_results_count = %s\n" + % d_depth_results_count.as_buffer(sys.getsizeof(d_depth_results_count)) + ) + sys.stderr.write("\t depth_results_count.nbytes = %s\n" % depth_results_count.nbytes) + d_gc_results = cuda.mem_alloc(gc_results.nbytes) sys.stderr.write( "\t d_gc_results = %s\n" % d_gc_results.as_buffer(sys.getsizeof(d_gc_results)) @@ -1184,6 +1305,17 @@ def main_calcul( ) sys.stderr.write("\t appel fonction calcul_depth_kernel_cuda\n") + #calcul_depth_count_kernel_cuda( + # d_depth_data_count, + # np.int32(seq_length), + # np.int32(window_size), + #np.int32(step_size), + #d_depth_results, + #block=(block_size, 1, 1), + #grid=(grid_size, 1), + #) + #sys.stderr.write("\t appel fonction calcul_depth_count_kernel_cuda\n") + calcul_gc_kernel_cuda( d_gc_data, np.int32(seq_length), @@ -1226,6 +1358,11 @@ def main_calcul( sys.stderr.write( "\t Copie les resultats du GPU (d_depth_results) vers le CPU (depth_results)\n" ) + + #cuda.memcpy_dtoh(depth_results_count, d_depth_results_count) + #sys.stderr.write( + # "\t Copie les resultats du GPU (d_depth_results_count) vers le CPU (depth_results_count)\n" + #) cuda.memcpy_dtoh(gc_results, d_gc_results) # cuda.memcpy_dtoh(dest, src) sys.stderr.write( @@ -1248,8 +1385,8 @@ def main_calcul( # Appel fonctions medianes sys.stderr.write("\t appel fonctions calcul medianes\n") - m = calcul_med_total(depth_correction_results) - gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results) + m = calcul_med_total(depth_correction_results, chr) + gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr) # Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA sys.stderr.write("\t Conversion medianes en tableau numpy\n") @@ -1286,7 +1423,7 @@ def main_calcul( # Appel fonction moyenne sys.stderr.write("\t appel fonction calcul moyenne\n") - mean_chr = calcul_moy_totale(normalize_depth_results) + mean_chr = calcul_moy_totale(normalize_depth_results, chr) # Appeler le kernel de ratio ratio_par_window_kernel_cuda( @@ -1307,7 +1444,7 @@ def main_calcul( cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results) # Création table à partir du ratio - mean_ratio, std_ratio, med_ratio = compute_mean_and_std(ratio_par_window_results) + mean_ratio, std_ratio, med_ratio = compute_mean_std_med(ratio_par_window_results, chr) # Appeler le kernel de calcule du ratio divisé par le ratio moyen ratio_par_mean_ratio_kernel_cuda( @@ -1356,11 +1493,18 @@ def main_calcul( ) # Appel fonction segmentation - segmentation(events, segment) + segmentation(events, segment, chr) # Appel fonction display_results_vcf - display_results_vcf(sample, segment, signal, lengthFilter, output_file) + display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr) + #Ecrire les résultats dans le fichier de sortie + #with open(output_file, 'a') as f: + # sys.stderr.write("\t ecriture des fichiers\n") + # for i, (depth_data, depth_correction, depth_normalize_val, gc_data, map_data, ratio, ratio_mean_ratio, z_score) in enumerate(zip(depth_results, depth_correction_results, normalize_depth_results, gc_results, map_results, ratio_par_window_results, ratio_par_mean_ratio_results, z_score_results)): + # pos_start = (i * step_size) + 1 + # pos_end = pos_start + window_size + # f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_data}\t{depth_correction}\t{depth_normalize_val}\t{gc_data}\t{map_data}\t{ratio}\t{ratio_mean_ratio}\t{z_score}\n") # Programme principal # Calcul nombre de coeurs max pour le GPU @@ -1374,13 +1518,41 @@ print("Nombre de CPU: ", multiprocessing.cpu_count()) print(f"Nombre de coeurs max GPU: {num_cores}") gc_file = "/work/gad/shared/pipeline/grch38/index/grch38_essential.fa" -mappability_file = "/work/gad/shared/analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38.bedgraph" +mappability_file = "/work/gad/shared/pipeline/grch38/cnv/k100.Umap.MultiTrackMappability.bedgraph" seq = parse_fasta(gc_file) mappability = dico_mappabilite(mappability_file) -with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle: - for i, seq_length in enumerate(bamfile_handle.lengths): - chr = bamfile_handle.references[i] +if num_chr == "ALL" : + with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle: + for i, seq_length in enumerate(bamfile_handle.lengths): + chr = bamfile_handle.references[i] + sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length)) + + # Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome + main_calcul( + bamfile_handle, + chr, + seq_length, + window_size, + step_size, + zscore_threshold, + lengthFilter, + output_file, + sample, + ) + + logging.basicConfig( + filename="%s" % (logfile), + filemode="a", + level=logging.INFO, + format="%(asctime)s %(levelname)s - %(message)s", + ) + logging.info("end") + +else : + with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle: + seq_length = bamfile_handle.lengths[int(num_chr) - 1] + chr = bamfile_handle.references[int(num_chr) - 1] sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length)) # Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome @@ -1403,4 +1575,3 @@ with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle: format="%(asctime)s %(levelname)s - %(message)s", ) logging.info("end") -