Formatting code and add functions

be5e1926 · Theo Serralta · 91be97c9 · be5e1926
Commit be5e1926 authored Jun 19, 2024 by Theo Serralta
Hide whitespace changes
Inline Side-by-side

Showing with 292 additions and 121 deletions

test_gpu_mean_depth.py CNV/test_gpu_mean_depth.py +292 -121

No files found.
--- a/CNV/test_gpu_mean_depth.py
+++ b/CNV/test_gpu_mean_depth.py
@@ -47,19 +47,22 @@ def parse_arguments():
        Each element can be None if the corresponding argument was not provided.
    """
    try:
-        opts, args = getopt.getopt(sys.argv[1:], "b:w:s:z:l:t:o:e:")
+        opts, args = getopt.getopt(sys.argv[1:], "b:c:w:s:z:l:t:o:e:")
        (
            bamfile_path,
+            num_chr,
            window_size,
            step_size,
            zscore_threshold,
            lengthFilter,
            output_file,
            logfile,
-        ) = (None, None, None, None, None, None, None)
+        ) = (None, None, None, None, None, None, None, None)
        for opt, arg in opts:
            if opt in ("-b"):
                bamfile_path = arg
+            if opt in ("-c"):
+                num_chr = arg
            if opt in ("-w"):
                window_size = int(arg)
            if opt in ("-s"):
@@ -74,6 +77,7 @@ def parse_arguments():
                logfile = arg
        return (
            bamfile_path,
+            num_chr,
            window_size,
            step_size,
            zscore_threshold,
@@ -89,6 +93,7 @@ def parse_arguments():
 if __name__ == "__main__":
    (
        bamfile_path,
+        num_chr,
        window_size,
        step_size,
        zscore_threshold,
@@ -128,6 +133,24 @@ __global__ void calcul_depth_kernel(int *depth_data, int seq_length, int window_
    }
 }
+__global__ void calcul_depth_count_kernel(int *depth_data_count, int seq_length, int window_size, int step_size, float *depth_results_count) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    if (idx < seq_length - window_size + 1) {
+        int pos_start = (idx * step_size) + 1;
+        int pos_end = pos_start + window_size;
+        int count_reads = 0;
+        for (int i = pos_start; i < pos_end; i++) {
+            count_reads += depth_data_count[i];
+        }
+        float avg_depth_count = (float)count_reads / window_size;
+        depth_results_count[idx] = avg_depth_count;
+    }
+}
 //Kernel pour calculer le GC content
 __global__ void calcul_gc_kernel(char *gc_data, int seq_length, int window_size, int step_size, float *gc_results) {
@@ -245,6 +268,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio, float mean_ratio, int 
 # Obtention de la fonction de kernel compilée
 calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
+calcul_depth_count_kernel_cuda = mod.get_function("calcul_depth_count_kernel")
 calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
 calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")
 calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel")
@@ -280,6 +304,7 @@ def merge_intervals(intervals):
            merged.append((start, end, score))
            start, end, score = interval
    merged.append((start, end, score))
    return merged
@@ -301,9 +326,12 @@ def dico_mappabilite(mappability_file):
        A dictionary where keys are chromosome names and values are another dictionary with start positions as keys
        and mappability scores as values.
    """
-    sys.stderr.write("\t Entering dico_mappabilite\n")
+    logging.info("Entering dico_mappability")
    mappability_dico = {}
+    logging.info(" In dico_mappability : Open file and create the dico")
    with open(mappability_file, "r") as f:
        for line in f:
            fields = line.strip().split("\t")
@@ -320,19 +348,29 @@ def dico_mappabilite(mappability_file):
            mappability_dico[chromosome].append((start_pos, end_pos, score))
+    logging.info(f"In dico_mappability : Leaving file and create the dico")
    # Add position 0 for each chromosome
+    logging.info(" In dico_mappability : Add position 0 for each chromosome")
    for chromosome, intervals in mappability_dico.items():
        if intervals[0][0] != 0:
            mappability_dico[chromosome].insert(0, (0, intervals[0][0], 0))
+    logging.info(f"In dico_mappability : Ending add position 0 for each chromosome")
    # Merge intervals with the same score
+    logging.info(" In dico_mappability : Merge intervals with the same score")
    for chromosome, intervals in mappability_dico.items():
        merged_intervals = merge_intervals(intervals)
        mappability_dico[chromosome] = {
            start: score for start, _, score in merged_intervals
        }
+    logging.info(f"In dico_mappability : Ending merge intervals with the same score")
+    logging.info(f"Leaving dico_mappability")
-    sys.stderr.write("\t Leaving dico_mappabilite\n")
    return mappability_dico
@@ -357,24 +395,33 @@ def calcul_mappability(seq_length, mappability, chr):
    numpy.ndarray
        An array of mappability scores for the given sequence length.
    """
-    sys.stderr.write("\t Entering calcul_mappability \n")
+    logging.info(f"Entering calcul_mappability for {chr}")
    map_data = np.zeros(seq_length, dtype=np.float32)
    sorted_keys = sorted(mappability[chr].keys())
    prev_bound = 0
    prev_mappability = 0
+    logging.info(f"In calcul_mappability : Entering for bound in sorted_keys for {chr}")
    for bound in sorted_keys:
        for i in range(prev_bound, min(seq_length, bound)):
            map_data[i] = prev_mappability
        prev_bound = bound
        prev_mappability = mappability[chr][bound]
+    logging.info(f"In calcul_mappability : Leaving for bound in sorted_keys for {chr}")
    # Fill remaining positions if sequence length exceeds last bound
+    logging.info(f"In calcul_mappability : Entering for i in range(prev_bound, seq_length) for {chr}")
    for i in range(prev_bound, seq_length):
        map_data[i] = prev_mappability
-    sys.stderr.write("\t Leaving calcul_mappability \n")
+    logging.info(f"In calcul_mappability : Leaving for i in range(prev_bound, seq_length) for {chr}")
+    logging.info(f"Leaving dico_mappability for {chr}")
    return map_data
@@ -394,7 +441,8 @@ def parse_fasta(gc_file):
    dict
        A dictionary where keys are sequence headers and values are the corresponding sequences.
    """
-    sys.stderr.write("\t Entering parse_fasta\n")
+    logging.info("Entering parse_fasta")
    sequences = {}
    with open(gc_file, "r") as f:
        data = f.read().split(">")
@@ -404,7 +452,8 @@ def parse_fasta(gc_file):
            sequence = "".join(lines[1:])
            sequences[header] = sequence
-    sys.stderr.write("\t Leaving parse_fasta\n")
+    logging.info(f"Leaving parse_fasta")
    return sequences
@@ -428,12 +477,14 @@ def calcul_gc_content(seq_length, chr, seq):
    numpy.ndarray
        An array of bytes ('S' dtype) representing the GC content for the given sequence length.
    """
-    sys.stderr.write("\t Entering calcul_gc_content\n")
+    logging.info(f"Entering calcul_gc_content for {chr}")
    gc_data = np.zeros(seq_length, dtype="S")
    for i in range(len(seq[chr])):
        gc_data[i] = seq[chr][i]
-    sys.stderr.write("\t Leaving calcul_gc_content\n")
+    logging.info(f"Leaving calcul_gc_content for {chr}")
    return gc_data
@@ -457,18 +508,103 @@ def calcul_depth_seq(seq_length, bamfile_path, chr):
    numpy.ndarray
        An array of integers representing the sequencing depth for the given sequence length.
    """
-    sys.stderr.write("\t Entering calcul_depth_seq\n")
+    logging.info(f"Entering calcul_depth_seq for {chr}")
    depth_data = np.zeros(seq_length, dtype=np.int32)
-    for pileupcolumn in bamfile_path.pileup():
+    for pileupcolumn in bamfile_path.pileup(reference = chr):
-        if pileupcolumn.reference_pos >= seq_length:
+        pos = pileupcolumn.reference_pos
+        if pos >= seq_length:
            break
-        depth_data[pileupcolumn.reference_pos] = pileupcolumn.nsegments
+        depth_data[pos] = pileupcolumn.nsegments
+    #depth_data = bamfile_path.pileup().nsegments
+    logging.info(f"Leaving calcul_depth_seq for {chr}")
+    return depth_data
+def calcul_depth_seq_copy(seq_length, bamfile_path, chr):
+    """
+    Calculate the sequencing depth for a given chromosome.
+    This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file.
+    Parameters
+    ----------
+    seq_length : int
+        The length of the sequence.
+    bamfile_path : pysam.AlignmentFile
+        The path to the BAM file opened with pysam.AlignmentFile.
+    chr : str
+        The chromosome for which the depth is calculated.
+    Returns
+    -------
+    numpy.ndarray
+        An array of integers representing the sequencing depth for the given sequence length.
+    """
+    logging.info(f"Entering calcul_depth_seq for {chr}")
+    depth_data = np.zeros(seq_length, dtype=np.int32)
+    i = 0
+    it = bamfile_path.pileup(reference = chr)
+    while i < (seq_length - 1):
+        t = next(it)
+        pos = t.reference_pos
+        depth_data[pos] = t.nsegments
+        print(i)
+        i += 1
+    #for pileupcolumn in bamfile_path.pileup():
+      #  pos = pileupcolumn.reference_pos
+     #   if pos >= seq_length:
+       #     break
+        #depth_data[pos] = pileupcolumn.nsegments
+    #print(bamfile_path.pileup())    print("########################")
+#    depth_data = np.array(bamfile_path.pileup().nsegments)
+    logging.info(f"Leaving calcul_depth_seq for {chr}")
-    sys.stderr.write("\t Leaving calcul_depth_seq\n")
    return depth_data
+def calcul_depth_seq_count(seq_length, bamfile_path, chr):
+    """
+    Calculate the sequencing depth for a given chromosome.
+    This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file.
+    Parameters
+    ----------
+    seq_length : int
+        The length of the sequence.
+    bamfile_path : pysam.AlignmentFile
+        The path to the BAM file opened with pysam.AlignmentFile.
+    chr : str
+        The chromosome for which the depth is calculated.
+    Returns
+    -------
+    numpy.ndarray
+        An array of integers representing the sequencing depth for the given sequence length.
+    """
+    logging.info(f"Entering calcul_depth_seq for {chr}")
+    depth_data_count = np.zeros(seq_length, dtype=np.int32)
+    for i in range(seq_length):
+        depth_data_count[i] = bamfile_path.count(contig = chr, start = i, stop = i+1)
+    logging.info(f"Leaving calcul_depth_seq for {chr}")
+    return depth_data_count
-def calcul_med_total(depth_correction_results):
+def calcul_med_total(depth_correction_results, chr):
    """
    Calculate the median of non-zero depth correction results.
@@ -484,17 +620,22 @@ def calcul_med_total(depth_correction_results):
    float
        The median of the non-zero depth correction values, or 0 if no non-zero values are present.
    """
-    sys.stderr.write("\t entering calcul_med_total\n")
+    logging.info(f"Entering calcul_med_total for {chr}")
    depth_correction_results = np.array(depth_correction_results)
    # Filter results to remove zero values
    non_zero_results = depth_correction_results[depth_correction_results != 0]
    # Calculate the median of non-zero results
    m = np.median(non_zero_results) if non_zero_results.size > 0 else 0
-    sys.stderr.write("\t Leaving calcul_med_total\n")
+    sys.stderr.write("Chromosome : %s, med_chr : %s\n" % (chr, m))
+    logging.info(f"Leaving calcul_med_total for {chr}")
    return m
-def calcul_med_same_gc(gc_results, depth_correction_results):
+def calcul_med_same_gc(gc_results, depth_correction_results, chr):
    """
    Calculate the median depth correction for each unique GC content value.
@@ -512,7 +653,8 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
    dict
        A dictionary where keys are unique GC content values and values are the median depth correction for those GC values.
    """
-    sys.stderr.write("\t entering calcul_med_same_gc\n")
+    logging.info(f"Entering calcul_med_same_gc for {chr}")
    mGC = []
    depth_correction_results_array = np.array(depth_correction_results)
    unique_gc_values = np.unique(gc_results)
@@ -537,11 +679,12 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
    gc_to_median = dict(mGC)
-    sys.stderr.write("\t Leaving calcul_med_same_gc\n")
+    logging.info(f"Leaving calcul_med_same_gc for {chr}")
    return gc_to_median
-def calcul_moy_totale(normalize_depth_results):
+def calcul_moy_totale(normalize_depth_results, chr):
    """
    Calculate the mean of non-zero normalized depth results.
@@ -557,45 +700,23 @@ def calcul_moy_totale(normalize_depth_results):
    float
        The mean of the non-zero normalized depth values, or 0 if no non-zero values are present.
    """
-    sys.stderr.write("\t entering calcul_moy_totale\n")
+    logging.info(f"Entering calcul_moy_totale for {chr}")
    normalize_depth_results = np.array(normalize_depth_results)
    # Filter results to remove zero values
    non_zero_results = normalize_depth_results[normalize_depth_results != 0]
    # Calculate the mean of non-zero results
    mean_chr = np.mean(non_zero_results) if non_zero_results.size > 0 else 0
-    print(mean_chr)
-    sys.stderr.write("\t Leaving calcul_moy_totale\n")
-    return mean_chr
-def calcul_moy_totale_ratio(ratio_par_window_results):
+    sys.stderr.write("Chromosome : %s, mean_chr : %s\n" % (chr, mean_chr))
-    """
-    Calculate the mean of non-zero ratio results per window.
-    This function filters out zero values from the ratio results per window and computes the mean of the remaining values.
-    Parameters
+    logging.info(f"Leaving calcul_moy_totale for {chr}")    
-    ----------
-    ratio_par_window_results : list or numpy.ndarray
+    return mean_chr
-        A list or array of ratio values per window.
-    Returns
-    -------
-    float
-        The mean of the non-zero ratio values per window, or 0 if no non-zero values are present.
-    """
-    sys.stderr.write("\t entering calcul_moy_totale_ratio\n")
-    ratio_par_window_results = np.array(ratio_par_window_results)
-    # Filter results to remove zero values
-    non_zero_results = ratio_par_window_results[ratio_par_window_results != 0]
-    # Calculate the mean of non-zero results
-    mean_ratio = np.mean(non_zero_results) if non_zero_results.size > 0 else 0
-    print(mean_ratio)
-    sys.stderr.write("\t Leaving calcul_moy_totale_ratio\n")
-    return mean_ratio
-def calcul_std(normalize_depth_results):
+def calcul_std(normalize_depth_results, chr):
    """
    Calculate the standard deviation of non-zero normalized depth results.
@@ -611,45 +732,23 @@ def calcul_std(normalize_depth_results):
    float
        The standard deviation of the non-zero normalized depth values, or 0 if no non-zero values are present.
    """
-    sys.stderr.write("\t entering calcul_std\n")
+    logging.info(f"Entering calcul_std for {chr}")
    normalize_depth_results = np.array(normalize_depth_results)
    # Filter results to remove zero values
    non_zero_results = normalize_depth_results[normalize_depth_results != 0]
    # Calculate the standard deviation of non-zero results
    std_chr = np.std(non_zero_results) if non_zero_results.size > 0 else 0
-    print(std_chr)
-    sys.stderr.write("\t Leaving calcul_std\n")
-    return std_chr
-def calcul_std_ratio(ratio_par_window_results):
-    """
-    Calculate the standard deviation of non-zero ratio results per window.
-    This function filters out zero values from the ratio results per window and computes the standard deviation of the remaining values.
-    Parameters
-    ----------
-    ratio_par_window_results : list or numpy.ndarray
-        A list or array of ratio values per window.
-    Returns
+    sys.stderr.write("Chromosome : %s, std_chr : %s\n" % (chr, std_chr))
-    -------
-    float
+    logging.info(f"Leaving calcul_std for {chr}")      
-        The standard deviation of the non-zero ratio values per window, or 0 if no non-zero values are present.
-    """
+    return std_chr
-    sys.stderr.write("\t entering calcul_std_ratio\n")
-    ratio_par_window_results = np.array(ratio_par_window_results)
-    # Filter results to remove zero values
-    non_zero_results = ratio_par_window_results[ratio_par_window_results != 0]
-    # Calculate the standard deviation of non-zero results
-    std_ratio = np.std(non_zero_results) if non_zero_results.size > 0 else 0
-    print(std_ratio)
-    sys.stderr.write("\t Leaving calcul_std_ratio\n")
-    return std_ratio
-def compute_mean_and_std(ratio_par_window_results):
+def compute_mean_std_med(ratio_par_window_results, chr):
    """
    Compute the mean, standard deviation, and median of non-zero ratio results per window.
@@ -665,7 +764,7 @@ def compute_mean_and_std(ratio_par_window_results):
    tuple
        A tuple containing the mean, standard deviation, and median of the filtered ratio values.
    """
-    sys.stderr.write("Computing stats : \n")
+    logging.info(f"Entering compute_mean_std_med for {chr}")
    # Filter results to remove zero and -1 values
    ratio_par_window_results = np.array(ratio_par_window_results)
@@ -685,15 +784,14 @@ def compute_mean_and_std(ratio_par_window_results):
    std_ratio = np.std(table) if table else 0
    med_ratio = np.median(table) if table else 0
-    # Display results
+    sys.stderr.write("Chromosome : %s, mean_ratio : %s, std_ratio : %s, med_ratio : %s\n" % (chr, mean_ratio, std_ratio, med_ratio))
-    print(mean_ratio, std_ratio, med_ratio)
-    sys.stderr.write("Computation done\n")
+    logging.info(f"Leaving compute_mean_std_med for {chr}")
    # Return results
    return mean_ratio, std_ratio, med_ratio
-def cn_level(x):
+def cn_level(x, chr):
    """
    Determine the copy number level based on the given value.
@@ -744,10 +842,15 @@ def get_sample_name(bamfile_path):
    str
        The sample name extracted from the BAM file. If no sample name is found, returns "UnknownSample".
    """
+    logging.info(f"Entering get_sample_name")
    with pysam.AlignmentFile(bamfile_path, "rb") as bamfile:
        for read_group in bamfile.header.get("RG", []):
            if "SM" in read_group:
                return read_group["SM"]
+    logging.info(f"Leaving get_sample_name")
    return "UnknownSample"
@@ -773,12 +876,15 @@ def create_signal(signal, chr, z_score_results, step_size):
    None
        The function modifies the signal dictionary in place.
    """
+    logging.info(f"Entering create_signal for {chr} (include copy_number_level)")
    if chr not in signal:
        signal[chr] = {}
    for i in range(len(z_score_results)):
        pos = (i * step_size) + 1
        signal[chr][pos] = z_score_results[i]
+    logging.info(f"Leaving create_signal for {chr} (include copy_number_level)") 
 def detect_events(
    z_score_results,
@@ -813,7 +919,8 @@ def detect_events(
    None
        The function modifies the events dictionary in place.
    """
-    sys.stderr.write("\t starting detect_events\n")
+    logging.info(f"Entering detect_events for {chr}")
    c = 0
    for i, z_score in enumerate(z_score_results):
        if (z_score <= -zscore_threshold) or (z_score >= zscore_threshold):
@@ -822,7 +929,7 @@ def detect_events(
            if med_ratio == 0:
                c = 0
            else:
-                c = cn_level(float(ratio_par_mean_ratio_results[i]))
+                c = cn_level(float(ratio_par_mean_ratio_results[i]), chr)
            if z_score >= zscore_threshold:
                c = 3
@@ -832,10 +939,10 @@ def detect_events(
            pos_end = pos_start + window_size
            events[chr][(pos_start, pos_end)] = c
-    sys.stderr.write("\t ending detect_events\n")
+    logging.info(f"Leaving detect_events for {chr}") 
-def segmentation(events, segment):
+def segmentation(events, segment, chr):
    """
    Segment the detected events into contiguous regions with the same copy number level.
@@ -853,12 +960,11 @@ def segmentation(events, segment):
    None
        The function modifies the segment dictionary in place.
    """
-    sys.stderr.write("starting segmentation : \n")
+    logging.info(f"Entering segmentation for {chr}")
    for k in events.keys():
        sys.stderr.write("\tfor chromosome %s\n" % k)
-        starts = 0
+        starts, oldPos, oldLevel = 0, 0, -1
-        oldPos = 0
-        oldLevel = -1
        for p in sorted(events[k].keys()):
            level = events[k][p]
            # new coordinates
@@ -871,9 +977,7 @@ def segmentation(events, segment):
                    segment[k][index]["start"] = starts
                    segment[k][index]["end"] = oldPos + window_size
                    segment[k][index]["cn"] = oldLevel
-                    oldPos = p[0]
+                    oldPos, starts, oldLevel = p[0], p[0], level
-                    starts = p[0]
-                    oldLevel = level
                    continue
                else:
                    starts = p[0]
@@ -887,18 +991,15 @@ def segmentation(events, segment):
                    segment[k][index]["start"] = starts
                    segment[k][index]["end"] = oldPos
                    segment[k][index]["cn"] = oldLevel
-                    oldPos = p[0]
+                    oldPos, starts, oldLevel = p[0], p[0], level
-                    starts = p[0]
-                    oldLevel = level
                    continue
                else:
                    oldLevel = level
-            oldPos = p[0]
+            oldPos, oldLevel = p[0], level
-            oldLevel = level
-    sys.stderr.write("segmentation done\n")
+    logging.info(f"Leaving segmentation for {chr}") 
-def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
+def display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr):
    """
    Generate a VCF file containing structural variant calls based on segmented regions and signal data.
@@ -923,7 +1024,7 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
        This function writes the structural variant calls to the specified output file in VCF format.
    """
    global header_written
-    sys.stderr.write("starting display results\n")
+    logging.info(f"Entering display_results_vcf for {chr}")
    with open(output_file, "a") as f:
        if not header_written:
@@ -973,11 +1074,10 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
                            int(segment[k][elt]["cn"]),
                        )
                    )
+    logging.info(f"Leaving display_results_vcf for {chr}") 
-#################################
-###### <---Fonction main--->######
-#################################
 def main_calcul(
    bamfile_path,
    chr,
@@ -1031,7 +1131,9 @@ def main_calcul(
    map_data = calcul_mappability(seq_length, mappability, chr)
    gc_data = calcul_gc_content(seq_length, chr, seq)
    depth_data = calcul_depth_seq(seq_length, bamfile_path, chr)
+    #depth_data_count = calcul_depth_seq_count(seq_length, bamfile_path, chr)
+    #depth_data = calcul_depth_seq_copy(seq_length, bamfile_path, chr)
    # Transférer le tableau NumPy vers CUDA
    d_depth_data = cuda.mem_alloc(depth_data.nbytes)
    cuda.memcpy_htod(d_depth_data, depth_data)
@@ -1039,6 +1141,13 @@ def main_calcul(
        "\t d_depth_data : %s, %s\n"
        % (d_depth_data, d_depth_data.as_buffer(sys.getsizeof(depth_data)))
    )
+    #d_depth_data_count = cuda.mem_alloc(depth_data_count.nbytes)
+    #cuda.memcpy_htod(d_depth_data_count, depth_data_count)
+    #sys.stderr.write(
+     #   "\t d_depth_data_count : %s, %s\n"
+      #  % (d_depth_data_count, d_depth_data_count.as_buffer(sys.getsizeof(depth_data_count)))
+    #)
    d_gc_data = cuda.mem_alloc(gc_data.nbytes)
    cuda.memcpy_htod(d_gc_data, gc_data)
@@ -1066,6 +1175,11 @@ def main_calcul(
    )
    sys.stderr.write("\t Definition de depth_results\n")
+    depth_results_count = np.zeros(
+        int((seq_length - window_size) / step_size) + 1, dtype=np.float32
+    )
+    sys.stderr.write("\t Definition de depth_results_count\n")
    gc_results = np.zeros(
        int((seq_length - window_size) / step_size) + 1, dtype=np.float32
    )
@@ -1109,6 +1223,13 @@ def main_calcul(
    )
    sys.stderr.write("\t depth_results.nbytes = %s\n" % depth_results.nbytes)
+    d_depth_results_count = cuda.mem_alloc(depth_results_count.nbytes)
+    sys.stderr.write(
+        "\t d_depth_results_count = %s\n"
+        % d_depth_results_count.as_buffer(sys.getsizeof(d_depth_results_count))
+    )
+    sys.stderr.write("\t depth_results_count.nbytes = %s\n" % depth_results_count.nbytes)
    d_gc_results = cuda.mem_alloc(gc_results.nbytes)
    sys.stderr.write(
        "\t d_gc_results = %s\n" % d_gc_results.as_buffer(sys.getsizeof(d_gc_results))
@@ -1184,6 +1305,17 @@ def main_calcul(
    )
    sys.stderr.write("\t appel fonction calcul_depth_kernel_cuda\n")
+    #calcul_depth_count_kernel_cuda(
+     #   d_depth_data_count,
+      #  np.int32(seq_length),
+       # np.int32(window_size),
+        #np.int32(step_size),
+        #d_depth_results,
+        #block=(block_size, 1, 1),
+        #grid=(grid_size, 1),
+    #)
+    #sys.stderr.write("\t appel fonction calcul_depth_count_kernel_cuda\n")
    calcul_gc_kernel_cuda(
        d_gc_data,
        np.int32(seq_length),
@@ -1226,6 +1358,11 @@ def main_calcul(
    sys.stderr.write(
        "\t Copie les resultats du GPU (d_depth_results) vers le CPU (depth_results)\n"
    )
+    #cuda.memcpy_dtoh(depth_results_count, d_depth_results_count)
+    #sys.stderr.write(
+     #   "\t Copie les resultats du GPU (d_depth_results_count) vers le CPU (depth_results_count)\n"
+    #)
    cuda.memcpy_dtoh(gc_results, d_gc_results)  # cuda.memcpy_dtoh(dest, src)
    sys.stderr.write(
@@ -1248,8 +1385,8 @@ def main_calcul(
    # Appel fonctions medianes
    sys.stderr.write("\t appel fonctions calcul medianes\n")
-    m = calcul_med_total(depth_correction_results)
+    m = calcul_med_total(depth_correction_results, chr)
-    gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results)
+    gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)
    # Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
    sys.stderr.write("\t Conversion medianes en tableau numpy\n")
@@ -1286,7 +1423,7 @@ def main_calcul(
    # Appel fonction moyenne
    sys.stderr.write("\t appel fonction calcul moyenne\n")
-    mean_chr = calcul_moy_totale(normalize_depth_results)
+    mean_chr = calcul_moy_totale(normalize_depth_results, chr)
    # Appeler le kernel de ratio
    ratio_par_window_kernel_cuda(
@@ -1307,7 +1444,7 @@ def main_calcul(
    cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
    # Création table à partir du ratio
-    mean_ratio, std_ratio, med_ratio = compute_mean_and_std(ratio_par_window_results)
+    mean_ratio, std_ratio, med_ratio = compute_mean_std_med(ratio_par_window_results, chr)
    # Appeler le kernel de calcule du ratio divisé par le ratio moyen
    ratio_par_mean_ratio_kernel_cuda(
@@ -1356,11 +1493,18 @@ def main_calcul(
    )
    # Appel fonction segmentation
-    segmentation(events, segment)
+    segmentation(events, segment, chr)
    # Appel fonction display_results_vcf
-    display_results_vcf(sample, segment, signal, lengthFilter, output_file)
+    display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr)
+    #Ecrire les résultats dans le fichier de sortie
+    #with open(output_file, 'a') as f:
+     #   sys.stderr.write("\t ecriture des fichiers\n")
+      #  for i, (depth_data, depth_correction, depth_normalize_val, gc_data, map_data, ratio, ratio_mean_ratio, z_score) in enumerate(zip(depth_results, depth_correction_results, normalize_depth_results, gc_results, map_results, ratio_par_window_results, ratio_par_mean_ratio_results, z_score_results)):
+       #     pos_start = (i * step_size) + 1
+        #    pos_end = pos_start + window_size
+         #   f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_data}\t{depth_correction}\t{depth_normalize_val}\t{gc_data}\t{map_data}\t{ratio}\t{ratio_mean_ratio}\t{z_score}\n")
 # Programme principal
 # Calcul nombre de coeurs max pour le GPU
@@ -1374,13 +1518,41 @@ print("Nombre de CPU: ", multiprocessing.cpu_count())
 print(f"Nombre de coeurs max GPU: {num_cores}")
 gc_file = "/work/gad/shared/pipeline/grch38/index/grch38_essential.fa"
-mappability_file = "/work/gad/shared/analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38.bedgraph"
+mappability_file = "/work/gad/shared/pipeline/grch38/cnv/k100.Umap.MultiTrackMappability.bedgraph"
 seq = parse_fasta(gc_file)
 mappability = dico_mappabilite(mappability_file)
-with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
+if num_chr == "ALL" :
-    for i, seq_length in enumerate(bamfile_handle.lengths):
+    with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
-        chr = bamfile_handle.references[i]
+        for i, seq_length in enumerate(bamfile_handle.lengths):
+            chr = bamfile_handle.references[i]
+            sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))
+            # Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
+            main_calcul(
+                bamfile_handle,
+                chr,
+                seq_length,
+                window_size,
+                step_size,
+                zscore_threshold,
+                lengthFilter,
+                output_file,
+                sample,
+            )
+        logging.basicConfig(
+            filename="%s" % (logfile),
+            filemode="a",
+            level=logging.INFO,
+            format="%(asctime)s %(levelname)s - %(message)s",
+        )
+        logging.info("end")
+else :
+    with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
+        seq_length = bamfile_handle.lengths[int(num_chr) - 1]
+        chr = bamfile_handle.references[int(num_chr) - 1]
        sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))
        # Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
@@ -1403,4 +1575,3 @@ with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
        format="%(asctime)s %(levelname)s - %(message)s",
    )
    logging.info("end")