Commit be5e1926 authored by Theo Serralta's avatar Theo Serralta

Formatting code and add functions

parent 91be97c9
......@@ -47,19 +47,22 @@ def parse_arguments():
Each element can be None if the corresponding argument was not provided.
"""
try:
opts, args = getopt.getopt(sys.argv[1:], "b:w:s:z:l:t:o:e:")
opts, args = getopt.getopt(sys.argv[1:], "b:c:w:s:z:l:t:o:e:")
(
bamfile_path,
num_chr,
window_size,
step_size,
zscore_threshold,
lengthFilter,
output_file,
logfile,
) = (None, None, None, None, None, None, None)
) = (None, None, None, None, None, None, None, None)
for opt, arg in opts:
if opt in ("-b"):
bamfile_path = arg
if opt in ("-c"):
num_chr = arg
if opt in ("-w"):
window_size = int(arg)
if opt in ("-s"):
......@@ -74,6 +77,7 @@ def parse_arguments():
logfile = arg
return (
bamfile_path,
num_chr,
window_size,
step_size,
zscore_threshold,
......@@ -89,6 +93,7 @@ def parse_arguments():
if __name__ == "__main__":
(
bamfile_path,
num_chr,
window_size,
step_size,
zscore_threshold,
......@@ -128,6 +133,24 @@ __global__ void calcul_depth_kernel(int *depth_data, int seq_length, int window_
}
}
__global__ void calcul_depth_count_kernel(int *depth_data_count, int seq_length, int window_size, int step_size, float *depth_results_count) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
int pos_start = (idx * step_size) + 1;
int pos_end = pos_start + window_size;
int count_reads = 0;
for (int i = pos_start; i < pos_end; i++) {
count_reads += depth_data_count[i];
}
float avg_depth_count = (float)count_reads / window_size;
depth_results_count[idx] = avg_depth_count;
}
}
//Kernel pour calculer le GC content
__global__ void calcul_gc_kernel(char *gc_data, int seq_length, int window_size, int step_size, float *gc_results) {
......@@ -245,6 +268,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio, float mean_ratio, int
# Obtention de la fonction de kernel compilée
calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
calcul_depth_count_kernel_cuda = mod.get_function("calcul_depth_count_kernel")
calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")
calcul_depth_correction_kernel_cuda = mod.get_function("calcul_depth_correction_kernel")
......@@ -280,6 +304,7 @@ def merge_intervals(intervals):
merged.append((start, end, score))
start, end, score = interval
merged.append((start, end, score))
return merged
......@@ -301,9 +326,12 @@ def dico_mappabilite(mappability_file):
A dictionary where keys are chromosome names and values are another dictionary with start positions as keys
and mappability scores as values.
"""
sys.stderr.write("\t Entering dico_mappabilite\n")
logging.info("Entering dico_mappability")
mappability_dico = {}
logging.info(" In dico_mappability : Open file and create the dico")
with open(mappability_file, "r") as f:
for line in f:
fields = line.strip().split("\t")
......@@ -320,19 +348,29 @@ def dico_mappabilite(mappability_file):
mappability_dico[chromosome].append((start_pos, end_pos, score))
logging.info(f"In dico_mappability : Leaving file and create the dico")
# Add position 0 for each chromosome
logging.info(" In dico_mappability : Add position 0 for each chromosome")
for chromosome, intervals in mappability_dico.items():
if intervals[0][0] != 0:
mappability_dico[chromosome].insert(0, (0, intervals[0][0], 0))
logging.info(f"In dico_mappability : Ending add position 0 for each chromosome")
# Merge intervals with the same score
logging.info(" In dico_mappability : Merge intervals with the same score")
for chromosome, intervals in mappability_dico.items():
merged_intervals = merge_intervals(intervals)
mappability_dico[chromosome] = {
start: score for start, _, score in merged_intervals
}
sys.stderr.write("\t Leaving dico_mappabilite\n")
logging.info(f"In dico_mappability : Ending merge intervals with the same score")
logging.info(f"Leaving dico_mappability")
return mappability_dico
......@@ -357,24 +395,33 @@ def calcul_mappability(seq_length, mappability, chr):
numpy.ndarray
An array of mappability scores for the given sequence length.
"""
sys.stderr.write("\t Entering calcul_mappability \n")
logging.info(f"Entering calcul_mappability for {chr}")
map_data = np.zeros(seq_length, dtype=np.float32)
sorted_keys = sorted(mappability[chr].keys())
prev_bound = 0
prev_mappability = 0
logging.info(f"In calcul_mappability : Entering for bound in sorted_keys for {chr}")
for bound in sorted_keys:
for i in range(prev_bound, min(seq_length, bound)):
map_data[i] = prev_mappability
prev_bound = bound
prev_mappability = mappability[chr][bound]
logging.info(f"In calcul_mappability : Leaving for bound in sorted_keys for {chr}")
# Fill remaining positions if sequence length exceeds last bound
logging.info(f"In calcul_mappability : Entering for i in range(prev_bound, seq_length) for {chr}")
for i in range(prev_bound, seq_length):
map_data[i] = prev_mappability
sys.stderr.write("\t Leaving calcul_mappability \n")
logging.info(f"In calcul_mappability : Leaving for i in range(prev_bound, seq_length) for {chr}")
logging.info(f"Leaving dico_mappability for {chr}")
return map_data
......@@ -394,7 +441,8 @@ def parse_fasta(gc_file):
dict
A dictionary where keys are sequence headers and values are the corresponding sequences.
"""
sys.stderr.write("\t Entering parse_fasta\n")
logging.info("Entering parse_fasta")
sequences = {}
with open(gc_file, "r") as f:
data = f.read().split(">")
......@@ -404,7 +452,8 @@ def parse_fasta(gc_file):
sequence = "".join(lines[1:])
sequences[header] = sequence
sys.stderr.write("\t Leaving parse_fasta\n")
logging.info(f"Leaving parse_fasta")
return sequences
......@@ -428,12 +477,14 @@ def calcul_gc_content(seq_length, chr, seq):
numpy.ndarray
An array of bytes ('S' dtype) representing the GC content for the given sequence length.
"""
sys.stderr.write("\t Entering calcul_gc_content\n")
logging.info(f"Entering calcul_gc_content for {chr}")
gc_data = np.zeros(seq_length, dtype="S")
for i in range(len(seq[chr])):
gc_data[i] = seq[chr][i]
sys.stderr.write("\t Leaving calcul_gc_content\n")
logging.info(f"Leaving calcul_gc_content for {chr}")
return gc_data
......@@ -457,18 +508,103 @@ def calcul_depth_seq(seq_length, bamfile_path, chr):
numpy.ndarray
An array of integers representing the sequencing depth for the given sequence length.
"""
sys.stderr.write("\t Entering calcul_depth_seq\n")
logging.info(f"Entering calcul_depth_seq for {chr}")
depth_data = np.zeros(seq_length, dtype=np.int32)
for pileupcolumn in bamfile_path.pileup():
if pileupcolumn.reference_pos >= seq_length:
for pileupcolumn in bamfile_path.pileup(reference = chr):
pos = pileupcolumn.reference_pos
if pos >= seq_length:
break
depth_data[pileupcolumn.reference_pos] = pileupcolumn.nsegments
depth_data[pos] = pileupcolumn.nsegments
#depth_data = bamfile_path.pileup().nsegments
logging.info(f"Leaving calcul_depth_seq for {chr}")
return depth_data
def calcul_depth_seq_copy(seq_length, bamfile_path, chr):
"""
Calculate the sequencing depth for a given chromosome.
This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file.
Parameters
----------
seq_length : int
The length of the sequence.
bamfile_path : pysam.AlignmentFile
The path to the BAM file opened with pysam.AlignmentFile.
chr : str
The chromosome for which the depth is calculated.
Returns
-------
numpy.ndarray
An array of integers representing the sequencing depth for the given sequence length.
"""
logging.info(f"Entering calcul_depth_seq for {chr}")
depth_data = np.zeros(seq_length, dtype=np.int32)
i = 0
it = bamfile_path.pileup(reference = chr)
while i < (seq_length - 1):
t = next(it)
pos = t.reference_pos
depth_data[pos] = t.nsegments
print(i)
i += 1
#for pileupcolumn in bamfile_path.pileup():
# pos = pileupcolumn.reference_pos
# if pos >= seq_length:
# break
#depth_data[pos] = pileupcolumn.nsegments
#print(bamfile_path.pileup()) print("########################")
# depth_data = np.array(bamfile_path.pileup().nsegments)
logging.info(f"Leaving calcul_depth_seq for {chr}")
sys.stderr.write("\t Leaving calcul_depth_seq\n")
return depth_data
def calcul_med_total(depth_correction_results):
def calcul_depth_seq_count(seq_length, bamfile_path, chr):
"""
Calculate the sequencing depth for a given chromosome.
This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file.
Parameters
----------
seq_length : int
The length of the sequence.
bamfile_path : pysam.AlignmentFile
The path to the BAM file opened with pysam.AlignmentFile.
chr : str
The chromosome for which the depth is calculated.
Returns
-------
numpy.ndarray
An array of integers representing the sequencing depth for the given sequence length.
"""
logging.info(f"Entering calcul_depth_seq for {chr}")
depth_data_count = np.zeros(seq_length, dtype=np.int32)
for i in range(seq_length):
depth_data_count[i] = bamfile_path.count(contig = chr, start = i, stop = i+1)
logging.info(f"Leaving calcul_depth_seq for {chr}")
return depth_data_count
def calcul_med_total(depth_correction_results, chr):
"""
Calculate the median of non-zero depth correction results.
......@@ -484,17 +620,22 @@ def calcul_med_total(depth_correction_results):
float
The median of the non-zero depth correction values, or 0 if no non-zero values are present.
"""
sys.stderr.write("\t entering calcul_med_total\n")
logging.info(f"Entering calcul_med_total for {chr}")
depth_correction_results = np.array(depth_correction_results)
# Filter results to remove zero values
non_zero_results = depth_correction_results[depth_correction_results != 0]
# Calculate the median of non-zero results
m = np.median(non_zero_results) if non_zero_results.size > 0 else 0
sys.stderr.write("\t Leaving calcul_med_total\n")
sys.stderr.write("Chromosome : %s, med_chr : %s\n" % (chr, m))
logging.info(f"Leaving calcul_med_total for {chr}")
return m
def calcul_med_same_gc(gc_results, depth_correction_results):
def calcul_med_same_gc(gc_results, depth_correction_results, chr):
"""
Calculate the median depth correction for each unique GC content value.
......@@ -512,7 +653,8 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
dict
A dictionary where keys are unique GC content values and values are the median depth correction for those GC values.
"""
sys.stderr.write("\t entering calcul_med_same_gc\n")
logging.info(f"Entering calcul_med_same_gc for {chr}")
mGC = []
depth_correction_results_array = np.array(depth_correction_results)
unique_gc_values = np.unique(gc_results)
......@@ -537,11 +679,12 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
gc_to_median = dict(mGC)
sys.stderr.write("\t Leaving calcul_med_same_gc\n")
logging.info(f"Leaving calcul_med_same_gc for {chr}")
return gc_to_median
def calcul_moy_totale(normalize_depth_results):
def calcul_moy_totale(normalize_depth_results, chr):
"""
Calculate the mean of non-zero normalized depth results.
......@@ -557,45 +700,23 @@ def calcul_moy_totale(normalize_depth_results):
float
The mean of the non-zero normalized depth values, or 0 if no non-zero values are present.
"""
sys.stderr.write("\t entering calcul_moy_totale\n")
logging.info(f"Entering calcul_moy_totale for {chr}")
normalize_depth_results = np.array(normalize_depth_results)
# Filter results to remove zero values
non_zero_results = normalize_depth_results[normalize_depth_results != 0]
# Calculate the mean of non-zero results
mean_chr = np.mean(non_zero_results) if non_zero_results.size > 0 else 0
print(mean_chr)
sys.stderr.write("\t Leaving calcul_moy_totale\n")
return mean_chr
sys.stderr.write("Chromosome : %s, mean_chr : %s\n" % (chr, mean_chr))
def calcul_moy_totale_ratio(ratio_par_window_results):
"""
Calculate the mean of non-zero ratio results per window.
logging.info(f"Leaving calcul_moy_totale for {chr}")
This function filters out zero values from the ratio results per window and computes the mean of the remaining values.
return mean_chr
Parameters
----------
ratio_par_window_results : list or numpy.ndarray
A list or array of ratio values per window.
Returns
-------
float
The mean of the non-zero ratio values per window, or 0 if no non-zero values are present.
"""
sys.stderr.write("\t entering calcul_moy_totale_ratio\n")
ratio_par_window_results = np.array(ratio_par_window_results)
# Filter results to remove zero values
non_zero_results = ratio_par_window_results[ratio_par_window_results != 0]
# Calculate the mean of non-zero results
mean_ratio = np.mean(non_zero_results) if non_zero_results.size > 0 else 0
print(mean_ratio)
sys.stderr.write("\t Leaving calcul_moy_totale_ratio\n")
return mean_ratio
def calcul_std(normalize_depth_results):
def calcul_std(normalize_depth_results, chr):
"""
Calculate the standard deviation of non-zero normalized depth results.
......@@ -611,45 +732,23 @@ def calcul_std(normalize_depth_results):
float
The standard deviation of the non-zero normalized depth values, or 0 if no non-zero values are present.
"""
sys.stderr.write("\t entering calcul_std\n")
logging.info(f"Entering calcul_std for {chr}")
normalize_depth_results = np.array(normalize_depth_results)
# Filter results to remove zero values
non_zero_results = normalize_depth_results[normalize_depth_results != 0]
# Calculate the standard deviation of non-zero results
std_chr = np.std(non_zero_results) if non_zero_results.size > 0 else 0
print(std_chr)
sys.stderr.write("\t Leaving calcul_std\n")
return std_chr
def calcul_std_ratio(ratio_par_window_results):
"""
Calculate the standard deviation of non-zero ratio results per window.
sys.stderr.write("Chromosome : %s, std_chr : %s\n" % (chr, std_chr))
This function filters out zero values from the ratio results per window and computes the standard deviation of the remaining values.
logging.info(f"Leaving calcul_std for {chr}")
Parameters
----------
ratio_par_window_results : list or numpy.ndarray
A list or array of ratio values per window.
Returns
-------
float
The standard deviation of the non-zero ratio values per window, or 0 if no non-zero values are present.
"""
sys.stderr.write("\t entering calcul_std_ratio\n")
ratio_par_window_results = np.array(ratio_par_window_results)
# Filter results to remove zero values
non_zero_results = ratio_par_window_results[ratio_par_window_results != 0]
# Calculate the standard deviation of non-zero results
std_ratio = np.std(non_zero_results) if non_zero_results.size > 0 else 0
print(std_ratio)
sys.stderr.write("\t Leaving calcul_std_ratio\n")
return std_ratio
return std_chr
def compute_mean_and_std(ratio_par_window_results):
def compute_mean_std_med(ratio_par_window_results, chr):
"""
Compute the mean, standard deviation, and median of non-zero ratio results per window.
......@@ -665,7 +764,7 @@ def compute_mean_and_std(ratio_par_window_results):
tuple
A tuple containing the mean, standard deviation, and median of the filtered ratio values.
"""
sys.stderr.write("Computing stats : \n")
logging.info(f"Entering compute_mean_std_med for {chr}")
# Filter results to remove zero and -1 values
ratio_par_window_results = np.array(ratio_par_window_results)
......@@ -685,15 +784,14 @@ def compute_mean_and_std(ratio_par_window_results):
std_ratio = np.std(table) if table else 0
med_ratio = np.median(table) if table else 0
# Display results
print(mean_ratio, std_ratio, med_ratio)
sys.stderr.write("Computation done\n")
sys.stderr.write("Chromosome : %s, mean_ratio : %s, std_ratio : %s, med_ratio : %s\n" % (chr, mean_ratio, std_ratio, med_ratio))
logging.info(f"Leaving compute_mean_std_med for {chr}")
# Return results
return mean_ratio, std_ratio, med_ratio
def cn_level(x):
def cn_level(x, chr):
"""
Determine the copy number level based on the given value.
......@@ -744,10 +842,15 @@ def get_sample_name(bamfile_path):
str
The sample name extracted from the BAM file. If no sample name is found, returns "UnknownSample".
"""
logging.info(f"Entering get_sample_name")
with pysam.AlignmentFile(bamfile_path, "rb") as bamfile:
for read_group in bamfile.header.get("RG", []):
if "SM" in read_group:
return read_group["SM"]
logging.info(f"Leaving get_sample_name")
return "UnknownSample"
......@@ -773,12 +876,15 @@ def create_signal(signal, chr, z_score_results, step_size):
None
The function modifies the signal dictionary in place.
"""
logging.info(f"Entering create_signal for {chr} (include copy_number_level)")
if chr not in signal:
signal[chr] = {}
for i in range(len(z_score_results)):
pos = (i * step_size) + 1
signal[chr][pos] = z_score_results[i]
logging.info(f"Leaving create_signal for {chr} (include copy_number_level)")
def detect_events(
z_score_results,
......@@ -813,7 +919,8 @@ def detect_events(
None
The function modifies the events dictionary in place.
"""
sys.stderr.write("\t starting detect_events\n")
logging.info(f"Entering detect_events for {chr}")
c = 0
for i, z_score in enumerate(z_score_results):
if (z_score <= -zscore_threshold) or (z_score >= zscore_threshold):
......@@ -822,7 +929,7 @@ def detect_events(
if med_ratio == 0:
c = 0
else:
c = cn_level(float(ratio_par_mean_ratio_results[i]))
c = cn_level(float(ratio_par_mean_ratio_results[i]), chr)
if z_score >= zscore_threshold:
c = 3
......@@ -832,10 +939,10 @@ def detect_events(
pos_end = pos_start + window_size
events[chr][(pos_start, pos_end)] = c
sys.stderr.write("\t ending detect_events\n")
logging.info(f"Leaving detect_events for {chr}")
def segmentation(events, segment):
def segmentation(events, segment, chr):
"""
Segment the detected events into contiguous regions with the same copy number level.
......@@ -853,12 +960,11 @@ def segmentation(events, segment):
None
The function modifies the segment dictionary in place.
"""
sys.stderr.write("starting segmentation : \n")
logging.info(f"Entering segmentation for {chr}")
for k in events.keys():
sys.stderr.write("\tfor chromosome %s\n" % k)
starts = 0
oldPos = 0
oldLevel = -1
starts, oldPos, oldLevel = 0, 0, -1
for p in sorted(events[k].keys()):
level = events[k][p]
# new coordinates
......@@ -871,9 +977,7 @@ def segmentation(events, segment):
segment[k][index]["start"] = starts
segment[k][index]["end"] = oldPos + window_size
segment[k][index]["cn"] = oldLevel
oldPos = p[0]
starts = p[0]
oldLevel = level
oldPos, starts, oldLevel = p[0], p[0], level
continue
else:
starts = p[0]
......@@ -887,18 +991,15 @@ def segmentation(events, segment):
segment[k][index]["start"] = starts
segment[k][index]["end"] = oldPos
segment[k][index]["cn"] = oldLevel
oldPos = p[0]
starts = p[0]
oldLevel = level
oldPos, starts, oldLevel = p[0], p[0], level
continue
else:
oldLevel = level
oldPos = p[0]
oldLevel = level
sys.stderr.write("segmentation done\n")
oldPos, oldLevel = p[0], level
logging.info(f"Leaving segmentation for {chr}")
def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
def display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr):
"""
Generate a VCF file containing structural variant calls based on segmented regions and signal data.
......@@ -923,7 +1024,7 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
This function writes the structural variant calls to the specified output file in VCF format.
"""
global header_written
sys.stderr.write("starting display results\n")
logging.info(f"Entering display_results_vcf for {chr}")
with open(output_file, "a") as f:
if not header_written:
......@@ -974,10 +1075,9 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
)
)
logging.info(f"Leaving display_results_vcf for {chr}")
#################################
###### <---Fonction main--->######
#################################
def main_calcul(
bamfile_path,
chr,
......@@ -1031,6 +1131,8 @@ def main_calcul(
map_data = calcul_mappability(seq_length, mappability, chr)
gc_data = calcul_gc_content(seq_length, chr, seq)
depth_data = calcul_depth_seq(seq_length, bamfile_path, chr)
#depth_data_count = calcul_depth_seq_count(seq_length, bamfile_path, chr)
#depth_data = calcul_depth_seq_copy(seq_length, bamfile_path, chr)
# Transférer le tableau NumPy vers CUDA
d_depth_data = cuda.mem_alloc(depth_data.nbytes)
......@@ -1040,6 +1142,13 @@ def main_calcul(
% (d_depth_data, d_depth_data.as_buffer(sys.getsizeof(depth_data)))
)
#d_depth_data_count = cuda.mem_alloc(depth_data_count.nbytes)
#cuda.memcpy_htod(d_depth_data_count, depth_data_count)
#sys.stderr.write(
# "\t d_depth_data_count : %s, %s\n"
# % (d_depth_data_count, d_depth_data_count.as_buffer(sys.getsizeof(depth_data_count)))
#)
d_gc_data = cuda.mem_alloc(gc_data.nbytes)
cuda.memcpy_htod(d_gc_data, gc_data)
sys.stderr.write(
......@@ -1066,6 +1175,11 @@ def main_calcul(
)
sys.stderr.write("\t Definition de depth_results\n")
depth_results_count = np.zeros(
int((seq_length - window_size) / step_size) + 1, dtype=np.float32
)
sys.stderr.write("\t Definition de depth_results_count\n")
gc_results = np.zeros(
int((seq_length - window_size) / step_size) + 1, dtype=np.float32
)
......@@ -1109,6 +1223,13 @@ def main_calcul(
)
sys.stderr.write("\t depth_results.nbytes = %s\n" % depth_results.nbytes)
d_depth_results_count = cuda.mem_alloc(depth_results_count.nbytes)
sys.stderr.write(
"\t d_depth_results_count = %s\n"
% d_depth_results_count.as_buffer(sys.getsizeof(d_depth_results_count))
)
sys.stderr.write("\t depth_results_count.nbytes = %s\n" % depth_results_count.nbytes)
d_gc_results = cuda.mem_alloc(gc_results.nbytes)
sys.stderr.write(
"\t d_gc_results = %s\n" % d_gc_results.as_buffer(sys.getsizeof(d_gc_results))
......@@ -1184,6 +1305,17 @@ def main_calcul(
)
sys.stderr.write("\t appel fonction calcul_depth_kernel_cuda\n")
#calcul_depth_count_kernel_cuda(
# d_depth_data_count,
# np.int32(seq_length),
# np.int32(window_size),
#np.int32(step_size),
#d_depth_results,
#block=(block_size, 1, 1),
#grid=(grid_size, 1),
#)
#sys.stderr.write("\t appel fonction calcul_depth_count_kernel_cuda\n")
calcul_gc_kernel_cuda(
d_gc_data,
np.int32(seq_length),
......@@ -1227,6 +1359,11 @@ def main_calcul(
"\t Copie les resultats du GPU (d_depth_results) vers le CPU (depth_results)\n"
)
#cuda.memcpy_dtoh(depth_results_count, d_depth_results_count)
#sys.stderr.write(
# "\t Copie les resultats du GPU (d_depth_results_count) vers le CPU (depth_results_count)\n"
#)
cuda.memcpy_dtoh(gc_results, d_gc_results) # cuda.memcpy_dtoh(dest, src)
sys.stderr.write(
"\t Copie les resultats du GPU (d_gc_results) vers le CPU (gc_results)\n"
......@@ -1248,8 +1385,8 @@ def main_calcul(
# Appel fonctions medianes
sys.stderr.write("\t appel fonctions calcul medianes\n")
m = calcul_med_total(depth_correction_results)
gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results)
m = calcul_med_total(depth_correction_results, chr)
gc_to_median = calcul_med_same_gc(gc_results, depth_correction_results, chr)
# Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
sys.stderr.write("\t Conversion medianes en tableau numpy\n")
......@@ -1286,7 +1423,7 @@ def main_calcul(
# Appel fonction moyenne
sys.stderr.write("\t appel fonction calcul moyenne\n")
mean_chr = calcul_moy_totale(normalize_depth_results)
mean_chr = calcul_moy_totale(normalize_depth_results, chr)
# Appeler le kernel de ratio
ratio_par_window_kernel_cuda(
......@@ -1307,7 +1444,7 @@ def main_calcul(
cuda.memcpy_dtoh(ratio_par_window_results, d_ratio_par_window_results)
# Création table à partir du ratio
mean_ratio, std_ratio, med_ratio = compute_mean_and_std(ratio_par_window_results)
mean_ratio, std_ratio, med_ratio = compute_mean_std_med(ratio_par_window_results, chr)
# Appeler le kernel de calcule du ratio divisé par le ratio moyen
ratio_par_mean_ratio_kernel_cuda(
......@@ -1356,11 +1493,18 @@ def main_calcul(
)
# Appel fonction segmentation
segmentation(events, segment)
segmentation(events, segment, chr)
# Appel fonction display_results_vcf
display_results_vcf(sample, segment, signal, lengthFilter, output_file)
display_results_vcf(sample, segment, signal, lengthFilter, output_file, chr)
#Ecrire les résultats dans le fichier de sortie
#with open(output_file, 'a') as f:
# sys.stderr.write("\t ecriture des fichiers\n")
# for i, (depth_data, depth_correction, depth_normalize_val, gc_data, map_data, ratio, ratio_mean_ratio, z_score) in enumerate(zip(depth_results, depth_correction_results, normalize_depth_results, gc_results, map_results, ratio_par_window_results, ratio_par_mean_ratio_results, z_score_results)):
# pos_start = (i * step_size) + 1
# pos_end = pos_start + window_size
# f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_data}\t{depth_correction}\t{depth_normalize_val}\t{gc_data}\t{map_data}\t{ratio}\t{ratio_mean_ratio}\t{z_score}\n")
# Programme principal
# Calcul nombre de coeurs max pour le GPU
......@@ -1374,11 +1518,12 @@ print("Nombre de CPU: ", multiprocessing.cpu_count())
print(f"Nombre de coeurs max GPU: {num_cores}")
gc_file = "/work/gad/shared/pipeline/grch38/index/grch38_essential.fa"
mappability_file = "/work/gad/shared/analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38.bedgraph"
mappability_file = "/work/gad/shared/pipeline/grch38/cnv/k100.Umap.MultiTrackMappability.bedgraph"
seq = parse_fasta(gc_file)
mappability = dico_mappabilite(mappability_file)
with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
if num_chr == "ALL" :
with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
for i, seq_length in enumerate(bamfile_handle.lengths):
chr = bamfile_handle.references[i]
sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))
......@@ -1404,3 +1549,29 @@ with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
)
logging.info("end")
else :
with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
seq_length = bamfile_handle.lengths[int(num_chr) - 1]
chr = bamfile_handle.references[int(num_chr) - 1]
sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))
# Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
main_calcul(
bamfile_handle,
chr,
seq_length,
window_size,
step_size,
zscore_threshold,
lengthFilter,
output_file,
sample,
)
logging.basicConfig(
filename="%s" % (logfile),
filemode="a",
level=logging.INFO,
format="%(asctime)s %(levelname)s - %(message)s",
)
logging.info("end")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment