Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
cnvCallerGPU
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gad-public
cnvCallerGPU
Commits
be5e1926
Commit
be5e1926
authored
Jun 19, 2024
by
Theo Serralta
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Formatting code and add functions
parent
91be97c9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
292 additions
and
121 deletions
+292
-121
test_gpu_mean_depth.py
CNV/test_gpu_mean_depth.py
+292
-121
No files found.
CNV/test_gpu_mean_depth.py
View file @
be5e1926
...
@@ -47,19 +47,22 @@ def parse_arguments():
...
@@ -47,19 +47,22 @@ def parse_arguments():
Each element can be None if the corresponding argument was not provided.
Each element can be None if the corresponding argument was not provided.
"""
"""
try
:
try
:
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
"b:w:s:z:l:t:o:e:"
)
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
"b:
c:
w:s:z:l:t:o:e:"
)
(
(
bamfile_path
,
bamfile_path
,
num_chr
,
window_size
,
window_size
,
step_size
,
step_size
,
zscore_threshold
,
zscore_threshold
,
lengthFilter
,
lengthFilter
,
output_file
,
output_file
,
logfile
,
logfile
,
)
=
(
None
,
None
,
None
,
None
,
None
,
None
,
None
)
)
=
(
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
)
for
opt
,
arg
in
opts
:
for
opt
,
arg
in
opts
:
if
opt
in
(
"-b"
):
if
opt
in
(
"-b"
):
bamfile_path
=
arg
bamfile_path
=
arg
if
opt
in
(
"-c"
):
num_chr
=
arg
if
opt
in
(
"-w"
):
if
opt
in
(
"-w"
):
window_size
=
int
(
arg
)
window_size
=
int
(
arg
)
if
opt
in
(
"-s"
):
if
opt
in
(
"-s"
):
...
@@ -74,6 +77,7 @@ def parse_arguments():
...
@@ -74,6 +77,7 @@ def parse_arguments():
logfile
=
arg
logfile
=
arg
return
(
return
(
bamfile_path
,
bamfile_path
,
num_chr
,
window_size
,
window_size
,
step_size
,
step_size
,
zscore_threshold
,
zscore_threshold
,
...
@@ -89,6 +93,7 @@ def parse_arguments():
...
@@ -89,6 +93,7 @@ def parse_arguments():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
(
(
bamfile_path
,
bamfile_path
,
num_chr
,
window_size
,
window_size
,
step_size
,
step_size
,
zscore_threshold
,
zscore_threshold
,
...
@@ -128,6 +133,24 @@ __global__ void calcul_depth_kernel(int *depth_data, int seq_length, int window_
...
@@ -128,6 +133,24 @@ __global__ void calcul_depth_kernel(int *depth_data, int seq_length, int window_
}
}
}
}
__global__ void calcul_depth_count_kernel(int *depth_data_count, int seq_length, int window_size, int step_size, float *depth_results_count) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
int pos_start = (idx * step_size) + 1;
int pos_end = pos_start + window_size;
int count_reads = 0;
for (int i = pos_start; i < pos_end; i++) {
count_reads += depth_data_count[i];
}
float avg_depth_count = (float)count_reads / window_size;
depth_results_count[idx] = avg_depth_count;
}
}
//Kernel pour calculer le GC content
//Kernel pour calculer le GC content
__global__ void calcul_gc_kernel(char *gc_data, int seq_length, int window_size, int step_size, float *gc_results) {
__global__ void calcul_gc_kernel(char *gc_data, int seq_length, int window_size, int step_size, float *gc_results) {
...
@@ -245,6 +268,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio, float mean_ratio, int
...
@@ -245,6 +268,7 @@ __global__ void ratio_par_mean_ratio_kernel(float *ratio, float mean_ratio, int
# Obtention de la fonction de kernel compilée
# Obtention de la fonction de kernel compilée
calcul_depth_kernel_cuda
=
mod
.
get_function
(
"calcul_depth_kernel"
)
calcul_depth_kernel_cuda
=
mod
.
get_function
(
"calcul_depth_kernel"
)
calcul_depth_count_kernel_cuda
=
mod
.
get_function
(
"calcul_depth_count_kernel"
)
calcul_gc_kernel_cuda
=
mod
.
get_function
(
"calcul_gc_kernel"
)
calcul_gc_kernel_cuda
=
mod
.
get_function
(
"calcul_gc_kernel"
)
calcul_map_kernel_cuda
=
mod
.
get_function
(
"calcul_map_kernel"
)
calcul_map_kernel_cuda
=
mod
.
get_function
(
"calcul_map_kernel"
)
calcul_depth_correction_kernel_cuda
=
mod
.
get_function
(
"calcul_depth_correction_kernel"
)
calcul_depth_correction_kernel_cuda
=
mod
.
get_function
(
"calcul_depth_correction_kernel"
)
...
@@ -280,6 +304,7 @@ def merge_intervals(intervals):
...
@@ -280,6 +304,7 @@ def merge_intervals(intervals):
merged
.
append
((
start
,
end
,
score
))
merged
.
append
((
start
,
end
,
score
))
start
,
end
,
score
=
interval
start
,
end
,
score
=
interval
merged
.
append
((
start
,
end
,
score
))
merged
.
append
((
start
,
end
,
score
))
return
merged
return
merged
...
@@ -301,9 +326,12 @@ def dico_mappabilite(mappability_file):
...
@@ -301,9 +326,12 @@ def dico_mappabilite(mappability_file):
A dictionary where keys are chromosome names and values are another dictionary with start positions as keys
A dictionary where keys are chromosome names and values are another dictionary with start positions as keys
and mappability scores as values.
and mappability scores as values.
"""
"""
sys
.
stderr
.
write
(
"
\t
Entering dico_mappabilite
\n
"
)
logging
.
info
(
"Entering dico_mappability"
)
mappability_dico
=
{}
mappability_dico
=
{}
logging
.
info
(
" In dico_mappability : Open file and create the dico"
)
with
open
(
mappability_file
,
"r"
)
as
f
:
with
open
(
mappability_file
,
"r"
)
as
f
:
for
line
in
f
:
for
line
in
f
:
fields
=
line
.
strip
()
.
split
(
"
\t
"
)
fields
=
line
.
strip
()
.
split
(
"
\t
"
)
...
@@ -320,19 +348,29 @@ def dico_mappabilite(mappability_file):
...
@@ -320,19 +348,29 @@ def dico_mappabilite(mappability_file):
mappability_dico
[
chromosome
]
.
append
((
start_pos
,
end_pos
,
score
))
mappability_dico
[
chromosome
]
.
append
((
start_pos
,
end_pos
,
score
))
logging
.
info
(
f
"In dico_mappability : Leaving file and create the dico"
)
# Add position 0 for each chromosome
# Add position 0 for each chromosome
logging
.
info
(
" In dico_mappability : Add position 0 for each chromosome"
)
for
chromosome
,
intervals
in
mappability_dico
.
items
():
for
chromosome
,
intervals
in
mappability_dico
.
items
():
if
intervals
[
0
][
0
]
!=
0
:
if
intervals
[
0
][
0
]
!=
0
:
mappability_dico
[
chromosome
]
.
insert
(
0
,
(
0
,
intervals
[
0
][
0
],
0
))
mappability_dico
[
chromosome
]
.
insert
(
0
,
(
0
,
intervals
[
0
][
0
],
0
))
logging
.
info
(
f
"In dico_mappability : Ending add position 0 for each chromosome"
)
# Merge intervals with the same score
# Merge intervals with the same score
logging
.
info
(
" In dico_mappability : Merge intervals with the same score"
)
for
chromosome
,
intervals
in
mappability_dico
.
items
():
for
chromosome
,
intervals
in
mappability_dico
.
items
():
merged_intervals
=
merge_intervals
(
intervals
)
merged_intervals
=
merge_intervals
(
intervals
)
mappability_dico
[
chromosome
]
=
{
mappability_dico
[
chromosome
]
=
{
start
:
score
for
start
,
_
,
score
in
merged_intervals
start
:
score
for
start
,
_
,
score
in
merged_intervals
}
}
logging
.
info
(
f
"In dico_mappability : Ending merge intervals with the same score"
)
logging
.
info
(
f
"Leaving dico_mappability"
)
sys
.
stderr
.
write
(
"
\t
Leaving dico_mappabilite
\n
"
)
return
mappability_dico
return
mappability_dico
...
@@ -357,24 +395,33 @@ def calcul_mappability(seq_length, mappability, chr):
...
@@ -357,24 +395,33 @@ def calcul_mappability(seq_length, mappability, chr):
numpy.ndarray
numpy.ndarray
An array of mappability scores for the given sequence length.
An array of mappability scores for the given sequence length.
"""
"""
sys
.
stderr
.
write
(
"
\t
Entering calcul_mappability
\n
"
)
logging
.
info
(
f
"Entering calcul_mappability for {chr}"
)
map_data
=
np
.
zeros
(
seq_length
,
dtype
=
np
.
float32
)
map_data
=
np
.
zeros
(
seq_length
,
dtype
=
np
.
float32
)
sorted_keys
=
sorted
(
mappability
[
chr
]
.
keys
())
sorted_keys
=
sorted
(
mappability
[
chr
]
.
keys
())
prev_bound
=
0
prev_bound
=
0
prev_mappability
=
0
prev_mappability
=
0
logging
.
info
(
f
"In calcul_mappability : Entering for bound in sorted_keys for {chr}"
)
for
bound
in
sorted_keys
:
for
bound
in
sorted_keys
:
for
i
in
range
(
prev_bound
,
min
(
seq_length
,
bound
)):
for
i
in
range
(
prev_bound
,
min
(
seq_length
,
bound
)):
map_data
[
i
]
=
prev_mappability
map_data
[
i
]
=
prev_mappability
prev_bound
=
bound
prev_bound
=
bound
prev_mappability
=
mappability
[
chr
][
bound
]
prev_mappability
=
mappability
[
chr
][
bound
]
logging
.
info
(
f
"In calcul_mappability : Leaving for bound in sorted_keys for {chr}"
)
# Fill remaining positions if sequence length exceeds last bound
# Fill remaining positions if sequence length exceeds last bound
logging
.
info
(
f
"In calcul_mappability : Entering for i in range(prev_bound, seq_length) for {chr}"
)
for
i
in
range
(
prev_bound
,
seq_length
):
for
i
in
range
(
prev_bound
,
seq_length
):
map_data
[
i
]
=
prev_mappability
map_data
[
i
]
=
prev_mappability
sys
.
stderr
.
write
(
"
\t
Leaving calcul_mappability
\n
"
)
logging
.
info
(
f
"In calcul_mappability : Leaving for i in range(prev_bound, seq_length) for {chr}"
)
logging
.
info
(
f
"Leaving dico_mappability for {chr}"
)
return
map_data
return
map_data
...
@@ -394,7 +441,8 @@ def parse_fasta(gc_file):
...
@@ -394,7 +441,8 @@ def parse_fasta(gc_file):
dict
dict
A dictionary where keys are sequence headers and values are the corresponding sequences.
A dictionary where keys are sequence headers and values are the corresponding sequences.
"""
"""
sys
.
stderr
.
write
(
"
\t
Entering parse_fasta
\n
"
)
logging
.
info
(
"Entering parse_fasta"
)
sequences
=
{}
sequences
=
{}
with
open
(
gc_file
,
"r"
)
as
f
:
with
open
(
gc_file
,
"r"
)
as
f
:
data
=
f
.
read
()
.
split
(
">"
)
data
=
f
.
read
()
.
split
(
">"
)
...
@@ -404,7 +452,8 @@ def parse_fasta(gc_file):
...
@@ -404,7 +452,8 @@ def parse_fasta(gc_file):
sequence
=
""
.
join
(
lines
[
1
:])
sequence
=
""
.
join
(
lines
[
1
:])
sequences
[
header
]
=
sequence
sequences
[
header
]
=
sequence
sys
.
stderr
.
write
(
"
\t
Leaving parse_fasta
\n
"
)
logging
.
info
(
f
"Leaving parse_fasta"
)
return
sequences
return
sequences
...
@@ -428,12 +477,14 @@ def calcul_gc_content(seq_length, chr, seq):
...
@@ -428,12 +477,14 @@ def calcul_gc_content(seq_length, chr, seq):
numpy.ndarray
numpy.ndarray
An array of bytes ('S' dtype) representing the GC content for the given sequence length.
An array of bytes ('S' dtype) representing the GC content for the given sequence length.
"""
"""
sys
.
stderr
.
write
(
"
\t
Entering calcul_gc_content
\n
"
)
logging
.
info
(
f
"Entering calcul_gc_content for {chr}"
)
gc_data
=
np
.
zeros
(
seq_length
,
dtype
=
"S"
)
gc_data
=
np
.
zeros
(
seq_length
,
dtype
=
"S"
)
for
i
in
range
(
len
(
seq
[
chr
])):
for
i
in
range
(
len
(
seq
[
chr
])):
gc_data
[
i
]
=
seq
[
chr
][
i
]
gc_data
[
i
]
=
seq
[
chr
][
i
]
sys
.
stderr
.
write
(
"
\t
Leaving calcul_gc_content
\n
"
)
logging
.
info
(
f
"Leaving calcul_gc_content for {chr}"
)
return
gc_data
return
gc_data
...
@@ -457,18 +508,103 @@ def calcul_depth_seq(seq_length, bamfile_path, chr):
...
@@ -457,18 +508,103 @@ def calcul_depth_seq(seq_length, bamfile_path, chr):
numpy.ndarray
numpy.ndarray
An array of integers representing the sequencing depth for the given sequence length.
An array of integers representing the sequencing depth for the given sequence length.
"""
"""
sys
.
stderr
.
write
(
"
\t
Entering calcul_depth_seq
\n
"
)
logging
.
info
(
f
"Entering calcul_depth_seq for {chr}"
)
depth_data
=
np
.
zeros
(
seq_length
,
dtype
=
np
.
int32
)
depth_data
=
np
.
zeros
(
seq_length
,
dtype
=
np
.
int32
)
for
pileupcolumn
in
bamfile_path
.
pileup
():
for
pileupcolumn
in
bamfile_path
.
pileup
(
reference
=
chr
):
if
pileupcolumn
.
reference_pos
>=
seq_length
:
pos
=
pileupcolumn
.
reference_pos
if
pos
>=
seq_length
:
break
break
depth_data
[
pileupcolumn
.
reference_pos
]
=
pileupcolumn
.
nsegments
depth_data
[
pos
]
=
pileupcolumn
.
nsegments
#depth_data = bamfile_path.pileup().nsegments
logging
.
info
(
f
"Leaving calcul_depth_seq for {chr}"
)
return
depth_data
def
calcul_depth_seq_copy
(
seq_length
,
bamfile_path
,
chr
):
"""
Calculate the sequencing depth for a given chromosome.
This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file.
Parameters
----------
seq_length : int
The length of the sequence.
bamfile_path : pysam.AlignmentFile
The path to the BAM file opened with pysam.AlignmentFile.
chr : str
The chromosome for which the depth is calculated.
Returns
-------
numpy.ndarray
An array of integers representing the sequencing depth for the given sequence length.
"""
logging
.
info
(
f
"Entering calcul_depth_seq for {chr}"
)
depth_data
=
np
.
zeros
(
seq_length
,
dtype
=
np
.
int32
)
i
=
0
it
=
bamfile_path
.
pileup
(
reference
=
chr
)
while
i
<
(
seq_length
-
1
):
t
=
next
(
it
)
pos
=
t
.
reference_pos
depth_data
[
pos
]
=
t
.
nsegments
print
(
i
)
i
+=
1
#for pileupcolumn in bamfile_path.pileup():
# pos = pileupcolumn.reference_pos
# if pos >= seq_length:
# break
#depth_data[pos] = pileupcolumn.nsegments
#print(bamfile_path.pileup()) print("########################")
# depth_data = np.array(bamfile_path.pileup().nsegments)
logging
.
info
(
f
"Leaving calcul_depth_seq for {chr}"
)
sys
.
stderr
.
write
(
"
\t
Leaving calcul_depth_seq
\n
"
)
return
depth_data
return
depth_data
def
calcul_depth_seq_count
(
seq_length
,
bamfile_path
,
chr
):
"""
Calculate the sequencing depth for a given chromosome.
This function computes the sequencing depth for a specific chromosome and sequence length using a BAM file.
Parameters
----------
seq_length : int
The length of the sequence.
bamfile_path : pysam.AlignmentFile
The path to the BAM file opened with pysam.AlignmentFile.
chr : str
The chromosome for which the depth is calculated.
Returns
-------
numpy.ndarray
An array of integers representing the sequencing depth for the given sequence length.
"""
logging
.
info
(
f
"Entering calcul_depth_seq for {chr}"
)
depth_data_count
=
np
.
zeros
(
seq_length
,
dtype
=
np
.
int32
)
for
i
in
range
(
seq_length
):
depth_data_count
[
i
]
=
bamfile_path
.
count
(
contig
=
chr
,
start
=
i
,
stop
=
i
+
1
)
logging
.
info
(
f
"Leaving calcul_depth_seq for {chr}"
)
return
depth_data_count
def
calcul_med_total
(
depth_correction_results
):
def
calcul_med_total
(
depth_correction_results
,
chr
):
"""
"""
Calculate the median of non-zero depth correction results.
Calculate the median of non-zero depth correction results.
...
@@ -484,17 +620,22 @@ def calcul_med_total(depth_correction_results):
...
@@ -484,17 +620,22 @@ def calcul_med_total(depth_correction_results):
float
float
The median of the non-zero depth correction values, or 0 if no non-zero values are present.
The median of the non-zero depth correction values, or 0 if no non-zero values are present.
"""
"""
sys
.
stderr
.
write
(
"
\t
entering calcul_med_total
\n
"
)
logging
.
info
(
f
"Entering calcul_med_total for {chr}"
)
depth_correction_results
=
np
.
array
(
depth_correction_results
)
depth_correction_results
=
np
.
array
(
depth_correction_results
)
# Filter results to remove zero values
# Filter results to remove zero values
non_zero_results
=
depth_correction_results
[
depth_correction_results
!=
0
]
non_zero_results
=
depth_correction_results
[
depth_correction_results
!=
0
]
# Calculate the median of non-zero results
# Calculate the median of non-zero results
m
=
np
.
median
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
m
=
np
.
median
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
sys
.
stderr
.
write
(
"
\t
Leaving calcul_med_total
\n
"
)
sys
.
stderr
.
write
(
"Chromosome :
%
s, med_chr :
%
s
\n
"
%
(
chr
,
m
))
logging
.
info
(
f
"Leaving calcul_med_total for {chr}"
)
return
m
return
m
def
calcul_med_same_gc
(
gc_results
,
depth_correction_results
):
def
calcul_med_same_gc
(
gc_results
,
depth_correction_results
,
chr
):
"""
"""
Calculate the median depth correction for each unique GC content value.
Calculate the median depth correction for each unique GC content value.
...
@@ -512,7 +653,8 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
...
@@ -512,7 +653,8 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
dict
dict
A dictionary where keys are unique GC content values and values are the median depth correction for those GC values.
A dictionary where keys are unique GC content values and values are the median depth correction for those GC values.
"""
"""
sys
.
stderr
.
write
(
"
\t
entering calcul_med_same_gc
\n
"
)
logging
.
info
(
f
"Entering calcul_med_same_gc for {chr}"
)
mGC
=
[]
mGC
=
[]
depth_correction_results_array
=
np
.
array
(
depth_correction_results
)
depth_correction_results_array
=
np
.
array
(
depth_correction_results
)
unique_gc_values
=
np
.
unique
(
gc_results
)
unique_gc_values
=
np
.
unique
(
gc_results
)
...
@@ -537,11 +679,12 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
...
@@ -537,11 +679,12 @@ def calcul_med_same_gc(gc_results, depth_correction_results):
gc_to_median
=
dict
(
mGC
)
gc_to_median
=
dict
(
mGC
)
sys
.
stderr
.
write
(
"
\t
Leaving calcul_med_same_gc
\n
"
)
logging
.
info
(
f
"Leaving calcul_med_same_gc for {chr}"
)
return
gc_to_median
return
gc_to_median
def
calcul_moy_totale
(
normalize_depth_results
):
def
calcul_moy_totale
(
normalize_depth_results
,
chr
):
"""
"""
Calculate the mean of non-zero normalized depth results.
Calculate the mean of non-zero normalized depth results.
...
@@ -557,45 +700,23 @@ def calcul_moy_totale(normalize_depth_results):
...
@@ -557,45 +700,23 @@ def calcul_moy_totale(normalize_depth_results):
float
float
The mean of the non-zero normalized depth values, or 0 if no non-zero values are present.
The mean of the non-zero normalized depth values, or 0 if no non-zero values are present.
"""
"""
sys
.
stderr
.
write
(
"
\t
entering calcul_moy_totale
\n
"
)
logging
.
info
(
f
"Entering calcul_moy_totale for {chr}"
)
normalize_depth_results
=
np
.
array
(
normalize_depth_results
)
normalize_depth_results
=
np
.
array
(
normalize_depth_results
)
# Filter results to remove zero values
# Filter results to remove zero values
non_zero_results
=
normalize_depth_results
[
normalize_depth_results
!=
0
]
non_zero_results
=
normalize_depth_results
[
normalize_depth_results
!=
0
]
# Calculate the mean of non-zero results
# Calculate the mean of non-zero results
mean_chr
=
np
.
mean
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
mean_chr
=
np
.
mean
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
print
(
mean_chr
)
sys
.
stderr
.
write
(
"
\t
Leaving calcul_moy_totale
\n
"
)
return
mean_chr
def
calcul_moy_totale_ratio
(
ratio_par_window_results
):
sys
.
stderr
.
write
(
"Chromosome :
%
s, mean_chr :
%
s
\n
"
%
(
chr
,
mean_chr
))
"""
Calculate the mean of non-zero ratio results per window.
This function filters out zero values from the ratio results per window and computes the mean of the remaining values.
Parameters
logging
.
info
(
f
"Leaving calcul_moy_totale for {chr}"
)
----------
ratio_par_window_results : list or numpy.ndarray
return
mean_chr
A list or array of ratio values per window.
Returns
-------
float
The mean of the non-zero ratio values per window, or 0 if no non-zero values are present.
"""
sys
.
stderr
.
write
(
"
\t
entering calcul_moy_totale_ratio
\n
"
)
ratio_par_window_results
=
np
.
array
(
ratio_par_window_results
)
# Filter results to remove zero values
non_zero_results
=
ratio_par_window_results
[
ratio_par_window_results
!=
0
]
# Calculate the mean of non-zero results
mean_ratio
=
np
.
mean
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
print
(
mean_ratio
)
sys
.
stderr
.
write
(
"
\t
Leaving calcul_moy_totale_ratio
\n
"
)
return
mean_ratio
def
calcul_std
(
normalize_depth_results
):
def
calcul_std
(
normalize_depth_results
,
chr
):
"""
"""
Calculate the standard deviation of non-zero normalized depth results.
Calculate the standard deviation of non-zero normalized depth results.
...
@@ -611,45 +732,23 @@ def calcul_std(normalize_depth_results):
...
@@ -611,45 +732,23 @@ def calcul_std(normalize_depth_results):
float
float
The standard deviation of the non-zero normalized depth values, or 0 if no non-zero values are present.
The standard deviation of the non-zero normalized depth values, or 0 if no non-zero values are present.
"""
"""
sys
.
stderr
.
write
(
"
\t
entering calcul_std
\n
"
)
logging
.
info
(
f
"Entering calcul_std for {chr}"
)
normalize_depth_results
=
np
.
array
(
normalize_depth_results
)
normalize_depth_results
=
np
.
array
(
normalize_depth_results
)
# Filter results to remove zero values
# Filter results to remove zero values
non_zero_results
=
normalize_depth_results
[
normalize_depth_results
!=
0
]
non_zero_results
=
normalize_depth_results
[
normalize_depth_results
!=
0
]
# Calculate the standard deviation of non-zero results
# Calculate the standard deviation of non-zero results
std_chr
=
np
.
std
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
std_chr
=
np
.
std
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
print
(
std_chr
)
sys
.
stderr
.
write
(
"
\t
Leaving calcul_std
\n
"
)
return
std_chr
def
calcul_std_ratio
(
ratio_par_window_results
):
"""
Calculate the standard deviation of non-zero ratio results per window.
This function filters out zero values from the ratio results per window and computes the standard deviation of the remaining values.
Parameters
----------
ratio_par_window_results : list or numpy.ndarray
A list or array of ratio values per window.
Returns
sys
.
stderr
.
write
(
"Chromosome :
%
s, std_chr :
%
s
\n
"
%
(
chr
,
std_chr
))
-------
float
logging
.
info
(
f
"Leaving calcul_std for {chr}"
)
The standard deviation of the non-zero ratio values per window, or 0 if no non-zero values are present.
"""
return
std_chr
sys
.
stderr
.
write
(
"
\t
entering calcul_std_ratio
\n
"
)
ratio_par_window_results
=
np
.
array
(
ratio_par_window_results
)
# Filter results to remove zero values
non_zero_results
=
ratio_par_window_results
[
ratio_par_window_results
!=
0
]
# Calculate the standard deviation of non-zero results
std_ratio
=
np
.
std
(
non_zero_results
)
if
non_zero_results
.
size
>
0
else
0
print
(
std_ratio
)
sys
.
stderr
.
write
(
"
\t
Leaving calcul_std_ratio
\n
"
)
return
std_ratio
def
compute_mean_
and_std
(
ratio_par_window_results
):
def
compute_mean_
std_med
(
ratio_par_window_results
,
chr
):
"""
"""
Compute the mean, standard deviation, and median of non-zero ratio results per window.
Compute the mean, standard deviation, and median of non-zero ratio results per window.
...
@@ -665,7 +764,7 @@ def compute_mean_and_std(ratio_par_window_results):
...
@@ -665,7 +764,7 @@ def compute_mean_and_std(ratio_par_window_results):
tuple
tuple
A tuple containing the mean, standard deviation, and median of the filtered ratio values.
A tuple containing the mean, standard deviation, and median of the filtered ratio values.
"""
"""
sys
.
stderr
.
write
(
"Computing stats :
\n
"
)
logging
.
info
(
f
"Entering compute_mean_std_med for {chr}
"
)
# Filter results to remove zero and -1 values
# Filter results to remove zero and -1 values
ratio_par_window_results
=
np
.
array
(
ratio_par_window_results
)
ratio_par_window_results
=
np
.
array
(
ratio_par_window_results
)
...
@@ -685,15 +784,14 @@ def compute_mean_and_std(ratio_par_window_results):
...
@@ -685,15 +784,14 @@ def compute_mean_and_std(ratio_par_window_results):
std_ratio
=
np
.
std
(
table
)
if
table
else
0
std_ratio
=
np
.
std
(
table
)
if
table
else
0
med_ratio
=
np
.
median
(
table
)
if
table
else
0
med_ratio
=
np
.
median
(
table
)
if
table
else
0
# Display results
sys
.
stderr
.
write
(
"Chromosome :
%
s, mean_ratio :
%
s, std_ratio :
%
s, med_ratio :
%
s
\n
"
%
(
chr
,
mean_ratio
,
std_ratio
,
med_ratio
))
print
(
mean_ratio
,
std_ratio
,
med_ratio
)
sys
.
stderr
.
write
(
"Computation done
\n
"
)
logging
.
info
(
f
"Leaving compute_mean_std_med for {chr}"
)
# Return results
# Return results
return
mean_ratio
,
std_ratio
,
med_ratio
return
mean_ratio
,
std_ratio
,
med_ratio
def
cn_level
(
x
):
def
cn_level
(
x
,
chr
):
"""
"""
Determine the copy number level based on the given value.
Determine the copy number level based on the given value.
...
@@ -744,10 +842,15 @@ def get_sample_name(bamfile_path):
...
@@ -744,10 +842,15 @@ def get_sample_name(bamfile_path):
str
str
The sample name extracted from the BAM file. If no sample name is found, returns "UnknownSample".
The sample name extracted from the BAM file. If no sample name is found, returns "UnknownSample".
"""
"""
logging
.
info
(
f
"Entering get_sample_name"
)
with
pysam
.
AlignmentFile
(
bamfile_path
,
"rb"
)
as
bamfile
:
with
pysam
.
AlignmentFile
(
bamfile_path
,
"rb"
)
as
bamfile
:
for
read_group
in
bamfile
.
header
.
get
(
"RG"
,
[]):
for
read_group
in
bamfile
.
header
.
get
(
"RG"
,
[]):
if
"SM"
in
read_group
:
if
"SM"
in
read_group
:
return
read_group
[
"SM"
]
return
read_group
[
"SM"
]
logging
.
info
(
f
"Leaving get_sample_name"
)
return
"UnknownSample"
return
"UnknownSample"
...
@@ -773,12 +876,15 @@ def create_signal(signal, chr, z_score_results, step_size):
...
@@ -773,12 +876,15 @@ def create_signal(signal, chr, z_score_results, step_size):
None
None
The function modifies the signal dictionary in place.
The function modifies the signal dictionary in place.
"""
"""
logging
.
info
(
f
"Entering create_signal for {chr} (include copy_number_level)"
)
if
chr
not
in
signal
:
if
chr
not
in
signal
:
signal
[
chr
]
=
{}
signal
[
chr
]
=
{}
for
i
in
range
(
len
(
z_score_results
)):
for
i
in
range
(
len
(
z_score_results
)):
pos
=
(
i
*
step_size
)
+
1
pos
=
(
i
*
step_size
)
+
1
signal
[
chr
][
pos
]
=
z_score_results
[
i
]
signal
[
chr
][
pos
]
=
z_score_results
[
i
]
logging
.
info
(
f
"Leaving create_signal for {chr} (include copy_number_level)"
)
def
detect_events
(
def
detect_events
(
z_score_results
,
z_score_results
,
...
@@ -813,7 +919,8 @@ def detect_events(
...
@@ -813,7 +919,8 @@ def detect_events(
None
None
The function modifies the events dictionary in place.
The function modifies the events dictionary in place.
"""
"""
sys
.
stderr
.
write
(
"
\t
starting detect_events
\n
"
)
logging
.
info
(
f
"Entering detect_events for {chr}"
)
c
=
0
c
=
0
for
i
,
z_score
in
enumerate
(
z_score_results
):
for
i
,
z_score
in
enumerate
(
z_score_results
):
if
(
z_score
<=
-
zscore_threshold
)
or
(
z_score
>=
zscore_threshold
):
if
(
z_score
<=
-
zscore_threshold
)
or
(
z_score
>=
zscore_threshold
):
...
@@ -822,7 +929,7 @@ def detect_events(
...
@@ -822,7 +929,7 @@ def detect_events(
if
med_ratio
==
0
:
if
med_ratio
==
0
:
c
=
0
c
=
0
else
:
else
:
c
=
cn_level
(
float
(
ratio_par_mean_ratio_results
[
i
]))
c
=
cn_level
(
float
(
ratio_par_mean_ratio_results
[
i
])
,
chr
)
if
z_score
>=
zscore_threshold
:
if
z_score
>=
zscore_threshold
:
c
=
3
c
=
3
...
@@ -832,10 +939,10 @@ def detect_events(
...
@@ -832,10 +939,10 @@ def detect_events(
pos_end
=
pos_start
+
window_size
pos_end
=
pos_start
+
window_size
events
[
chr
][(
pos_start
,
pos_end
)]
=
c
events
[
chr
][(
pos_start
,
pos_end
)]
=
c
sys
.
stderr
.
write
(
"
\t
ending detect_events
\n
"
)
logging
.
info
(
f
"Leaving detect_events for {chr}"
)
def
segmentation
(
events
,
segment
):
def
segmentation
(
events
,
segment
,
chr
):
"""
"""
Segment the detected events into contiguous regions with the same copy number level.
Segment the detected events into contiguous regions with the same copy number level.
...
@@ -853,12 +960,11 @@ def segmentation(events, segment):
...
@@ -853,12 +960,11 @@ def segmentation(events, segment):
None
None
The function modifies the segment dictionary in place.
The function modifies the segment dictionary in place.
"""
"""
sys
.
stderr
.
write
(
"starting segmentation :
\n
"
)
logging
.
info
(
f
"Entering segmentation for {chr}"
)
for
k
in
events
.
keys
():
for
k
in
events
.
keys
():
sys
.
stderr
.
write
(
"
\t
for chromosome
%
s
\n
"
%
k
)
sys
.
stderr
.
write
(
"
\t
for chromosome
%
s
\n
"
%
k
)
starts
=
0
starts
,
oldPos
,
oldLevel
=
0
,
0
,
-
1
oldPos
=
0
oldLevel
=
-
1
for
p
in
sorted
(
events
[
k
]
.
keys
()):
for
p
in
sorted
(
events
[
k
]
.
keys
()):
level
=
events
[
k
][
p
]
level
=
events
[
k
][
p
]
# new coordinates
# new coordinates
...
@@ -871,9 +977,7 @@ def segmentation(events, segment):
...
@@ -871,9 +977,7 @@ def segmentation(events, segment):
segment
[
k
][
index
][
"start"
]
=
starts
segment
[
k
][
index
][
"start"
]
=
starts
segment
[
k
][
index
][
"end"
]
=
oldPos
+
window_size
segment
[
k
][
index
][
"end"
]
=
oldPos
+
window_size
segment
[
k
][
index
][
"cn"
]
=
oldLevel
segment
[
k
][
index
][
"cn"
]
=
oldLevel
oldPos
=
p
[
0
]
oldPos
,
starts
,
oldLevel
=
p
[
0
],
p
[
0
],
level
starts
=
p
[
0
]
oldLevel
=
level
continue
continue
else
:
else
:
starts
=
p
[
0
]
starts
=
p
[
0
]
...
@@ -887,18 +991,15 @@ def segmentation(events, segment):
...
@@ -887,18 +991,15 @@ def segmentation(events, segment):
segment
[
k
][
index
][
"start"
]
=
starts
segment
[
k
][
index
][
"start"
]
=
starts
segment
[
k
][
index
][
"end"
]
=
oldPos
segment
[
k
][
index
][
"end"
]
=
oldPos
segment
[
k
][
index
][
"cn"
]
=
oldLevel
segment
[
k
][
index
][
"cn"
]
=
oldLevel
oldPos
=
p
[
0
]
oldPos
,
starts
,
oldLevel
=
p
[
0
],
p
[
0
],
level
starts
=
p
[
0
]
oldLevel
=
level
continue
continue
else
:
else
:
oldLevel
=
level
oldLevel
=
level
oldPos
=
p
[
0
]
oldPos
,
oldLevel
=
p
[
0
],
level
oldLevel
=
level
sys
.
stderr
.
write
(
"segmentation done
\n
"
)
logging
.
info
(
f
"Leaving segmentation for {chr}"
)
def
display_results_vcf
(
sample
,
segment
,
signal
,
lengthFilter
,
output_file
):
def
display_results_vcf
(
sample
,
segment
,
signal
,
lengthFilter
,
output_file
,
chr
):
"""
"""
Generate a VCF file containing structural variant calls based on segmented regions and signal data.
Generate a VCF file containing structural variant calls based on segmented regions and signal data.
...
@@ -923,7 +1024,7 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
...
@@ -923,7 +1024,7 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
This function writes the structural variant calls to the specified output file in VCF format.
This function writes the structural variant calls to the specified output file in VCF format.
"""
"""
global
header_written
global
header_written
sys
.
stderr
.
write
(
"starting display results
\n
"
)
logging
.
info
(
f
"Entering display_results_vcf for {chr}
"
)
with
open
(
output_file
,
"a"
)
as
f
:
with
open
(
output_file
,
"a"
)
as
f
:
if
not
header_written
:
if
not
header_written
:
...
@@ -973,11 +1074,10 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
...
@@ -973,11 +1074,10 @@ def display_results_vcf(sample, segment, signal, lengthFilter, output_file):
int
(
segment
[
k
][
elt
][
"cn"
]),
int
(
segment
[
k
][
elt
][
"cn"
]),
)
)
)
)
logging
.
info
(
f
"Leaving display_results_vcf for {chr}"
)
#################################
###### <---Fonction main--->######
#################################
def
main_calcul
(
def
main_calcul
(
bamfile_path
,
bamfile_path
,
chr
,
chr
,
...
@@ -1031,7 +1131,9 @@ def main_calcul(
...
@@ -1031,7 +1131,9 @@ def main_calcul(
map_data
=
calcul_mappability
(
seq_length
,
mappability
,
chr
)
map_data
=
calcul_mappability
(
seq_length
,
mappability
,
chr
)
gc_data
=
calcul_gc_content
(
seq_length
,
chr
,
seq
)
gc_data
=
calcul_gc_content
(
seq_length
,
chr
,
seq
)
depth_data
=
calcul_depth_seq
(
seq_length
,
bamfile_path
,
chr
)
depth_data
=
calcul_depth_seq
(
seq_length
,
bamfile_path
,
chr
)
#depth_data_count = calcul_depth_seq_count(seq_length, bamfile_path, chr)
#depth_data = calcul_depth_seq_copy(seq_length, bamfile_path, chr)
# Transférer le tableau NumPy vers CUDA
# Transférer le tableau NumPy vers CUDA
d_depth_data
=
cuda
.
mem_alloc
(
depth_data
.
nbytes
)
d_depth_data
=
cuda
.
mem_alloc
(
depth_data
.
nbytes
)
cuda
.
memcpy_htod
(
d_depth_data
,
depth_data
)
cuda
.
memcpy_htod
(
d_depth_data
,
depth_data
)
...
@@ -1039,6 +1141,13 @@ def main_calcul(
...
@@ -1039,6 +1141,13 @@ def main_calcul(
"
\t
d_depth_data :
%
s,
%
s
\n
"
"
\t
d_depth_data :
%
s,
%
s
\n
"
%
(
d_depth_data
,
d_depth_data
.
as_buffer
(
sys
.
getsizeof
(
depth_data
)))
%
(
d_depth_data
,
d_depth_data
.
as_buffer
(
sys
.
getsizeof
(
depth_data
)))
)
)
#d_depth_data_count = cuda.mem_alloc(depth_data_count.nbytes)
#cuda.memcpy_htod(d_depth_data_count, depth_data_count)
#sys.stderr.write(
# "\t d_depth_data_count : %s, %s\n"
# % (d_depth_data_count, d_depth_data_count.as_buffer(sys.getsizeof(depth_data_count)))
#)
d_gc_data
=
cuda
.
mem_alloc
(
gc_data
.
nbytes
)
d_gc_data
=
cuda
.
mem_alloc
(
gc_data
.
nbytes
)
cuda
.
memcpy_htod
(
d_gc_data
,
gc_data
)
cuda
.
memcpy_htod
(
d_gc_data
,
gc_data
)
...
@@ -1066,6 +1175,11 @@ def main_calcul(
...
@@ -1066,6 +1175,11 @@ def main_calcul(
)
)
sys
.
stderr
.
write
(
"
\t
Definition de depth_results
\n
"
)
sys
.
stderr
.
write
(
"
\t
Definition de depth_results
\n
"
)
depth_results_count
=
np
.
zeros
(
int
((
seq_length
-
window_size
)
/
step_size
)
+
1
,
dtype
=
np
.
float32
)
sys
.
stderr
.
write
(
"
\t
Definition de depth_results_count
\n
"
)
gc_results
=
np
.
zeros
(
gc_results
=
np
.
zeros
(
int
((
seq_length
-
window_size
)
/
step_size
)
+
1
,
dtype
=
np
.
float32
int
((
seq_length
-
window_size
)
/
step_size
)
+
1
,
dtype
=
np
.
float32
)
)
...
@@ -1109,6 +1223,13 @@ def main_calcul(
...
@@ -1109,6 +1223,13 @@ def main_calcul(
)
)
sys
.
stderr
.
write
(
"
\t
depth_results.nbytes =
%
s
\n
"
%
depth_results
.
nbytes
)
sys
.
stderr
.
write
(
"
\t
depth_results.nbytes =
%
s
\n
"
%
depth_results
.
nbytes
)
d_depth_results_count
=
cuda
.
mem_alloc
(
depth_results_count
.
nbytes
)
sys
.
stderr
.
write
(
"
\t
d_depth_results_count =
%
s
\n
"
%
d_depth_results_count
.
as_buffer
(
sys
.
getsizeof
(
d_depth_results_count
))
)
sys
.
stderr
.
write
(
"
\t
depth_results_count.nbytes =
%
s
\n
"
%
depth_results_count
.
nbytes
)
d_gc_results
=
cuda
.
mem_alloc
(
gc_results
.
nbytes
)
d_gc_results
=
cuda
.
mem_alloc
(
gc_results
.
nbytes
)
sys
.
stderr
.
write
(
sys
.
stderr
.
write
(
"
\t
d_gc_results =
%
s
\n
"
%
d_gc_results
.
as_buffer
(
sys
.
getsizeof
(
d_gc_results
))
"
\t
d_gc_results =
%
s
\n
"
%
d_gc_results
.
as_buffer
(
sys
.
getsizeof
(
d_gc_results
))
...
@@ -1184,6 +1305,17 @@ def main_calcul(
...
@@ -1184,6 +1305,17 @@ def main_calcul(
)
)
sys
.
stderr
.
write
(
"
\t
appel fonction calcul_depth_kernel_cuda
\n
"
)
sys
.
stderr
.
write
(
"
\t
appel fonction calcul_depth_kernel_cuda
\n
"
)
#calcul_depth_count_kernel_cuda(
# d_depth_data_count,
# np.int32(seq_length),
# np.int32(window_size),
#np.int32(step_size),
#d_depth_results,
#block=(block_size, 1, 1),
#grid=(grid_size, 1),
#)
#sys.stderr.write("\t appel fonction calcul_depth_count_kernel_cuda\n")
calcul_gc_kernel_cuda
(
calcul_gc_kernel_cuda
(
d_gc_data
,
d_gc_data
,
np
.
int32
(
seq_length
),
np
.
int32
(
seq_length
),
...
@@ -1226,6 +1358,11 @@ def main_calcul(
...
@@ -1226,6 +1358,11 @@ def main_calcul(
sys
.
stderr
.
write
(
sys
.
stderr
.
write
(
"
\t
Copie les resultats du GPU (d_depth_results) vers le CPU (depth_results)
\n
"
"
\t
Copie les resultats du GPU (d_depth_results) vers le CPU (depth_results)
\n
"
)
)
#cuda.memcpy_dtoh(depth_results_count, d_depth_results_count)
#sys.stderr.write(
# "\t Copie les resultats du GPU (d_depth_results_count) vers le CPU (depth_results_count)\n"
#)
cuda
.
memcpy_dtoh
(
gc_results
,
d_gc_results
)
# cuda.memcpy_dtoh(dest, src)
cuda
.
memcpy_dtoh
(
gc_results
,
d_gc_results
)
# cuda.memcpy_dtoh(dest, src)
sys
.
stderr
.
write
(
sys
.
stderr
.
write
(
...
@@ -1248,8 +1385,8 @@ def main_calcul(
...
@@ -1248,8 +1385,8 @@ def main_calcul(
# Appel fonctions medianes
# Appel fonctions medianes
sys
.
stderr
.
write
(
"
\t
appel fonctions calcul medianes
\n
"
)
sys
.
stderr
.
write
(
"
\t
appel fonctions calcul medianes
\n
"
)
m
=
calcul_med_total
(
depth_correction_results
)
m
=
calcul_med_total
(
depth_correction_results
,
chr
)
gc_to_median
=
calcul_med_same_gc
(
gc_results
,
depth_correction_results
)
gc_to_median
=
calcul_med_same_gc
(
gc_results
,
depth_correction_results
,
chr
)
# Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
# Convertir gc_to_median en un tableau NumPy pour le transfert vers CUDA
sys
.
stderr
.
write
(
"
\t
Conversion medianes en tableau numpy
\n
"
)
sys
.
stderr
.
write
(
"
\t
Conversion medianes en tableau numpy
\n
"
)
...
@@ -1286,7 +1423,7 @@ def main_calcul(
...
@@ -1286,7 +1423,7 @@ def main_calcul(
# Appel fonction moyenne
# Appel fonction moyenne
sys
.
stderr
.
write
(
"
\t
appel fonction calcul moyenne
\n
"
)
sys
.
stderr
.
write
(
"
\t
appel fonction calcul moyenne
\n
"
)
mean_chr
=
calcul_moy_totale
(
normalize_depth_results
)
mean_chr
=
calcul_moy_totale
(
normalize_depth_results
,
chr
)
# Appeler le kernel de ratio
# Appeler le kernel de ratio
ratio_par_window_kernel_cuda
(
ratio_par_window_kernel_cuda
(
...
@@ -1307,7 +1444,7 @@ def main_calcul(
...
@@ -1307,7 +1444,7 @@ def main_calcul(
cuda
.
memcpy_dtoh
(
ratio_par_window_results
,
d_ratio_par_window_results
)
cuda
.
memcpy_dtoh
(
ratio_par_window_results
,
d_ratio_par_window_results
)
# Création table à partir du ratio
# Création table à partir du ratio
mean_ratio
,
std_ratio
,
med_ratio
=
compute_mean_
and_std
(
ratio_par_window_results
)
mean_ratio
,
std_ratio
,
med_ratio
=
compute_mean_
std_med
(
ratio_par_window_results
,
chr
)
# Appeler le kernel de calcule du ratio divisé par le ratio moyen
# Appeler le kernel de calcule du ratio divisé par le ratio moyen
ratio_par_mean_ratio_kernel_cuda
(
ratio_par_mean_ratio_kernel_cuda
(
...
@@ -1356,11 +1493,18 @@ def main_calcul(
...
@@ -1356,11 +1493,18 @@ def main_calcul(
)
)
# Appel fonction segmentation
# Appel fonction segmentation
segmentation
(
events
,
segment
)
segmentation
(
events
,
segment
,
chr
)
# Appel fonction display_results_vcf
# Appel fonction display_results_vcf
display_results_vcf
(
sample
,
segment
,
signal
,
lengthFilter
,
output_file
)
display_results_vcf
(
sample
,
segment
,
signal
,
lengthFilter
,
output_file
,
chr
)
#Ecrire les résultats dans le fichier de sortie
#with open(output_file, 'a') as f:
# sys.stderr.write("\t ecriture des fichiers\n")
# for i, (depth_data, depth_correction, depth_normalize_val, gc_data, map_data, ratio, ratio_mean_ratio, z_score) in enumerate(zip(depth_results, depth_correction_results, normalize_depth_results, gc_results, map_results, ratio_par_window_results, ratio_par_mean_ratio_results, z_score_results)):
# pos_start = (i * step_size) + 1
# pos_end = pos_start + window_size
# f.write(f"{chr}\t{pos_start}\t{pos_end}\t{depth_data}\t{depth_correction}\t{depth_normalize_val}\t{gc_data}\t{map_data}\t{ratio}\t{ratio_mean_ratio}\t{z_score}\n")
# Programme principal
# Programme principal
# Calcul nombre de coeurs max pour le GPU
# Calcul nombre de coeurs max pour le GPU
...
@@ -1374,13 +1518,41 @@ print("Nombre de CPU: ", multiprocessing.cpu_count())
...
@@ -1374,13 +1518,41 @@ print("Nombre de CPU: ", multiprocessing.cpu_count())
print
(
f
"Nombre de coeurs max GPU: {num_cores}"
)
print
(
f
"Nombre de coeurs max GPU: {num_cores}"
)
gc_file
=
"/work/gad/shared/pipeline/grch38/index/grch38_essential.fa"
gc_file
=
"/work/gad/shared/pipeline/grch38/index/grch38_essential.fa"
mappability_file
=
"/work/gad/shared/
analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38
.bedgraph"
mappability_file
=
"/work/gad/shared/
pipeline/grch38/cnv/k100.Umap.MultiTrackMappability
.bedgraph"
seq
=
parse_fasta
(
gc_file
)
seq
=
parse_fasta
(
gc_file
)
mappability
=
dico_mappabilite
(
mappability_file
)
mappability
=
dico_mappabilite
(
mappability_file
)
with
pysam
.
AlignmentFile
(
bamfile_path
,
"rb"
)
as
bamfile_handle
:
if
num_chr
==
"ALL"
:
for
i
,
seq_length
in
enumerate
(
bamfile_handle
.
lengths
):
with
pysam
.
AlignmentFile
(
bamfile_path
,
"rb"
)
as
bamfile_handle
:
chr
=
bamfile_handle
.
references
[
i
]
for
i
,
seq_length
in
enumerate
(
bamfile_handle
.
lengths
):
chr
=
bamfile_handle
.
references
[
i
]
sys
.
stderr
.
write
(
"Chromosome :
%
s, seq length :
%
s
\n
"
%
(
chr
,
seq_length
))
# Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
main_calcul
(
bamfile_handle
,
chr
,
seq_length
,
window_size
,
step_size
,
zscore_threshold
,
lengthFilter
,
output_file
,
sample
,
)
logging
.
basicConfig
(
filename
=
"
%
s"
%
(
logfile
),
filemode
=
"a"
,
level
=
logging
.
INFO
,
format
=
"
%(asctime)
s
%(levelname)
s -
%(message)
s"
,
)
logging
.
info
(
"end"
)
else
:
with
pysam
.
AlignmentFile
(
bamfile_path
,
"rb"
)
as
bamfile_handle
:
seq_length
=
bamfile_handle
.
lengths
[
int
(
num_chr
)
-
1
]
chr
=
bamfile_handle
.
references
[
int
(
num_chr
)
-
1
]
sys
.
stderr
.
write
(
"Chromosome :
%
s, seq length :
%
s
\n
"
%
(
chr
,
seq_length
))
sys
.
stderr
.
write
(
"Chromosome :
%
s, seq length :
%
s
\n
"
%
(
chr
,
seq_length
))
# Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
# Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
...
@@ -1403,4 +1575,3 @@ with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
...
@@ -1403,4 +1575,3 @@ with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
format
=
"
%(asctime)
s
%(levelname)
s -
%(message)
s"
,
format
=
"
%(asctime)
s
%(levelname)
s -
%(message)
s"
,
)
)
logging
.
info
(
"end"
)
logging
.
info
(
"end"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment