Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
cnvCallerGPU
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gad-public
cnvCallerGPU
Commits
e3c16446
Commit
e3c16446
authored
Mar 04, 2025
by
Yannis Duffourd
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Renaming into Galacs
parent
51d0f376
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
513 additions
and
0 deletions
+513
-0
Galacs.cpp
Galacs.cpp
+358
-0
Galacs.cu
Galacs.cu
+97
-0
Galacs.h
Galacs.h
+58
-0
No files found.
Galacs.cpp
0 → 100644
View file @
e3c16446
// C++ std libs
#include <iostream>
// #include <iomanip>
#include <algorithm>
#include <fstream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <sys/time.h>
#include <sys/types.h>
#include <vector>
// C++ Boost
#include <boost/program_options.hpp>
// htslib
#include <htslib/sam.h>
// utils
#include "utils.h"
// Classes
#include "Galacs.h"
using
namespace
std
;
Galacs
::
Galacs
()
{
}
Galacs
::
Galacs
(
std
::
string
incBamFile
,
std
::
string
incReferenceFile
,
std
::
string
incOutputFile
,
std
::
string
incOutputFilePairs
,
std
::
string
incOutputFileSplits
,
int
incWindow
,
int
incStep
,
double
incZscore
,
int
incNbthread
,
int
incLoggingLevel
)
{
this
->
bamFile
=
incBamFile
;
this
->
referenceFile
=
incReferenceFile
;
this
->
outputFile
=
incOutputFile
;
this
->
outputFilePairs
=
incOutputFilePairs
;
this
->
outputFileSplits
=
incOutputFileSplits
;
this
->
window
=
incWindow
;
this
->
step
=
incStep
;
this
->
zscore
=
incZscore
;
this
->
nbThread
=
incNbthread
;
this
->
loggingLevel
=
incLoggingLevel
;
}
int
Galacs
::
mainLoop
()
{
this
->
getGenome
();
this
->
displayGenome
();
this
->
bamParsingLauncher
();
return
0
;
}
int
Galacs
::
getGenome
()
{
struct
timeval
tbegin
,
tend
;
string
ligne
;
char
mot
;
string
header
=
">"
;
int
i
=
1
;
string
currentChr
;
map
<
int32_t
,
map
<
int32_t
,
int16_t
>
>
windows
;
for
(
int32_t
a
=
start
;
a
<
end
;
a
+=
this
->
getStepSize
())
{
map
<
int32_t
,
int16_t
>
tmpMap
;
tmpMap
[
start
+
this
->
getWindowSize
()]
=
0
;
windows
[
start
]
=
tmpMap
;
}
// fasta parsing
ifstream
fastaStream
(
this
->
referenceFile
.
c_str
());
if
(
fastaStream
)
{
while
(
getline
(
fastaStream
,
ligne
))
{
if
(
ligne
.
find
(
header
)
==
0
)
{
ligne
.
replace
(
0
,
1
,
""
);
// Deal with headers including spaces, etc...
if
(
ligne
.
find
(
" "
)
!=
string
::
npos
)
{
ligne
=
parseOnSep
(
ligne
,
" "
)[
0
];
}
currentChr
=
ligne
;
map
<
int
,
char
>
tmpMap
;
this
->
referenceSequence
[
currentChr
]
=
tmpMap
;
}
else
{
for
(
unsigned
int
k
=
0
;
k
<
ligne
.
length
();
k
++
)
{
mot
=
ligne
[
k
];
mot
=
checkBase
(
mot
);
this
->
referenceSequence
[
currentChr
][
i
]
=
mot
;
i
++
;
}
}
}
}
return
0
;
}
void
Galacs
::
displayGenome
()
{
map
<
string
,
map
<
int
,
char
>>::
iterator
myIterA
;
for
(
myIterA
=
this
->
referenceSequence
.
begin
();
myIterA
!=
this
->
referenceSequence
.
end
();
myIterA
++
)
{
cerr
<<
"Chr "
<<
myIterA
->
first
<<
" : "
<<
endl
;
map
<
int
,
char
>
tmpMap
;
tmpMap
=
myIterA
->
second
;
map
<
int
,
char
>::
iterator
myIterB
;
for
(
myIterB
=
tmpMap
.
begin
();
myIterB
!=
tmpMap
.
end
();
myIterB
++
)
{
cerr
<<
myIterB
->
first
<<
" : "
<<
myIterB
->
second
<<
" ; "
;
}
cerr
<<
endl
;
}
}
// bam parsing launcher
int
Galacs
::
bamParsingLauncher
()
{
BOOST_LOG_TRIVIAL
(
trace
)
<<
"Entering Galacs::bamParsingLauncher"
;
boost
::
asio
::
io_service
ioService
;
boost
::
thread_group
threadpool
;
boost
::
asio
::
io_service
::
work
work
(
ioService
);
// create the number of require threads
for
(
int
i
=
0
;
i
<=
this
->
getNbThread
()
-
1
;
i
++
)
{
threadpool
.
create_thread
(
boost
::
bind
(
&
boost
::
asio
::
io_service
::
run
,
&
ioService
));
}
// Take care of the bam
map
<
string
,
map
<
int32_t
,
int32_t
>>::
iterator
myIterA
;
for
(
myIterA
=
(
this
->
refDict
).
begin
();
myIterA
!=
(
this
->
refDict
).
end
();
myIterA
++
)
{
map
<
int32_t
,
int32_t
>::
iterator
myIterB
;
myIterB
=
myIterA
->
second
;
BOOST_LOG_TRIVIAL
(
info
)
<<
"Assigning new task for : "
<<
myIterA
->
first
<<
":"
<<
myIterB
->
first
<<
"-"
<<
myIterB
->
second
;
ioService
.
post
(
boost
::
bind
(
&
Galacs
::
readBam
,
this
,
myIterA
->
first
,
myIterB
->
first
,
myIterB
->
second
));
}
threadpool
.
join_all
();
BOOST_LOG_TRIVIAL
(
info
)
<<
"Thread are joined"
;
// after this you stop the processing loop, any tasks added after will not be
// executed
// ioService.stop();
// compute and display enrichment in motifs
//this->ComputeEnrichment();
BOOST_LOG_TRIVIAL
(
trace
)
<<
"Leaving Galacs::bamParsingLauncher"
;
return
0
;
}
// parse the bam file to get informations on data of the experiment
int
Galacs
::
readBam
(
string
chr
,
int32_t
start
,
int32_t
end
)
{
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Start of Reader for chr"
<<
chr
<<
" : "
<<
start
<<
"-"
<<
end
;
// map<int32_t, map<string, short int>> positionCount;
map
<
int32_t
,
Position
*>
baseDict
;
map
<
int32_t
,
map
<
int32_t
,
int16_t
>
>
windows
;
for
(
int32_t
a
=
start
;
a
<
end
;
a
+=
this
->
getStepSize
())
{
map
<
int32_t
,
int16_t
>
tmpMap
;
tmpMap
[
start
+
this
->
getWindowSize
()]
=
0
;
windows
[
start
]
=
tmpMap
;
}
// map<int32_t, string> localRefBamDict;
string
s_currentChr
;
samFile
*
fp
=
NULL
;
char
*
c_BamFile
=
NULL
;
char
*
region
=
NULL
;
bam_hdr_t
*
header
=
NULL
;
bam1_t
*
b
=
NULL
;
bam1_core_t
*
c
=
NULL
;
hts_idx_t
*
idx
=
NULL
;
hts_itr_t
*
iter
=
NULL
;
int
ret
,
nbReads
=
0
,
nbAlignementDup
=
0
,
nbUnmappedReads
=
0
,
nbBadMapReadQual
=
0
,
nbPassingReads
=
0
;
int32_t
lastChr
,
currentChr
,
currentStartPos
,
lastStartPos
;
map
<
int32_t
,
string
>
refMapDict
;
lastChr
=
-
1
;
lastStartPos
=
0
;
// open the bam file and verify
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Opening BAM file "
<<
" in thread "
<<
this
->
getBamFile
()
<<
":"
<<
chr
;
c_BamFile
=
(
char
*
)(
this
->
getBamFile
()).
c_str
();
BOOST_LOG_TRIVIAL
(
debug
)
<<
"C_str bam file : "
<<
c_BamFile
;
fp
=
hts_open
(
c_BamFile
,
"rb"
);
BOOST_LOG_TRIVIAL
(
trace
)
<<
"Pointer to fp : "
<<
fp
;
if
(
fp
==
NULL
)
{
BOOST_LOG_TRIVIAL
(
error
)
<<
"Cannot open input file "
<<
this
->
getBamFile
()
<<
" in thread : "
<<
chr
<<
":"
<<
start
<<
"-"
<<
end
;
return
1
;
}
BOOST_LOG_TRIVIAL
(
info
)
<<
"BAM is open in thread "
<<
this
->
getBamFile
()
<<
":"
<<
chr
<<
":"
<<
start
<<
"-"
<<
end
;
// load index & verify
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Opening index file in thread "
<<
this
->
getBamFile
()
<<
":"
<<
chr
":"
<<
start
<<
"-"
<<
end
;
idx
=
sam_index_load2
(
fp
,
c_BamFile
,
NULL
);
if
(
idx
==
NULL
)
{
BOOST_LOG_TRIVIAL
(
error
)
<<
"Cannot open bam index for input file "
<<
this
->
getBamFile
()
<<
" in thread : "
<<
<<
chr
":"
<<
start
<<
"-"
<<
end
;
return
1
;
}
BOOST_LOG_TRIVIAL
(
info
)
<<
"Index file is open in thread "
<<
this
->
getBamFile
()
<<
" in thread : "
<<
<<
chr
":"
<<
start
<<
"-"
<<
end
;
// manage header, need to convert to a string corresponding to our reference
header
=
sam_hdr_read
(
fp
);
for
(
int32_t
i
=
0
;
i
<
header
->
n_targets
;
i
++
)
{
string
chr
;
stringstream
ss
;
ss
<<
header
->
target_name
[
i
];
ss
>>
chr
;
refBamDict
[
i
]
=
chr
;
BOOST_LOG_TRIVIAL
(
trace
)
<<
"Ref idx "
<<
i
<<
" : "
<<
header
->
target_name
[
i
];
}
// perform the query
BOOST_LOG_TRIVIAL
(
info
)
<<
"Performing query in thread "
<<
this
->
getBamFile
()
<<
" in thread : "
<<
chr
":"
<<
start
<<
"-"
<<
end
;
string
tmpRegion
=
chr
+
":"
+
int_to_string
(
start
)
+
"-"
+
int_to_string
(
end
)
region
=
(
char
*
)
tmpRegion
.
c_str
();
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Region defined"
;
iter
=
sam_itr_querys
(
idx
,
header
,
region
);
BOOST_LOG_TRIVIAL
(
debug
)
<<
"iter result passed"
;
if
(
iter
==
NULL
)
{
BOOST_LOG_TRIVIAL
(
info
)
<<
"No read found in thread "
<<
this
->
getBamFile
()
<<
" in thread : "
<<
chr
":"
<<
start
<<
"-"
<<
end
;
return
;
}
// init reader
b
=
bam_init1
();
BOOST_LOG_TRIVIAL
(
debug
)
<<
"Bam init passed"
;
struct
timeval
tbegin
,
tend
;
gettimeofday
(
&
tbegin
,
NULL
);
// browse the data from the query
while
((
ret
=
sam_itr_next
(
fp
,
iter
,
b
))
>=
0
)
{
BOOST_LOG_TRIVIAL
(
trace
)
<<
"Reading aln in thread "
<<
this
->
getBamFile
()
<<
":"
<<
chr
<<
": & ret : "
<<
ret
;
bool
take_the_read
=
true
;
nbReads
++
;
currentChr
=
b
->
core
.
tid
;
currentStartPos
=
b
->
core
.
pos
;
s_currentChr
=
this
->
refBamDict
[
currentChr
];
BOOST_LOG_TRIVIAL
(
debug
)
<<
"
\t
aln in thread "
<<
this
->
getBamFile
()
<<
":"
<<
chr
<<
" => "
<<
currentChr
<<
" & "
<<
currentStartPos
<<
" & "
<<
s_currentChr
;
// Check bam is sorted
if
(
currentStartPos
<
lastStartPos
)
{
// cerr << "current pos : " << currentStartPos << " < last pos : " <<
// lastStartPos << endl;
return
1
;
}
c
=
&
b
->
core
;
// check if the read is not a pcr optical duplicate
if
((
c
->
flag
&
BAM_FDUP
)
and
(
this
->
GetPCRDupStatus
()
==
false
))
{
take_the_read
=
false
;
nbAlignementDup
++
;
BOOST_LOG_TRIVIAL
(
debug
)
<<
currentChr
<<
":"
<<
currentStartPos
<<
" = PCR Dup "
;
}
// - it's aligned
if
((
c
->
flag
&
BAM_FUNMAP
)
and
(
c
->
flag
&
BAM_FPROPER_PAIR
))
{
take_the_read
=
false
;
nbUnmappedReads
++
;
BOOST_LOG_TRIVIAL
(
debug
)
<<
currentChr
<<
":"
<<
currentStartPos
<<
" = unmapped "
;
}
// - the mapping quality >= treshold
if
(
c
->
qual
<
this
->
GetAlnQThreshold
())
{
take_the_read
=
false
;
nbBadMapReadQual
++
;
BOOST_LOG_TRIVIAL
(
debug
)
<<
currentChr
<<
":"
<<
currentStartPos
+
1
<<
" = MAPQ="
<<
int_to_string
(
c
->
qual
)
<<
" vs "
<<
this
->
GetAlnQThreshold
()
<<
" : Discarding read."
;
}
else
{
BOOST_LOG_TRIVIAL
(
trace
)
<<
currentChr
<<
":"
<<
currentStartPos
+
1
<<
" = MAPQ="
<<
int_to_string
(
c
->
qual
)
<<
" vs "
<<
this
->
GetAlnQThreshold
()
<<
" : Keeping read."
;
}
// treat read if ok
if
(
take_the_read
)
{
BOOST_LOG_TRIVIAL
(
debug
)
<<
"
\t
aln in thread "
<<
this
->
getBamFile
()
<<
":"
<<
chr
":"
<<
start
<<
"-"
<<
end
<<
" : Taking the read"
;
map
<
int32_t
,
map
<
int32_t
,
int16_t
>
>::
iterator
iter
;
map
<
int32_t
,
int16_t
>::
iterator
iterBis
;
for
(
iter
=
windows
.
lower_bound
(
b
->
core
.
pos
)
;
iter
!=
windows
.
upper_bound
(
b
->
core
.
pos
+
b
->
core
.
l_qseq
)
;
iter
++
)
{
iterBis
=
iter
->
second
;
windows
[
iter
->
first
][
iterBis
->
first
]
++
;
}
}
BOOST_LOG_TRIVIAL
(
debug
)
<<
"
\t
aln in thread "
<<
this
->
getBamFile
()
<<
" in thread : "
<<
chr
":"
<<
start
<<
"-"
<<
end
<<
" : End of read parsing"
;
BOOST_LOG_TRIVIAL
(
trace
)
<<
"End of turn"
;
if
((
nbReads
%
200000
)
==
0
)
{
BOOST_LOG_TRIVIAL
(
info
)
<<
"#"
<<
this
->
getBamFile
()
<<
" : "
<<
chr
<<
" : Total reads : "
<<
nbReads
<<
" ; Passing reads : "
<<
nbPassingReads
<<
" ; Unmapped reads : "
<<
nbUnmappedReads
<<
" ; PCR Duplicate reads : "
<<
nbAlignementDup
<<
" ; bad aln quality reads : "
<<
nbBadMapReadQual
<<
" in thread "
<<
chr
":"
<<
start
<<
"-"
<<
end
;
gettimeofday
(
&
tend
,
NULL
);
BOOST_LOG_TRIVIAL
(
info
)
<<
ExecMeasure
(
tbegin
,
tend
,
"+200 000 read treated"
);
gettimeofday
(
&
tbegin
,
NULL
);
}
}
BOOST_LOG_TRIVIAL
(
trace
)
<<
"End of while iter"
;
bam_destroy1
(
b
);
BOOST_LOG_TRIVIAL
(
trace
)
<<
"bam destroy passed"
;
BOOST_LOG_TRIVIAL
(
info
)
<<
"#"
<<
this
->
getBamFile
()
<<
" : "
<<
chr
<<
" : Total reads : "
<<
nbReads
<<
" ; Passing reads : "
<<
nbPassingReads
<<
" ; Unmapped reads : "
<<
nbUnmappedReads
<<
" ; PCR Duplicate reads : "
<<
nbAlignementDup
<<
" ; bad aln quality reads : "
<<
nbBadMapReadQual
<<
" in thread "
<<
chr
":"
<<
start
<<
"-"
<<
end
;
hts_itr_destroy
(
iter
);
bam_hdr_destroy
(
header
);
// Merging this thread data to the global container
// probably need a mutex
BOOST_LOG_TRIVIAL
(
info
)
<<
"Merging thread data in final data in thread "
<<
chr
":"
<<
start
<<
"-"
<<
end
;
map
<
int32_t
,
map
<
int32_t
,
int32_t
>
>::
iterator
myIterA
;
for
(
myIterA
=
windows
.
begin
();
myIterA
!=
windows
.
end
();
myIterA
++
)
{
map
<
int32_t
,
int16_t
>::
iterator
myIterB
;
myIterB
=
myIterA
->
second
;
rawCounts
[
chr
][
myIterA
->
first
][
myIterB
->
first
]
=
myIterB
->
second
;
}
// delete fp;
// delete c_BamFile;
// delete region;
// delete header;
// delete iter;
BOOST_LOG_TRIVIAL
(
debug
)
<<
"End of Reader for indiv in thread "
<<
chr
":"
<<
start
<<
"-"
<<
end
;
return
0
;
}
Galacs.cu
0 → 100644
View file @
e3c16446
// cnvCaller.cu
// Version : 1.1.0
// Description : C++/CUDA kernels to compute information needed by cnvcallerGPU
// Author: Theo.Serralta@u-bourgogne.fr ; yannis.duffourd@u-bourgogne.fr ; anthony.auclair@u-bourgogne.fr
#include <cuda_runtime.h>
// Kernel to compute raw average depth
__global__ void computeDepthKernel(int *depth_data, int seq_length, int window_size, int step_size, float *depth_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
int pos_start = (idx * step_size);
int pos_end = pos_start + window_size;
int count_reads = 0;
for (int i = pos_start; i < pos_end; i++) {
count_reads += depth_data[i];
}
depth_results[idx] = static_cast<float>(count_reads) / window_size;
}
}
// NEED TO OPTIMIZE WITH 3RD DEGREE POLYNOME
// Kernel to normalize depth using GC content and mappability
__global__ void normalizeDepthKernel(float *depth_correction, float *gc_results, float *map_results,
float mean_depth, float *gc_to_median, int seq_length,
int window_size, int step_size, float *normalized_depth) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
float gc_value = gc_results[idx];
float map_value = map_results[idx];
float depth_value = depth_correction[idx];
// Récupérer la médiane du GC content
float gc_median = gc_to_median[(int)gc_value];
// Vérifier les valeurs pour éviter des erreurs (division par zéro)
if (gc_median != 0.0f && map_value != 0.0f) {
normalized_depth[idx] = (depth_value / map_value) * (mean_depth / gc_median);
} else {
normalized_depth[idx] = 0.0f;
}
}
}
// Kernel to compute GC content in a sliding window
__global__ void computeGCKernel(char *seq_data, int seq_length, int window_size, int step_size, float *gc_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
int pos_start = (idx * step_size);
int pos_end = pos_start + window_size;
int gc_count = 0;
for (int i = pos_start; i < pos_end; ++i) {
char base = seq_data[i];
if (base == 'G' || base == 'C' || base == 'g' || base == 'c') {
gc_count++;
}
}
gc_results[idx] = (static_cast<float>(gc_count) / window_size) * 100.0f;
}
}
// Kernel to compute weighted average mappability
__global__ void computeMappabilityKernel(float *map_data, int seq_length, int window_size, int step_size, float *map_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length - window_size + 1) {
int pos_start = (idx * step_size);
int pos_end = pos_start + window_size;
float weighted_sum = 0.0f;
float total_weight = 0.0f;
for (int i = pos_start; i < pos_end; ++i) {
float weight = map_data[i];
weighted_sum += weight;
total_weight += 1.0f;
}
map_results[idx] = (total_weight > 0.0f) ? (weighted_sum / total_weight) : 0.0f;
}
}
// Kernel to compute Z-score
__global__ void computeZScoreKernel(float *normalized_depth, float mean_ratio, float std_ratio, int seq_length, float *z_score_results) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < seq_length) {
float value = normalized_depth[idx];
if (isinf(value) || isnan(value)) {
value = 0.0f;
}
z_score_results[idx] = (value - mean_ratio) / std_ratio;
}
}
\ No newline at end of file
Galacs.h
0 → 100644
View file @
e3c16446
#ifndef _CNVCALLERGPU_H
#define _CNVCALLERGPU_H
#include <vector>
#include <map>
#include <string>
#include <htslib/sam.h>
class
cnvCallerGPU
{
public
:
cnvCallerGPU
();
cnvCallerGPU
(
std
::
string
,
std
::
string
,
std
::
string
,
std
::
string
,
std
::
string
,
int
,
int
,
double
,
int
,
int
);
int
mainLoop
();
int
getNbThread
();
int
getWindowSize
();
int
getStepSize
();
double
getZscore
();
std
::
string
getOutputFilePairs
();
std
::
string
getOutputFileSplit
();
std
::
string
getOutputFile
();
std
::
string
getReferenceFile
();
std
::
string
getBamFile
();
private
:
std
::
string
sampleName
;
std
::
string
inputBamFile
;
std
::
string
referenceFile
;
std
::
string
outputFile
;
std
::
string
outputFilePairs
;
std
::
string
outputFileSplit
;
std
::
map
<
std
::
string
,
std
::
map
<
int32_t
,
std
::
map
<
int32_t
,
int32_t
>>>
rawCounts
;
std
::
map
<
std
::
string
,
std
::
map
<
int32_t
,
std
::
map
<
int32_t
,
float
>>>
gcContent
;
std
::
map
<
std
::
string
,
std
::
map
<
int32_t
,
int32_t
>>
refDict
;
int
depthThreshold
;
int
nbThread
;
int
loggingLevel
;
int
window
;
int
step
;
double
zscore
;
// method
int
getGenome
();
void
displayGenome
();
int
readBam
(
std
::
string
,
int32_t
,
int32_t
);
int
bamParsingLauncher
();
};
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment