test_gpu_mean_depth.py 13.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
import pysam
import sys
import getopt
import logging
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
from pycuda.autoinit import context
11
import multiprocessing
12
from multiprocessing import Process
13 14

# Options
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41

def parse_arguments():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'b:w:s:t:o:e:')
        bamfile_path, window_size, step_size, output_file, logfile = None, None, None, None, None
        for opt, arg in opts:
            if opt in ("-b"):
                bamfile_path = arg
            if opt in ("-w"):
                window_size = int(arg)
            if opt in ("-s"):
                step_size = int(arg)
            if opt in ("-o"):
                output_file = arg
            if opt in ("-e"):
                logfile = arg
        return bamfile_path, window_size, step_size, output_file, logfile
    except getopt.GetoptError:
        print('Invalid argument')
        sys.exit(1)

if __name__ == '__main__':
    bamfile_path, window_size, step_size, output_file, logfile = parse_arguments()
    logging.basicConfig(filename='%s' % (logfile), filemode='a', level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s')
    logging.info('start')
    global seq
    
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
# Code CUDA
mod = SourceModule("""
//Kernel pour calculer la profondeur moyenne brute

__global__ void calcul_depth_kernel(int *depth_data, int seq_length, int window_size, int step_size, float *depth_results) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    if (idx < seq_length - window_size + 1) {
        int pos_start = (idx * step_size) + 1;
        int pos_end = pos_start + window_size;
        int count_reads = 0;

                
        for (int i = pos_start; i < pos_end; i++) {
            count_reads += depth_data[i];
        }

        float avg_depth = (float)count_reads / window_size;
        depth_results[idx] = avg_depth;
    }
}

//Kernel pour calculer le GC content

__global__ void calcul_gc_kernel(char *gc_data, int seq_length, int window_size, int step_size, float *gc_results) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    if (idx < seq_length - window_size + 1) {
        int pos_start = (idx * step_size) + 1;
        int pos_end = pos_start + window_size;
        int gc_count = 0;
    
        //printf(gc_data);
        //printf(" ");
        
        for (int i = pos_start; i <= pos_end; ++i) {
            if ((gc_data[i] == 'G') or (gc_data[i] == 'C') or (gc_data[i] == 'g') or (gc_data[i] == 'c')) {
                //printf(&gc_data[i]);
                //printf(" ");
                gc_count++;
            }
        }
        
        float avg_gc = ((float)gc_count / window_size) * 100;
        gc_results[idx] = avg_gc;
    }
}

// Kernel pour calculer la mappabilite moyenne ponderee

__global__ void calcul_map_kernel(float *map_data, int seq_length, int window_size, int step_size, float *map_results) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    if (idx < seq_length - window_size + 1) {
        int pos_start = (idx * step_size) + 1;
        int pos_end = pos_start + window_size;
        float weighted_sum = 0.0;
        float total_weight = 0.0;

        for (int i = pos_start; i <= pos_end; ++i) {
            float weight = map_data[i];
            weighted_sum += weight;
            total_weight += 1;
        }

        float avg_map = weighted_sum / total_weight;
        map_results[idx] = avg_map;
    }
}

""")
    
# Obtention de la fonction de kernel compilée
calcul_depth_kernel_cuda = mod.get_function("calcul_depth_kernel")
calcul_gc_kernel_cuda = mod.get_function("calcul_gc_kernel")
calcul_map_kernel_cuda = mod.get_function("calcul_map_kernel")

#############################################
######<---Fonctions  mappability--->#########
#############################################
def merge_intervals(intervals):
123
    #sys.stderr.write("\t Entering merge_intervals\n")
124 125 126 127 128 129 130 131 132 133 134 135
    merged = []
    start, end, score = intervals[0]
    for interval in intervals[1:]:
        if interval[2] == score:
            end = interval[1]
        else:
            merged.append((start, end, score))
            start, end, score = interval
    merged.append((start, end, score))
    return merged

def dico_mappabilite(mappability_file):
136
    sys.stderr.write("\t Entering dico_mappabilite\n")
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
    mappability_dico = {}

    with open(mappability_file, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            if len(fields) < 4:
                continue
            
            chromosome = fields[0]
            start_pos = int(fields[1])
            end_pos = int(fields[2])
            score = float(fields[3])

            if chromosome not in mappability_dico:
                mappability_dico[chromosome] = []

            mappability_dico[chromosome].append((start_pos, end_pos, score))

    # Ajout de la position 0 pour chaque chromosome
    for chromosome, intervals in mappability_dico.items():
        if intervals[0][0] != 0:
            mappability_dico[chromosome].insert(0, (0, intervals[0][0], 0))

    # Fusion des intervalles ayant le même score
    for chromosome, intervals in mappability_dico.items():
        merged_intervals = merge_intervals(intervals)
        mappability_dico[chromosome] = {start: score for start, _, score in merged_intervals}
164 165
        
    sys.stderr.write("\t Leaving dico_mappabilite\n")
166 167 168
    return mappability_dico #Dictionnaire avec les bornes de mappabilité en fonction des positions pour chaque chromosome.
    
def calcul_mappability(seq_length, mappability, chr):
169
    sys.stderr.write("\t Entering calcul_mappability \n")
170
    map_data = np.zeros(seq_length, dtype=np.float32)
171
    sorted_keys = sorted(mappability[chr].keys())
172
    #sys.stderr.write("\t sorted_keys =\n")
173
    
174 175 176 177 178 179 180 181 182 183 184 185
    prev_bound = 0
    prev_mappability = 0
    
    for bound in sorted_keys:
        for i in range(prev_bound, min(seq_length, bound)):
            map_data[i] = prev_mappability
        prev_bound = bound
        prev_mappability = mappability[chr][bound]
    
    # Fill remaining positions if sequence length exceeds last bound
    for i in range(prev_bound, seq_length):
        map_data[i] = prev_mappability
186

187
    sys.stderr.write("\t Leaving calcul_mappability \n")
188 189
    return map_data
 
190 191 192 193
#############################################
######<---Fonctions calcul gc--->############
#############################################
def parse_fasta(gc_file):
194
    sys.stderr.write("\t Entering parse_fasta\n")
195 196 197 198 199 200 201 202
    sequences = {}
    with open(gc_file, 'r') as f:
        data = f.read().split('>')
        for entry in data[1:]:
            lines = entry.split('\n')
            header = lines[0]
            sequence = ''.join(lines[1:])
            sequences[header] = sequence
203 204
            
    sys.stderr.write("\t Leaving parse_fasta\n")
205 206
    return sequences

207
    
208
def calcul_gc_content(seq_length, chr, seq):
209
    sys.stderr.write("\t Entering calcul_gc_content\n")
210 211 212 213 214
    gc_data = np.zeros(seq_length, dtype="S")
    for i in range(len(seq[chr])):
        gc_data[i] = seq[chr][i]
    #print(gc_data[9950:10200])
    
215 216 217
    sys.stderr.write("\t Leaving calcul_gc_content\n")
    return gc_data

218 219 220
##############################################
######<---Fonctions calcul Depth Seq--->######
##############################################
221
def calcul_depth_seq(seq_length, bamfile_path, chr):
222
    sys.stderr.write("\t Entering calcul_depth_seq\n")
223
    depth_data = np.zeros(seq_length, dtype=np.int32)
224
    for pileupcolumn in bamfile_path.pileup():
225 226 227 228 229
        #sys.stderr.write("%s : %s \n" % (pileupcolumn.reference_pos, pileupcolumn.nsegments))
        if pileupcolumn.reference_pos > seq_length:
            break
        depth_data[pileupcolumn.reference_pos] = pileupcolumn.nsegments
    
230
    sys.stderr.write("\t Leaving calcul_depth_seq\n")
231 232 233 234 235
    return depth_data

#################################
######<---Fonction main--->######
#################################
236
def main_calcul(bamfile_path, chr, seq_length, window_size, step_size, output_file):
237 238 239
    sys.stderr.write("\t entering main_calcul\n")
    global seq

240 241
    # Calcul mappability
    map_data = Process(target = calcul_mappability, args=(seq_length, mappability, chr))
242

243 244
    # Calcul GC
    gc_data = Process(target = calcul_gc_content, args = (seq_length, chr, seq))
245

246 247 248 249 250 251 252 253 254 255
    # Calcul depth seq
    depth_data = Process(target = calcul_depth_seq, args = (seq_length, bamfile_path, chr))
    
    map_data.start()
    gc_data.start()
    depth_data.start()
    
    map_data.join()
    gc_data.join()
    depth_data.join()
256
    
257

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
    # Transférer le tableau NumPy vers CUDA
    d_depth_data = cuda.mem_alloc(depth_data.nbytes)
    cuda.memcpy_htod(d_depth_data, depth_data)
    sys.stderr.write("\t d_depth_data : %s, %s\n" % (d_depth_data, d_depth_data.as_buffer(sys.getsizeof(depth_data))))
    
    d_gc_data = cuda.mem_alloc(gc_data.nbytes)
    cuda.memcpy_htod(d_gc_data, gc_data)
    sys.stderr.write("\t d_gc_data : %s, %s\n" % (d_gc_data, d_gc_data.as_buffer(sys.getsizeof(gc_data))))
    
    d_map_data = cuda.mem_alloc(map_data.nbytes)
    cuda.memcpy_htod(d_map_data, map_data)
    sys.stderr.write("\t d_map_data : %s, %s\n" % (d_map_data, d_map_data.as_buffer(sys.getsizeof(map_data))))
    
    # Calculer la taille de la grille et de bloc pour CUDA
    block_size = num_cores
    sys.stderr.write("\t blocksize (nb de threads) = %s\n" % (num_cores))
    grid_size = int((int((seq_length - window_size) / step_size) + 1) / block_size)+1
    sys.stderr.write("\t grid_size = \n")
    
    # Initialiser le tableau pour stocker les résultats de la profondeur moyenne
    depth_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
    sys.stderr.write("\t Definition de depth_results\n")

    gc_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
    sys.stderr.write("\t Definition de gc_results\n")
    
    map_results = np.zeros(int((seq_length - window_size) / step_size) + 1, dtype=np.float32)
    sys.stderr.write("\t Definition de map_results\n")
        
    # Allouer de la mémoire pour les résultats sur le périphérique CUDA
    d_depth_results = cuda.mem_alloc(depth_results.nbytes)
    sys.stderr.write("\t d_depth_results = %s\n" % d_depth_results.as_buffer(sys.getsizeof(d_depth_results)))
    sys.stderr.write("\t depth_results.nbytes = %s\n" % depth_results.nbytes)

    d_gc_results = cuda.mem_alloc(gc_results.nbytes)
    sys.stderr.write("\t d_gc_results = %s\n" % d_gc_results.as_buffer(sys.getsizeof(d_gc_results)))
    sys.stderr.write("\t gc_results.nbytes = %s\n" % gc_results.nbytes)
    
    d_map_results = cuda.mem_alloc(map_results.nbytes)
    sys.stderr.write("\t d_map_results = %s\n" % d_map_results.as_buffer(sys.getsizeof(d_map_results)))
    sys.stderr.write("\t map_results.nbytes = %s\n" % map_results.nbytes)
    
    # Appeler la fonction de calcul de profondeur avec CUDA
    calcul_depth_kernel_cuda(d_depth_data, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_depth_results, block=(block_size, 1, 1), grid=(grid_size, 1))
    sys.stderr.write("\t appel fonction calc_depth_kernel_cuda\n")

    calcul_gc_kernel_cuda(d_gc_data, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_gc_results, block=(block_size, 1, 1), grid=(grid_size, 1))
    sys.stderr.write("\t appel fonction calc_gc_kernel_cuda\n")

    calcul_map_kernel_cuda(d_map_data, np.int32(seq_length), np.int32(window_size), np.int32(step_size), d_map_results, block=(block_size, 1, 1), grid=(grid_size, 1))
    sys.stderr.write("\t appel fonction calc_map_kernel_cuda\n")

    context.synchronize()

    # Copier les résultats depuis le périphérique CUDA vers l'hôte
    cuda.memcpy_dtoh(depth_results, d_depth_results) #cuda.memcpy_dtoh(dest, src)
    sys.stderr.write("\t Copie les resultats du GPU (d_depth_results) vers le CPU (depth_results)\n")
    
    cuda.memcpy_dtoh(gc_results, d_gc_results) #cuda.memcpy_dtoh(dest, src)
    sys.stderr.write("\t Copie les resultats du GPU (d_gc_results) vers le CPU (gc_results)\n")
    
    cuda.memcpy_dtoh(map_results, d_map_results) #cuda.memcpy_dtoh(dest, src)
    sys.stderr.write("\t Copie les resultats du GPU (d_map_results) vers le CPU (map_results)\n")
    
    # Ecrire les résultats dans le fichier de sortie
    with open(output_file, 'a') as f:
        sys.stderr.write("\t ecriture des fichiers\n")
        for i, (avg_depth, avg_gc, avg_map) in enumerate(zip(depth_results, gc_results, map_results)):
            pos_start = (i * step_size) + 1
            pos_end = pos_start + window_size
            f.write(f"{chr}\t{pos_start}\t{pos_end}\t{avg_depth}\t{avg_gc}\t{avg_map}\n")


# Programme principal
#Calcul nombre de coeurs max pour le GPU
333

334 335 336
device = cuda.Device(0)
attributes = device.get_attributes()
num_cores = attributes[1]
337 338
print("Nombre de CPU: ", multiprocessing.cpu_count())
print(f"Nombre de coeurs max GPU: {num_cores}")
339

340 341 342 343
gc_file = '/work/gad/shared/pipeline/grch38/index/grch38_essential.fa'
mappability_file = '/work/gad/shared/analyse/test/cnvGPU/test_scalability/wgEncodeCrgMapabilityAlign100mer_no_uniq.grch38.bedgraph'
seq = parse_fasta(gc_file)
mappability = dico_mappabilite(mappability_file)
344 345

with pysam.AlignmentFile(bamfile_path, "rb") as bamfile_handle:
346 347 348 349 350 351 352 353 354
    for i, seq_length in enumerate(bamfile_handle.lengths):
        chr = bamfile_handle.references[i]
        sys.stderr.write("Chromosome : %s, seq length : %s\n" % (chr, seq_length))

        # Appeler la fonction de calcul de la profondeur moyenne pour ce chromosome
        main_calcul(bamfile_handle, chr, seq_length, window_size, step_size, output_file)