#!/bin/bash ### GAD PIPELINE ### ## full_rnaseq.sh ## Description : full analysis pipeline for rnaseq data : include, preprocessing, DE, ASE, splice, VC ## Usage : qsub -v ANALYSISDIR=<path to the analysis dir>,CONFIGFILE=<path to the config file> full_rnaseq.sh ## Output : no standard output ## Requirements : Require all pipeline scripts ## Author : yannis.duffourd@u-bourgogne.fr ## Creation Date : 20200929 ## Last revision date : 20201110 ## Known bugs : None. #$ -N full_rnaseq #$ -q batchbm #$ -pe smp 1 #$ -V # functions function jumpto { label=$1 cmd=$(sed -n "/$label:/{:a;n;p;ba};" $0 | grep -v ':$') eval "$cmd" exit } # logging parameters. # log file if [ -z ${LOGFILE} ] then LOGFILE=full_rnaseq.$(date +"%F_%H-%M-%S").log fi exec 1>> $LOGFILE 2>&1 # mandatory arguments if [ -z ${ANALYSISDIR} ] then echo "ANALYSISDIR was not defined : execution stopped" exit 1 fi # Verify the config file has been passed. Otherwise : stop the script. if [ -z ${CONFIGFILE+x} ] then echo "Config file not provided by the user. You need it to run this script. Stopping execution." exit fi # usefull variables TEMPORARY_DIR=`grep temporary_dir $CONFIGFILE | cut -f2` DBSNP=`grep dbsnp $CONFIGFILE | cut -f2` REF=`grep reference $CONFIGFILE | cut -f2` GATKBASE=`grep GATKbase $CONFIGFILE | cut -f2` JAVACMD=`grep javacmd $CONFIGFILE | cut -f2` PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2` PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2` TARGETPATH=`grep targetlist $CONFIGFILE | cut -f2` # sample list samples=`find $ANALYSISDIR -maxdepth 1 -mindepth 1 -type d -exec basename {} \;` # chr list chr_list=$(grep ">" $REF | sed 's/>//') # python path for proper execution of python PYTHONPATH=$PIPELINEBASE/common:/work/gad/shared/bin/lib/python_2.7/lib/python2.7/site-packages:/work/gad/shared/bin/miniconda2/lib/python2.7/site-packages/ export PYTHONPATH # preprocessing status file if [ -z ${STATUSFILE} ] then STATUSFILE=$ANALYSISDIR/status.tsv fi # check fichier log if [ ! -e $STATUSFILE ] then echo "No status file found : Jumping to organize" jumpto rename else echo "### Getting last analysis status ###" echo "Status file found : $STATUSFILE" # delete .failed files rm $ANALYSISDIR/*.failed # get the status renameRawStatus=$(grep "rename_raw" $STATUSFILE | cut -f2) organizeStatus=$(grep "organize_data_folder" $STATUSFILE | cut -f2) fastqcStatus=$(grep "process_fastqc" $STATUSFILE | cut -f2) trimStatus=$(grep "trim_fastq" $STATUSFILE | cut -f2) alnStatus=$(grep "align_star2" $STATUSFILE | cut -f2) cleanbamStatus=$(grep "clean_bam" $STATUSFILE | cut -f2) sortbamStatus=$(grep "sort_bam" $STATUSFILE | cut -f2) markDupStatus=$(grep "mark_duplicates" $STATUSFILE | cut -f2) rgStatus=$(grep "add_rg" $STATUSFILE | cut -f2) strimStatus=$(grep "split_trim" $STATUSFILE | cut -f2) recalStatus=$(grep "recal_bam" $STATUSFILE | cut -f2) matrixStatus=$(grep "matrix" $STATUSFILE | cut -f2) DEStatus=$(grep "DE" $STATUSFILE | cut -f2) outriderStatus=$(grep "outrider" $STATUSFILE | cut -f2) leafcutterStatus=$(grep "leafcutter" $STATUSFILE | cut -f2) rmatsStatus=$(grep "rmats" $STATUSFILE | cut -f2) SJStatus=$(grep "SJ" $STATUSFILE | cut -f2) integrationStatus=$(grep "integration" $STATUSFILE | cut -f2) # sample list samples=`find $ANALYSISDIR -maxdepth 1 -mindepth 1 -type d -exec basename {} \;` if [ "$renameRawStatus" = "FAIL" ] then echo "Renaming step failed : jumping to organize $renameStatus" jumpto rename else echo "Renaming step was OK" fi if [ "$organizeStatus" = "FAIL" ] then echo "Organizing step failed : jumping to organize : $organizeStatus" jumpto organize else echo "Organizing step was OK" fi if [ "$fastqcStatus" = "FAIL" ] then echo "QC step failed : jumping to qc" jumpto qc else echo "QC step was OK" fi if [ "$trimStatus" = "FAIL" ] then echo "Trimming step failed : jumping to trim" jumpto trim else echo "Trimming step step was OK" fi if [ "$alnStatus" = "FAIL" ] then echo "Alignement step failed : jumping to aln" jumpto aln else echo "Alignement step was OK" fi if [ "$sortsamtobamStatus" = "FAIL" ] then echo "Sorting step failed : jumping to sort" jumpto sort else echo "Sorting step was OK" fi if [ "$cleanbamStatus" = "FAIL" ] then echo "Cleaning step failed : jumping to clean" jumpto clean else echo "Cleaning step was OK" fi if [ "$markDupStatus" = "FAIL" ] then echo "Duplicate marking step failed : jumping to dup" jumpto dup else echo "Duplicate marking step was OK" fi if [ "$recalStatus" = "FAIL" ] then echo "Recalibration step failed : jumping to recal" jumpto recal else echo "Recalibration step was OK" fi if [ "$matrixStatus" = "FAIL" ] then echo "matrix step failed : jumping to recal" jumpto matrix else echo "Matrix step was OK" fi if [ "$DEStatus" = "FAIL" ] then echo "DE step failed : jumping to DE" jumpto DE else echo "DE step was OK" fi if [ "$outriderStatus" = "FAIL" ] then echo "Outrider step failed : jumping to Outrider" jumpto outrider else echo "Outrider step was OK" fi if [ "$leafcutterStatus" = "FAIL" ] then echo "LeafCutter step failed : jumping to LeafCutter" jumpto leafcutter else echo "LeafCutter step was OK" fi if [ "$rmatsStatus" = "FAIL" ] then echo "rMATs step failed : jumping to rMATs" jumpto rmats else echo "rMATs step was OK" fi if [ "$SJStatus" = "FAIL" ] then echo "SJ step failed : jumping to SJ" jumpto SJ else echo "SJ step was OK" fi if [ "$integrationStatus" = "FAIL" ] then echo "Integration step failed : jumping to Integration" jumpto integration else echo "Integration step was OK" fi echo "The pipeline was correctly executed, no need to relaunch a step" exit 0 fi # organize the folder & rename files. rename: ## INPUT = ANALYSISDIR variable ; OUTPUT = NONE echo "### Organizing directory & setting analysis parameters ###" echo "Start : $(date)" # Rename files according to our data sheet echo "Command : $PYTHONBIN $PIPELINEBASE/common/fastq/rename_fastq.py -d $ANALYSISDIR -f rna " $PYTHONBIN $PIPELINEBASE/common/fastq/rename_fastq.py -d $ANALYSISDIR -f rna rename_fastq_exitcode=$? if [ $rename_fastq_exitcode != 0 ] then echo "rename_fastq.py failed: execution stopped" exit 1 fi organize: # organize dir & set parameters for the analysis echo "Command : $PYTHONBIN $PIPELINEBASE/common/fastq/organize_data_folder.py -d $ANALYSISDIR -p $PIPELINEBASE -t $TARGETPATH" $PYTHONBIN $PIPELINEBASE/common/fastq/organize_data_folder.py -d $ANALYSISDIR -p $PIPELINEBASE -t $TARGETPATH echo "End : $(date)" sleep 5 # sample list samples=`find $ANALYSISDIR -maxdepth 1 -mindepth 1 -type d -exec basename {} \;` # first pass fastqc qc: echo "### First pass fastqc ###" echo "Start : $(date)" for currentSample in $samples do echo "FP fastqc for sample : $currentSample" # R1 echo "Command : qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R1.$(date +%F_%H-%M-%S).log $PIPELINEBASE/common/fastq/process_fastqc.sh" qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R1.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh # R2 echo "Command : qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R2.$(date +%F_%H-%M-%S).log $PIPELINEBASE/common/fastq/process_fastqc.sh" qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R2.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh done echo "End : $(date)" # fastq trimming trim: echo "### trim fastq ###" echo "Start : $(date)" for currentSample in $samples do echo "Command : qsub -N trim_fastq_$currentSample -pe smp 4 -hold_jid fastqc_FP_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R1.fastq.gz,OUTPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R2.fastq.gz,FASTQCFILEONE=$ANALYSISDIR/$currentSample/QC/$currentSample.R1_fastqc/summary.txt,FASTQCFILETWO=$ANALYSISDIR/$currentSample/QC/$currentSample.R2_fastqc/summary.txt,MISEQ=F,LOGFILE=$ANALYSISDIR/$currentSample/logs/trim_fastq.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/trim_fastq.sh" qsub -N trim_fastq_$currentSample -pe smp 4 -hold_jid fastqc_FP_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R1.fastq.gz,OUTPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R2.fastq.gz,FASTQCFILEONE=$ANALYSISDIR/$currentSample/QC/$currentSample.R1_fastqc/summary.txt,FASTQCFILETWO=$ANALYSISDIR/$currentSample/QC/$currentSample.R2_fastqc/summary.txt,MISEQ=F,LOGFILE=$ANALYSISDIR/$currentSample/logs/trim_fastq.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/trim_fastq.sh done echo "End : $(date)" # second pass fastqc echo "### Second pass fastqc ###" echo "Start : $(date)" for currentSample in $samples do echo "SP fastqc for sample : $currentSample" # R1 echo "Command : qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R1.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh" qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R1.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh # R2 echo "Command : qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R2.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh" qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R2.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh done echo "End : $(date)" # alignement with STAR2 in 2pass mode aln: echo "### aligning sequences ###" echo "Start : $(date)" compile_dependency_aln="" for currentSample in $samples do echo "Command : qsub -N aln_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,LOGFILE=$ANALYSISDIR/$currentSample/logs/align_star.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/process_RNASeq_alignement_2pass.sh" qsub -N aln_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,LOGFILE=$ANALYSISDIR/$currentSample/logs/align_star.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/process_RNASeq_alignement_2pass.sh compile_dependency_aln=${compile_dependency_aln},aln_${currentSample} done compile_dependency_aln="-hold_jid ${compile_dependency_aln#,}" echo "End : $(date)" # sort bam sort: echo "### Sort & convert sam file to bam ###" echo "Start : $(date)" for currentSample in $samples do echo "Command : qsub -N sortsamtobam_$currentSample -hold_jid aln_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.Aligned.sortedByCoord.out.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/sort_sam_to_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/sort_sam_to_bam.sh" qsub -N sortsamtobam_$currentSample -hold_jid aln_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.Aligned.sortedByCoord.out.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/sort_sam_to_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/sort_sam_to_bam.sh done echo "End : $(date)" # clean bam clean: echo "### Cleaning bam file ###" echo "Start : $(date)" for currentSample in $samples do echo "Command : qsub -N clean_$currentSample -hold_jid sortsamtobam_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/clean_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/clean_bam.sh" qsub -N clean_$currentSample -hold_jid sortsamtobam_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/clean_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/clean_bam.sh done echo "End : $(date)" # read groups rg: echo "### Adding @RG tags to bam file ###" echo "Start : $(date)" for currentSample in $samples do echo "Command : qsub -N addRG_$currentSample -hold_jid clean_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,TECHNOLOGY=wes,LOGFILE=$ANALYSISDIR/$currentSample/logs/add_readgroups.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/bam/add_readgroups.sh" qsub -N addRG_$currentSample -hold_jid clean_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,TECHNOLOGY=wes,LOGFILE=$ANALYSISDIR/$currentSample/logs/add_readgroups.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/bam/add_readgroups.sh done # mark duplicates dup: echo "### Mark duplicates from bam file ###" echo "Start : $(date)" for currentSample in $samples do echo "Command : qsub -N mark_$currentSample -hold_jid addRG_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/mark_duplicates.$(date +"%F_%H-%M-%S").log,METRICSFILE=$ANALYSISDIR/$currentSample/QC/$currentSample.dedup.metrics $PIPELINEBASE/common/bam/mark_duplicates.sh" qsub -N mark_$currentSample -hold_jid addRG_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/mark_duplicates.$(date +"%F_%H-%M-%S").log,METRICSFILE=$ANALYSISDIR/$currentSample/QC/$currentSample.dedup.metrics $PIPELINEBASE/common/bam/mark_duplicates.sh done echo "End : $(date)" # Split'N'Trim strim: echo "### split and trim the bam file ###" echo "Start : $(date)" for currentSample in $samples do echo "Command : qsub -N strim_$currentSample -hold_jid mark_$currentSample -pe smp 3 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/split_trim.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/split_cigar_reads.sh" qsub -N strim_$currentSample -hold_jid mark_$currentSample -pe smp 3 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/split_trim.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/split_cigar_reads.sh done echo "End : $(date)" # recalibrates bases recal: echo "### Recalibrating bases ###" echo "Start : $(date)" compile_dependency="" for currentSample in $samples do echo "Command : qsub -N recal_$currentSample -hold_jid strim_$currentSample -pe smp 6 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.bam,TABLE=$ANALYSISDIR/$currentSample/QC/$currentSample.recal_table.tsv,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/recal_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/recal_bam.sh compile_dependency=${compile_dependency},recal_$currentSample" qsub -N recal_$currentSample -hold_jid strim_$currentSample -pe smp 6 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.bam,TABLE=$ANALYSISDIR/$currentSample/QC/$currentSample.recal_table.tsv,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/recal_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/recal_bam.sh compile_dependency=${compile_dependency},recal_${currentSample} done compile_dependency="-hold_jid ${compile_dependency#,}" echo "End : $(date)" # Differential expression pipeline # note that the analysis will be performed on the total batch considering it'as a classical design, with samples having the same caracteristics # create a list of count files matrix: echo "Command : qsub -N create_count ${compile_dependency_aln} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_list.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_list_counts.sh" qsub -N create_count ${compile_dependency_aln} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_list.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_list_counts.sh # create the matrix echo "Command : qsub -N matrix -hold_jid create_count -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTFILE=$ANALYSISDIR/all.counts.files.tsv,OUTPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/create_counts_matrix.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_create_counts_matrix.sh" qsub -N matrix -hold_jid create_count -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTFILE=$ANALYSISDIR/all.counts.files.tsv,OUTPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/create_counts_matrix.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_create_counts_matrix.sh # create the design files echo "Command : qsub -N design -hold_jid matrix -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_design.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/create_design.sh" qsub -N design -hold_jid matrix -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_design.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/create_design.sh annotdependency="none" # Run the DE analysis #deseq DE: for currentSample in $samples do echo "Command : qsub -N DE_$currentSample -hold_jid design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,DESIGNFILE=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_DE.$(date +"%F_%H-%M-%S").log,SAMPLE=$currentSample,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_de_analysis.sh" qsub -N DE_$currentSample -hold_jid design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,DESIGNFILE=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_DE.$(date +"%F_%H-%M-%S").log,SAMPLE=$currentSample,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_de_analysis.sh annotdependency=${annotdependency},DE_${currentSample} done #outrider outrider: for currentSample in $samples do echo "Command : qsub -N outrider_$currentSample -hold_jid design,matrix -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,MATRIX=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_outrider.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_outrider.sh" qsub -N outrider_$currentSample -hold_jid design,matrix -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,MATRIX=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_outrider.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_outrider.sh annotdependency=${annotdependency},outrider_${currentSample} done # Run the splice pipeline # Leafcutter leafcutter: #1 Converting Bam to junction files compile_dependency="" for currentSample in $samples do echo "Command : qsub -N bam2junc_$currentSample -hold_jid sortsamtobam_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.junc,LOGFILE=$ANALYSISDIR/$currentSample/logs/bam2junc.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_bam2junc.sh" qsub -N bam2junc_$currentSample -hold_jid sortsamtobam_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.junc,LOGFILE=$ANALYSISDIR/$currentSample/logs/bam2junc.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_bam2junc.sh compile_dependency=${compile_dependency},bam2junc_$currentSample done #2 Intron clustering echo "Command : qsub -N icluster -hold_jid ${compile_dependency} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTFILE=all.intron.clustering.tsv,LOGFILE=$ANALYSISDIR/iclustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_intron_clustering.sh" qsub -N icluster -hold_jid ${compile_dependency} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTFILE=all.intron.clustering.tsv,LOGFILE=$ANALYSISDIR/iclustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_intron_clustering.sh #3 design & differential intron analysis for currentSample in $samples do echo "Command : qsub -N ddintron_$currentSample -hold_jid icluster,design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v OUTPUTPREFIX=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier,INPUTFILE=$ANALYSISDIR/all.intron.clustering.tsv_perind_numers.counts.gz,DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.diff.intron.clustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_differential_intron_analysis_rare.sh" qsub -N ddintron_$currentSample -hold_jid icluster,design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v OUTPUTPREFIX=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier,INPUTFILE=$ANALYSISDIR/all.intron.clustering.tsv_perind_numers.counts.gz,DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.diff.intron.clustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_differential_intron_analysis_rare.sh done #4 add coordinates, omim annotation and filter. for currentSample in $samples do echo "Command : qsub -N format_leafcutter_$currentSample -hold_jid ddintron_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier_cluster_significance.txt,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.report.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/format_leafcutter.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_format_leafcutter.sh" qsub -N format_leafcutter_$currentSample -hold_jid ddintron_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier_cluster_significance.txt,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.report.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/format_leafcutter.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_format_leafcutter.sh annotdependency=${annotdependency},format_leafcutter_${currentSample} done # rMATS rmats: #1 run rMATs for currentSample in $samples do echo "Command : qsub -N rmats_$currentSample -hold_jid design,sortsamtobam_$currentSample -pe smp 4 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR/$currentSample/,B1=$ANALYSISDIR/$currentSample/$currentSample.b1.txt,B2=$ANALYSISDIR/$currentSample/$currentSample.b2.txt,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.rmats.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_rMATS.sh" qsub -N rmats_$currentSample -hold_jid design,sortsamtobam_$currentSample -pe smp 4 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR/$currentSample/,B1=$ANALYSISDIR/$currentSample/$currentSample.b1.txt,B2=$ANALYSISDIR/$currentSample/$currentSample.b2.txt,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.rmats.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_rMATS.sh done #2 format / filter / convert_to_bed rMATs for currentSample in $samples do echo "Command : qsub -N pprmats_$currentSample -hold_jid rmats_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTDIR=$ANALYSISDIR/$currentSample/,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rMATs.final.bed,ZSCORE=3,DELTAPSY=0.2,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.postprocessing_rMATs.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_postprocessing_rMATs.sh" qsub -N pprmats_$currentSample -hold_jid rmats_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTDIR=$ANALYSISDIR/$currentSample/,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rMATs.final.bed,ZSCORE=3,DELTAPSY=0.2,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.postprocessing_rMATs.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_postprocessing_rMATs.sh annotdependency=${annotdependency},pprmats_${currentSample} done # SJ method SJ: # Normalize SJ echo "### Normalize SJ ###" echo "Start : $(date +"%F_%H-%M-%S")" dependency="" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N normalize_SJ_${currentSample} -hold_jid aln_$currentSample -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v INPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.tab,OUTPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.norm.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/normalize_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_normalize_SJ.sh" qsub -pe smp 1 -N normalize_SJ_${currentSample} -hold_jid aln_$currentSample -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v INPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.tab,OUTPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.norm.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/normalize_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_normalize_SJ.sh dependency=${dependency},normalize_SJ_${currentSample} done dependency="-hold_jid ${dependency#,}" echo "End : $(date +"%F_%H-%M-%S")" # Create batch sjdb echo "### Create batch sjdb ###" echo "Start : $(date +"%F_%H-%M-%S")" if [ -f "$ANALYSISDIR/SJ.samples.list" ] then rm $ANALYSISDIR/SJ.samples.list fi for currentSample in $samples do echo "$ANALYSISDIR/$currentSample/$currentSample.SJ.out.norm.tab" >> $ANALYSISDIR/SJ.samples.list done echo "Command : qsub -pe smp 1 -N create_batch_sjdb ${dependency} -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v INPUTFILE=$ANALYSISDIR/SJ.samples.list,OUTPUTFILE=$ANALYSISDIR/SJ.batch.list,LOGFILE=$ANALYSISDIR/create_batch_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_create_batch_sjdb.sh" qsub -pe smp 1 -N create_batch_sjdb ${dependency} -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v INPUTFILE=$ANALYSISDIR/SJ.samples.list,OUTPUTFILE=$ANALYSISDIR/SJ.batch.list,LOGFILE=$ANALYSISDIR/create_batch_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_create_batch_sjdb.sh echo "End : $(date +"%F_%H-%M-%S")" # Annot with sjdb echo "### Annot with sjdb ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N annotate_SJ_with_sjdb_${currentSample} -hold_jid normalize_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.norm.tab,SJDB=$ANALYSISDIR/$currentSample/${currentSample}._STARgenome/sjdbList.fromGTF.out.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_sjdb.sh" qsub -pe smp 1 -N annotate_SJ_with_sjdb_${currentSample} -hold_jid normalize_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.norm.tab,SJDB=$ANALYSISDIR/$currentSample/${currentSample}._STARgenome/sjdbList.fromGTF.out.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_sjdb.sh done echo "End : $(date +"%F_%H-%M-%S")" # Annot with genes echo "### Annot with genes ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do # TODO hg19.refseq.with_genes.gtf echo "Command : qsub -pe smp 1 -N annotate_SJ_with_genes_${currentSample} -hold_jid annotate_SJ_with_sjdb_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_genes.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_genes.sh" qsub -pe smp 1 -N annotate_SJ_with_genes_${currentSample} -hold_jid annotate_SJ_with_sjdb_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_genes.${date +%F_%H-%M-%S}.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_genes.sh done echo "End : $(date +"%F_%H-%M-%S")" # Annot with batch echo "### Annot with batch ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N annotate_SJ_with_batch_${currentSample} -hold_jid annotate_SJ_with_genes_${currentSample},create_batch_sjdb -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,BATCH=$ANALYSISDIR/SJ.batch.list,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_batch.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_batch.sh" qsub -pe smp 1 -N annotate_SJ_with_batch_${currentSample} -hold_jid annotate_SJ_with_genes_${currentSample},create_batch_sjdb -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,BATCH=$ANALYSISDIR/SJ.batch.list,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_batch.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_batch.sh done echo "End : $(date +"%F_%H-%M-%S")" # Annot with GTEx echo "### Annot with GTEx ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N annotate_SJ_with_GTEx_${currentSample} -hold_jid annotate_SJ_with_batch_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_GTEx.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_GTEx.sh" qsub -pe smp 1 -N annotate_SJ_with_GTEx_${currentSample} -hold_jid annotate_SJ_with_batch_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_GTEx.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_GTEx.sh done echo "End : $(date +"%F_%H-%M-%S")" # Filter SJ echo "### Filter SJ ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N filter_SJ_${currentSample} -hold_jid annotate_SJ_with_GTEx_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/filter_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_filter_SJ.sh" qsub -pe smp 1 -N filter_SJ_${currentSample} -hold_jid annotate_SJ_with_GTEx_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/filter_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_filter_SJ.sh done echo "End : $(date +"%F_%H-%M-%S")" # convert SJ to bed which will be included in the variant file echo "### SJ to bed ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N SJ_to_bed_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.bed,LOGFILE=$ANALYSISDIR/$currentSample/logs/SJ_to_bed.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_SJ_to_bed.sh" qsub -pe smp 1 -N SJ_to_bed_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.bed,LOGFILE=$ANALYSISDIR/$currentSample/logs/SJ_to_bed.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_SJ_to_bed.sh done echo "End : $(date +"%F_%H-%M-%S")" # Annot with OMIM on filtered SJ file echo "### Annot with OMIM ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N annotate_SJ_with_OMIM_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.annot.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/annotate_SJ_with_OMIM.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_OMIM.sh" qsub -pe smp 1 -N annotate_SJ_with_OMIM_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.annot.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/annotate_SJ_with_OMIM.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_OMIM.sh annotdependency=${annotdependency},annotate_SJ_with_OMIM_${currentSample} done echo "End : $(date +"%F_%H-%M-%S")" # get wgs / wes report back into the analysis folder integration: echo "### Get dijen reports ###" echo "Start : $(date +"%F_%H-%M-%S")" echo "Command : qsub -pe smp 1 -N get_dijen_reports -hold_jid $annotdependency -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/get_dijen_reports.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/wrapper_get_dijen_reports.sh" qsub -pe smp 1 -N get_dijen_reports -hold_jid $annotdependency -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/get_dijen_reports.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/wrapper_get_dijen_reports.sh # from this point the input and output file names cannot be known dependency="" # add rnaseq analysis information to the variant files if possible echo "### Add rnaseq information to nsssi variants ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N nsssi_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=NSSSI,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh" qsub -pe smp 1 -N nsssi_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=NSSSI,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh dependency=${dependency},nsssi_annot_${currentSample} done echo "End : $(date +"%F_%H-%M-%S")" # add rnaseq analysis information to the variant files if possible echo "### Add rnaseq information to genic variants ###" echo "Start : $(date +"%F_%H-%M-%S")" for currentSample in $samples do echo "Command : qsub -pe smp 1 -N genic_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=GENIC,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh" qsub -pe smp 1 -N genic_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=GENIC,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh dependency=${dependency},genic_annot_${currentSample} done echo "End : $(date +"%F_%H-%M-%S")" # Compile .failed files to create log. dependency="-hold_jid ${dependency#,}" echo "### Compiling execution status ###" echo "Start : $(date)" echo "Command : qsub -q batch -N compile_status ${dependency} -pe smp 1 -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v STATUSFILE=$STATUSFILE,LOGFILE=$ANALYSISDIR/compile_rnaseq_status.$(date +%F_%H-%M-%S).log,ANALYSISDIR=$ANALYSISDIR $PIPELINEBASE/common/compile_rnaseq_preprocessing_status.sh" qsub -q batch -N compile_status ${dependency} -pe smp 1 -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v STATUSFILE=$STATUSFILE,LOGFILE=$ANALYSISDIR/compile_rnaseq_status.$(date +"%F_%H-%M-%S").log,ANALYSISDIR=$ANALYSISDIR $PIPELINEBASE/common/compile_rnaseq_preprocessing_status.sh echo "End : $(date)" echo "### END of pipeline execution with success but analysis jobs are still running ###" echo "END OF PIPELINE : $(date)"