#!/bin/bash

### GAD PIPELINE ###
## full_rnaseq.sh
## Description : full analysis pipeline for RNA-seq data : includes preprocessing, DE, ASE, splice and VC
## Usage : qsub -v ANALYSISDIR=<path to the analysis dir>,CONFIGFILE=<path to the config file>  full_rnaseq.sh
## Output : no standard output
## Requirements : Require all pipeline scripts

## Author : yannis.duffourd@u-bourgogne.fr
## Creation Date : 20200929
## Last revision date : 20201110
## Known bugs : None.

#$ -N full_rnaseq
#$ -q batchbm
#$ -pe smp 1
#$ -V





# functions
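# jumpto <label> : resume execution at a "label:" line further down in this script.
# sed prints every line after the matching label, grep drops the label lines themselves,
# and eval runs the remainder, so a step flagged as failed in the status file can be
# re-run without repeating the earlier steps.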
function jumpto
{
    label=$1
    cmd=$(sed -n "/$label:/{:a;n;p;ba};" "$0" | grep -v ':$')
    eval "$cmd"
    exit
}

# logging parameters.
# log file
if [ -z "${LOGFILE}" ]
then
    LOGFILE=full_rnaseq.$(date +"%F_%H-%M-%S").log
fi
exec 1>> $LOGFILE 2>&1
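# from this point, all stdout and stderr are appended to $LOGFILE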

# mandatory arguments
if [ -z "${ANALYSISDIR}" ]
then
    echo "ANALYSISDIR was not defined : execution stopped"
    exit 1
fi

# Verify the config file has been passed. Otherwise : stop the script.
if [ -z "${CONFIGFILE+x}" ]
then
    echo "Config file not provided by the user. You need it to run this script. Stopping execution."
    exit 1
fi

# useful variables
TEMPORARY_DIR=`grep temporary_dir $CONFIGFILE | cut -f2`
DBSNP=`grep dbsnp $CONFIGFILE | cut -f2`
REF=`grep reference $CONFIGFILE | cut -f2`
GATKBASE=`grep GATKbase $CONFIGFILE | cut -f2`
JAVACMD=`grep javacmd $CONFIGFILE | cut -f2`
PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
TARGETPATH=`grep targetlist $CONFIGFILE | cut -f2`
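# the config file is expected to hold tab-separated "key<TAB>value" lines, e.g. (illustrative values only):
#   temporary_dir	/path/to/tmp
#   reference	/path/to/reference.fa
#   pipelinebase	/path/to/pipeline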

# sample list
samples=`find $ANALYSISDIR -maxdepth 1 -mindepth 1 -type d -exec basename {} \;`

# chr list
chr_list=$(grep ">" $REF | sed 's/>//')
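# chr_list keeps the sequence names taken from the reference FASTA headers
# (not referenced further down in this script)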

# python path for proper execution of python
PYTHONPATH=$PIPELINEBASE/common:/work/gad/shared/bin/lib/python_2.7/lib/python2.7/site-packages:/work/gad/shared/bin/miniconda2/lib/python2.7/site-packages/
export PYTHONPATH

# preprocessing status file
if [ -z "${STATUSFILE}" ]
then
    STATUSFILE=$ANALYSISDIR/status.tsv
fi

# check for an existing status file
if [ ! -e "$STATUSFILE" ]
then
    echo "No status file found : jumping to rename"
    jumpto rename
else
    echo "### Getting last analysis status ###"
    echo "Status file found : $STATUSFILE"
    # delete .failed files
    rm -f $ANALYSISDIR/*.failed

    # get the status
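    # the status file is expected to hold one tab-separated line per step : step_name<TAB>status (e.g. FAIL)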
    renameRawStatus=$(grep "rename_raw" $STATUSFILE | cut -f2)
    organizeStatus=$(grep "organize_data_folder" $STATUSFILE | cut -f2)
    fastqcStatus=$(grep "process_fastqc" $STATUSFILE | cut -f2)
    trimStatus=$(grep "trim_fastq" $STATUSFILE | cut -f2)
    alnStatus=$(grep "align_star2" $STATUSFILE | cut -f2)
    cleanbamStatus=$(grep "clean_bam" $STATUSFILE | cut -f2)
    sortbamStatus=$(grep "sort_bam" $STATUSFILE | cut -f2)
    markDupStatus=$(grep "mark_duplicates" $STATUSFILE | cut -f2)
    rgStatus=$(grep "add_rg" $STATUSFILE | cut -f2)
    strimStatus=$(grep "split_trim" $STATUSFILE | cut -f2)
    recalStatus=$(grep "recal_bam" $STATUSFILE | cut -f2)
    matrixStatus=$(grep "matrix" $STATUSFILE | cut -f2)
    DEStatus=$(grep "DE" $STATUSFILE | cut -f2)
    outriderStatus=$(grep "outrider" $STATUSFILE | cut -f2)
    leafcutterStatus=$(grep "leafcutter" $STATUSFILE | cut -f2)
    rmatsStatus=$(grep "rmats" $STATUSFILE | cut -f2)
    SJStatus=$(grep "SJ" $STATUSFILE | cut -f2)
    integrationStatus=$(grep "integration" $STATUSFILE | cut -f2)


    # sample list
    samples=`find $ANALYSISDIR -maxdepth 1 -mindepth 1 -type d -exec basename {} \;`


    if [ "$renameRawStatus" = "FAIL" ]
    then
		echo "Renaming step failed : jumping to organize $renameStatus"
		jumpto rename
    else
		echo "Renaming step was OK"
    fi
    if [ "$organizeStatus" = "FAIL" ]
    then
		echo "Organizing step failed : jumping to organize : $organizeStatus"
		jumpto organize
    else
		echo "Organizing step was OK"
    fi
    if [ "$fastqcStatus" = "FAIL" ]
    then
		echo "QC step failed : jumping to qc"
		jumpto qc
    else
		echo "QC step was OK"
    fi
    if [ "$trimStatus" = "FAIL" ]
    then
		echo "Trimming step failed : jumping to trim"
		jumpto trim
    else
		echo "Trimming step step was OK"
    fi
    if [ "$alnStatus" = "FAIL" ]
    then
		echo "Alignement step failed : jumping to aln"
		jumpto aln
    else
		echo "Alignement step was OK"
    fi
    if [ "$sortsamtobamStatus" = "FAIL" ]
    then
		echo "Sorting step failed : jumping to sort"
		jumpto sort
    else
		echo "Sorting step was OK"
    fi
        if [ "$cleanbamStatus" = "FAIL" ]
    then
		echo "Cleaning step failed : jumping to clean"
		jumpto clean
    else
		echo "Cleaning step was OK"
    fi
    if [ "$markDupStatus" = "FAIL" ]
    then
		echo "Duplicate marking step failed : jumping to dup"
		jumpto dup
    else
		echo "Duplicate marking step was OK"
    fi
    if [ "$recalStatus" = "FAIL" ]
    then
		echo "Recalibration step failed : jumping to recal"
		jumpto recal
    else
		echo "Recalibration step was OK"
    fi
    if [ "$matrixStatus" = "FAIL" ]
    then
  		echo "matrix step failed : jumping to recal"
  		jumpto matrix
    else
	    echo "Matrix step was OK"
    fi
    if [ "$DEStatus" = "FAIL" ]
    then
  		echo "DE step failed : jumping to DE"
  		jumpto DE
    else
	    echo "DE step was OK"
    fi
    if [ "$outriderStatus" = "FAIL" ]
    then
  		echo "Outrider step failed : jumping to Outrider"
  		jumpto outrider
    else
	    echo "Outrider step was OK"
    fi
    if [ "$leafcutterStatus" = "FAIL" ]
    then
  		echo "LeafCutter step failed : jumping to LeafCutter"
  		jumpto leafcutter
    else
	    echo "LeafCutter step was OK"
    fi
    if [ "$rmatsStatus" = "FAIL" ]
    then
  		echo "rMATs step failed : jumping to rMATs"
  		jumpto rmats
    else
	    echo "rMATs step was OK"
    fi
    if [ "$SJStatus" = "FAIL" ]
    then
  		echo "SJ step failed : jumping to SJ"
  		jumpto SJ
    else
	    echo "SJ step was OK"
    fi
    if [ "$integrationStatus" = "FAIL" ]
    then
  		echo "Integration step failed : jumping to Integration"
  		jumpto integration
    else
	    echo "Integration step was OK"
    fi

    echo "The pipeline was correctly executed, no need to relaunch a step"
    exit 0
fi


# organize the folder & rename files.
rename:
## INPUT = ANALYSISDIR variable ; OUTPUT = NONE
echo "### Organizing directory & setting analysis parameters ###"
echo "Start : $(date)"
# Rename files according to our data sheet
echo "Command : $PYTHONBIN $PIPELINEBASE/common/fastq/rename_fastq.py -d $ANALYSISDIR -f rna "
$PYTHONBIN $PIPELINEBASE/common/fastq/rename_fastq.py -d $ANALYSISDIR -f rna
rename_fastq_exitcode=$?
if [ $rename_fastq_exitcode != 0 ]
then
    echo "rename_fastq.py failed: execution stopped"
    exit 1
fi



organize:
# organize dir & set parameters for the analysis
echo "Command : $PYTHONBIN $PIPELINEBASE/common/fastq/organize_data_folder.py -d $ANALYSISDIR -p $PIPELINEBASE -t $TARGETPATH"
$PYTHONBIN $PIPELINEBASE/common/fastq/organize_data_folder.py -d $ANALYSISDIR -p $PIPELINEBASE -t $TARGETPATH
echo "End : $(date)"

sleep 5

# sample list
samples=`find $ANALYSISDIR -maxdepth 1 -mindepth 1 -type d -exec basename {} \;`


# first pass fastqc
qc:
echo "### First pass fastqc ###"
echo "Start : $(date)"
for currentSample in $samples
do
    echo "FP fastqc for sample : $currentSample"
    # R1
    echo "Command : qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R1.$(date +%F_%H-%M-%S).log $PIPELINEBASE/common/fastq/process_fastqc.sh"
    qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R1.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh

    # R2
    echo "Command : qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R2.$(date +%F_%H-%M-%S).log $PIPELINEBASE/common/fastq/process_fastqc.sh"
    qsub -N fastqc_FP_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_R2.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh

done
echo "End : $(date)"

# fastq trimming
trim:
echo "### trim fastq ###"
echo "Start : $(date)"
for currentSample in $samples
do
    echo "Command : qsub -N trim_fastq_$currentSample -pe smp 4 -hold_jid fastqc_FP_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R1.fastq.gz,OUTPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R2.fastq.gz,FASTQCFILEONE=$ANALYSISDIR/$currentSample/QC/$currentSample.R1_fastqc/summary.txt,FASTQCFILETWO=$ANALYSISDIR/$currentSample/QC/$currentSample.R2_fastqc/summary.txt,MISEQ=F,LOGFILE=$ANALYSISDIR/$currentSample/logs/trim_fastq.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/trim_fastq.sh"
    qsub -N trim_fastq_$currentSample -pe smp 4 -hold_jid fastqc_FP_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R1.fastq.gz,OUTPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.trimmed.R2.fastq.gz,FASTQCFILEONE=$ANALYSISDIR/$currentSample/QC/$currentSample.R1_fastqc/summary.txt,FASTQCFILETWO=$ANALYSISDIR/$currentSample/QC/$currentSample.R2_fastqc/summary.txt,MISEQ=F,LOGFILE=$ANALYSISDIR/$currentSample/logs/trim_fastq.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/trim_fastq.sh
done
echo "End : $(date)"


# second pass fastqc
echo "### Second pass fastqc ###"
echo "Start : $(date)"
for currentSample in $samples
do
    echo "SP fastqc for sample : $currentSample"
    # R1
    echo "Command : qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R1.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh"
    qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R1.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh

    # R2
    echo "Command : qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R2.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh"
    qsub -N fastqc_SP_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 2 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,OUTDIR=$ANALYSISDIR/$currentSample/QC/$currentSample.trimmed_fastqc,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_fastqc_SP_R2.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/fastq/process_fastqc.sh
done
echo "End : $(date)"



# alignment with STAR in 2-pass mode
aln:
echo "### aligning sequences ###"
echo "Start : $(date)"
compile_dependency_aln=""
for currentSample in $samples
do
    echo "Command : qsub -N aln_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,LOGFILE=$ANALYSISDIR/$currentSample/logs/align_star.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/process_RNASeq_alignement_2pass.sh"
    qsub -N aln_$currentSample -hold_jid trim_fastq_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILEONE=$ANALYSISDIR/$currentSample/$currentSample.R1.fastq.gz,INPUTFILETWO=$ANALYSISDIR/$currentSample/$currentSample.R2.fastq.gz,LOGFILE=$ANALYSISDIR/$currentSample/logs/align_star.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/fastq/process_RNASeq_alignement_2pass.sh
    compile_dependency_aln=${compile_dependency_aln},aln_${currentSample}
done
compile_dependency_aln="-hold_jid ${compile_dependency_aln#,}"
echo "End : $(date)"


# sort bam
sort:
echo "### Sort & convert sam file to bam ###"
echo "Start : $(date)"
for currentSample in $samples
do
    echo "Command : qsub -N sortsamtobam_$currentSample -hold_jid aln_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.Aligned.sortedByCoord.out.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/sort_sam_to_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/sort_sam_to_bam.sh"
    qsub -N sortsamtobam_$currentSample -hold_jid aln_$currentSample -pe smp 8 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.Aligned.sortedByCoord.out.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/sort_sam_to_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/sort_sam_to_bam.sh
done
echo "End : $(date)"


# clean bam
clean:
echo "### Cleaning bam file ###"
echo "Start : $(date)"
for currentSample in $samples
do
    echo "Command : qsub -N clean_$currentSample -hold_jid sortsamtobam_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/clean_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/clean_bam.sh"
    qsub -N clean_$currentSample -hold_jid sortsamtobam_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/clean_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/clean_bam.sh
done
echo "End : $(date)"


# read groups
rg:
echo "### Adding @RG tags to bam file ###"
echo "Start : $(date)"
for currentSample in $samples
do
	echo "Command : qsub -N addRG_$currentSample -hold_jid clean_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,TECHNOLOGY=wes,LOGFILE=$ANALYSISDIR/$currentSample/logs/add_readgroups.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/bam/add_readgroups.sh"

	qsub -N addRG_$currentSample -hold_jid clean_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.clean.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,TECHNOLOGY=wes,LOGFILE=$ANALYSISDIR/$currentSample/logs/add_readgroups.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/bam/add_readgroups.sh
done


# mark duplicates
dup:
echo "### Mark duplicates from bam file ###"
echo "Start : $(date)"
for currentSample in $samples
do
    echo "Command : qsub -N mark_$currentSample -hold_jid addRG_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/mark_duplicates.$(date +"%F_%H-%M-%S").log,METRICSFILE=$ANALYSISDIR/$currentSample/QC/$currentSample.dedup.metrics $PIPELINEBASE/common/bam/mark_duplicates.sh"
    qsub -N mark_$currentSample -hold_jid addRG_$currentSample -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rg.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/mark_duplicates.$(date +"%F_%H-%M-%S").log,METRICSFILE=$ANALYSISDIR/$currentSample/QC/$currentSample.dedup.metrics $PIPELINEBASE/common/bam/mark_duplicates.sh
done
echo "End : $(date)"

# Split'N'Trim
strim:
echo "### split and trim the bam file ###"
echo "Start : $(date)"
for currentSample in $samples
do
	echo "Command : qsub -N strim_$currentSample -hold_jid mark_$currentSample -pe smp 3 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/split_trim.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/split_cigar_reads.sh"
	qsub -N strim_$currentSample -hold_jid mark_$currentSample -pe smp 3 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/split_trim.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/split_cigar_reads.sh
done
echo "End : $(date)"

# recalibrates bases
recal:
echo "### Recalibrating bases ###"
echo "Start : $(date)"
compile_dependency=""
for currentSample in $samples
do
    echo "Command : qsub -N recal_$currentSample -hold_jid strim_$currentSample -pe smp 6 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.bam,TABLE=$ANALYSISDIR/$currentSample/QC/$currentSample.recal_table.tsv,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/recal_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/recal_bam.sh
    compile_dependency=${compile_dependency},recal_$currentSample"
    qsub -N recal_$currentSample -hold_jid strim_$currentSample -pe smp 6 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.strim.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.bam,TABLE=$ANALYSISDIR/$currentSample/QC/$currentSample.recal_table.tsv,CONFIGFILE=$CONFIGFILE,LOGFILE=$ANALYSISDIR/$currentSample/logs/recal_bam.$(date +"%F_%H-%M-%S").log $PIPELINEBASE/common/bam/recal_bam.sh
    compile_dependency=${compile_dependency},recal_${currentSample}
done
compile_dependency="-hold_jid ${compile_dependency#,}"
echo "End : $(date)"

# Differential expression pipeline
# note that the analysis is performed on the whole batch, treating it as a classical design with samples sharing the same characteristics

# create a list of count files
matrix:
echo "Command : qsub -N create_count ${compile_dependency_aln} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_list.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_list_counts.sh"
qsub -N create_count ${compile_dependency_aln} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_list.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_list_counts.sh

# create the matrix
echo "Command : qsub -N matrix -hold_jid create_count -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTFILE=$ANALYSISDIR/all.counts.files.tsv,OUTPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/create_counts_matrix.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_create_counts_matrix.sh"
qsub -N matrix -hold_jid create_count -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTFILE=$ANALYSISDIR/all.counts.files.tsv,OUTPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/create_counts_matrix.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_create_counts_matrix.sh

# create the design files
echo "Command : qsub -N design -hold_jid matrix -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_design.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/create_design.sh"
qsub -N design -hold_jid matrix -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/create_design.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/create_design.sh


annotdependency="none"
# Run the DE analysis
#deseq
DE:
for currentSample in $samples
do
  echo "Command : qsub -N DE_$currentSample -hold_jid design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,DESIGNFILE=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_DE.$(date +"%F_%H-%M-%S").log,SAMPLE=$currentSample,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_de_analysis.sh"
  qsub -N DE_$currentSample -hold_jid design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/all.counts.matrix.tsv,DESIGNFILE=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_DE.$(date +"%F_%H-%M-%S").log,SAMPLE=$currentSample,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_de_analysis.sh
  annotdependency=${annotdependency},DE_${currentSample}
done

#outrider
outrider:
for currentSample in $samples
do
  echo "Command : qsub -N outrider_$currentSample -hold_jid design,matrix -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,MATRIX=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_outrider.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_outrider.sh"
  qsub -N outrider_$currentSample -hold_jid design,matrix -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,OUTPUTDIR=$ANALYSISDIR/$currentSample,MATRIX=$ANALYSISDIR/all.counts.matrix.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_outrider.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/DE/wrapper_run_outrider.sh
  annotdependency=${annotdependency},outrider_${currentSample}
done

# Run the splice pipeline
# Leafcutter
leafcutter:

#1 Converting Bam to junction files
compile_dependency=""
for currentSample in $samples
do
  echo "Command : qsub -N bam2junc_$currentSample -hold_jid sortsamtobam_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.junc,LOGFILE=$ANALYSISDIR/$currentSample/logs/bam2junc.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_bam2junc.sh"
  qsub -N bam2junc_$currentSample -hold_jid sortsamtobam_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.sort.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.junc,LOGFILE=$ANALYSISDIR/$currentSample/logs/bam2junc.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_bam2junc.sh
  compile_dependency=${compile_dependency},bam2junc_$currentSample
done
compile_dependency=${compile_dependency#,}
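# compile_dependency now lists every bam2junc job so the intron clustering step
# below only starts once all junction files are available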

#2 Intron clustering
echo "Command : qsub -N icluster -hold_jid ${compile_dependency} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTFILE=all.intron.clustering.tsv,LOGFILE=$ANALYSISDIR/iclustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_intron_clustering.sh"
qsub -N icluster -hold_jid ${compile_dependency} -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTFILE=all.intron.clustering.tsv,LOGFILE=$ANALYSISDIR/iclustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_intron_clustering.sh

#3 design & differential intron analysis
for currentSample in $samples
do
  echo "Command : qsub -N ddintron_$currentSample -hold_jid icluster,design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v  OUTPUTPREFIX=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier,INPUTFILE=$ANALYSISDIR/all.intron.clustering.tsv_perind_numers.counts.gz,DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.diff.intron.clustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_differential_intron_analysis_rare.sh"
  qsub -N ddintron_$currentSample -hold_jid icluster,design -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v  OUTPUTPREFIX=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier,INPUTFILE=$ANALYSISDIR/all.intron.clustering.tsv_perind_numers.counts.gz,DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.diff.intron.clustering.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_differential_intron_analysis_rare.sh
done

#4 add coordinates, omim annotation and filter.
for currentSample in $samples
do
  echo "Command : qsub -N format_leafcutter_$currentSample -hold_jid ddintron_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v  INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier_cluster_significance.txt,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.report.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/format_leafcutter.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_format_leafcutter.sh"
  qsub -N format_leafcutter_$currentSample -hold_jid ddintron_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v  INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.outlier_cluster_significance.txt,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.leafcutter.report.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/format_leafcutter.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_format_leafcutter.sh
   annotdependency=${annotdependency},format_leafcutter_${currentSample}
done


# rMATS
rmats:
#1 run rMATs
for currentSample in $samples
do
  echo "Command : qsub -N rmats_$currentSample -hold_jid design,sortsamtobam_$currentSample -pe smp 4 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR/$currentSample/,B1=$ANALYSISDIR/$currentSample/$currentSample.b1.txt,B2=$ANALYSISDIR/$currentSample/$currentSample.b2.txt,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.rmats.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_rMATS.sh"
  qsub -N rmats_$currentSample -hold_jid design,sortsamtobam_$currentSample -pe smp 4 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v DESIGN=$ANALYSISDIR/$currentSample/$currentSample.DE.design.tsv,INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR/$currentSample/,B1=$ANALYSISDIR/$currentSample/$currentSample.b1.txt,B2=$ANALYSISDIR/$currentSample/$currentSample.b2.txt,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.rmats.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_rMATS.sh
done

#2 format / filter / convert_to_bed rMATs
for currentSample in $samples
do
  echo "Command : qsub -N pprmats_$currentSample -hold_jid rmats_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTDIR=$ANALYSISDIR/$currentSample/,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rMATs.final.bed,ZSCORE=3,DELTAPSY=0.2,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.postprocessing_rMATs.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_postprocessing_rMATs.sh"
  qsub -N pprmats_$currentSample -hold_jid rmats_$currentSample -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTDIR=$ANALYSISDIR/$currentSample/,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.rMATs.final.bed,ZSCORE=3,DELTAPSY=0.2,LOGFILE=$ANALYSISDIR/$currentSample/logs/$currentSample.postprocessing_rMATs.$(date +"%F_%H-%M-%S").log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_postprocessing_rMATs.sh
   annotdependency=${annotdependency},pprmats_${currentSample}
done

# SJ method
SJ:
# Normalize SJ
echo "### Normalize SJ ###"
echo "Start : $(date +"%F_%H-%M-%S")"
dependency=""
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N normalize_SJ_${currentSample} -hold_jid aln_$currentSample -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v INPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.tab,OUTPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.norm.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/normalize_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_normalize_SJ.sh"
    qsub -pe smp 1 -N normalize_SJ_${currentSample} -hold_jid aln_$currentSample -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v INPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.tab,OUTPUTFILE=$ANALYSISDIR/${currentSample}/${currentSample}.SJ.out.norm.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/normalize_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_normalize_SJ.sh
	dependency=${dependency},normalize_SJ_${currentSample}
done
dependency="-hold_jid ${dependency#,}"
echo "End : $(date +"%F_%H-%M-%S")"

# Create batch sjdb
echo "### Create batch sjdb ###"
echo "Start : $(date +"%F_%H-%M-%S")"
if [ -f "$ANALYSISDIR/SJ.samples.list" ]
then
  rm $ANALYSISDIR/SJ.samples.list
fi
for currentSample in $samples
do
  echo "$ANALYSISDIR/$currentSample/$currentSample.SJ.out.norm.tab" >> $ANALYSISDIR/SJ.samples.list
done
echo "Command : qsub -pe smp 1 -N create_batch_sjdb ${dependency} -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v INPUTFILE=$ANALYSISDIR/SJ.samples.list,OUTPUTFILE=$ANALYSISDIR/SJ.batch.list,LOGFILE=$ANALYSISDIR/create_batch_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_create_batch_sjdb.sh"
qsub -pe smp 1 -N create_batch_sjdb ${dependency} -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v INPUTFILE=$ANALYSISDIR/SJ.samples.list,OUTPUTFILE=$ANALYSISDIR/SJ.batch.list,LOGFILE=$ANALYSISDIR/create_batch_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_create_batch_sjdb.sh
echo "End : $(date +"%F_%H-%M-%S")"

# Annot with sjdb
echo "### Annot with sjdb ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N annotate_SJ_with_sjdb_${currentSample} -hold_jid normalize_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.norm.tab,SJDB=$ANALYSISDIR/$currentSample/${currentSample}._STARgenome/sjdbList.fromGTF.out.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_sjdb.sh"
    qsub -pe smp 1 -N annotate_SJ_with_sjdb_${currentSample} -hold_jid normalize_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.norm.tab,SJDB=$ANALYSISDIR/$currentSample/${currentSample}._STARgenome/sjdbList.fromGTF.out.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_sjdb.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_sjdb.sh
done
echo "End : $(date +"%F_%H-%M-%S")"

# Annot with genes
echo "### Annot with genes ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    # TODO hg19.refseq.with_genes.gtf
    echo "Command : qsub -pe smp 1 -N annotate_SJ_with_genes_${currentSample} -hold_jid annotate_SJ_with_sjdb_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_genes.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_genes.sh"
    qsub -pe smp 1 -N annotate_SJ_with_genes_${currentSample} -hold_jid annotate_SJ_with_sjdb_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_genes.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_genes.sh
done
echo "End : $(date +"%F_%H-%M-%S")"

# Annot with batch
echo "### Annot with batch ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N annotate_SJ_with_batch_${currentSample} -hold_jid annotate_SJ_with_genes_${currentSample},create_batch_sjdb -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,BATCH=$ANALYSISDIR/SJ.batch.list,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_batch.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_batch.sh"
    qsub -pe smp 1 -N annotate_SJ_with_batch_${currentSample} -hold_jid annotate_SJ_with_genes_${currentSample},create_batch_sjdb -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.tab,BATCH=$ANALYSISDIR/SJ.batch.list,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_batch.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_batch.sh
done
echo "End : $(date +"%F_%H-%M-%S")"

# Annot with GTEx
echo "### Annot with GTEx ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N annotate_SJ_with_GTEx_${currentSample} -hold_jid annotate_SJ_with_batch_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_GTEx.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_GTEx.sh"
    qsub -pe smp 1 -N annotate_SJ_with_GTEx_${currentSample} -hold_jid annotate_SJ_with_batch_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,LOGFILE=$ANALYSISDIR/${currentSample}/logs/annotate_SJ_with_GTEx.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_GTEx.sh
done
echo "End : $(date +"%F_%H-%M-%S")"



# Filter SJ
echo "### Filter SJ ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N filter_SJ_${currentSample} -hold_jid annotate_SJ_with_GTEx_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/filter_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE  $PIPELINEBASE/common/splice/wrapper_filter_SJ.sh"
    qsub -pe smp 1 -N filter_SJ_${currentSample} -hold_jid annotate_SJ_with_GTEx_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.out.sjdb.genes.batch.gtex.tab,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/filter_SJ.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE  $PIPELINEBASE/common/splice/wrapper_filter_SJ.sh
done
echo "End : $(date +"%F_%H-%M-%S")"

# convert SJ to bed which will be included in the variant file
echo "### SJ to bed ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N SJ_to_bed_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.bed,LOGFILE=$ANALYSISDIR/$currentSample/logs/SJ_to_bed.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_SJ_to_bed.sh"
    qsub -pe smp 1 -N SJ_to_bed_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.bed,LOGFILE=$ANALYSISDIR/$currentSample/logs/SJ_to_bed.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_SJ_to_bed.sh
done
echo "End : $(date +"%F_%H-%M-%S")"

# Annot with OMIM on filtered SJ file
echo "### Annot with OMIM ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
    echo "Command : qsub -pe smp 1 -N annotate_SJ_with_OMIM_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.annot.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/annotate_SJ_with_OMIM.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_OMIM.sh"
    qsub -pe smp 1 -N annotate_SJ_with_OMIM_${currentSample} -hold_jid filter_SJ_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.filter.tsv,OUTPUTFILE=$ANALYSISDIR/$currentSample/${currentSample}.SJ.annot.tsv,LOGFILE=$ANALYSISDIR/$currentSample/logs/annotate_SJ_with_OMIM.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_SJ_with_OMIM.sh
     annotdependency=${annotdependency},annotate_SJ_with_OMIM_${currentSample}
done
echo "End : $(date +"%F_%H-%M-%S")"

# get wgs / wes report back into the analysis folder
integration:
echo "### Get dijen reports ###"
echo "Start : $(date +"%F_%H-%M-%S")"
echo "Command : qsub -pe smp 1 -N get_dijen_reports -hold_jid $annotdependency -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/get_dijen_reports.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/wrapper_get_dijen_reports.sh"
qsub -pe smp 1 -N get_dijen_reports -hold_jid $annotdependency -o $ANALYSISDIR -e $ANALYSISDIR -v INPUTDIR=$ANALYSISDIR,OUTPUTDIR=$ANALYSISDIR,LOGFILE=$ANALYSISDIR/get_dijen_reports.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/wrapper_get_dijen_reports.sh

# from this point the input and output file names cannot be known
dependency=""
# add rnaseq analysis information to the variant files if possible
echo "### Add rnaseq information to nsssi variants ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
  echo "Command : qsub -pe smp 1 -N nsssi_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=NSSSI,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh"
  qsub -pe smp 1 -N nsssi_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=NSSSI,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh
  dependency=${dependency},nsssi_annot_${currentSample}
done
echo "End : $(date +"%F_%H-%M-%S")"

# add rnaseq analysis information to the variant files if possible
echo "### Add rnaseq information to genic variants ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
  echo "Command : qsub -pe smp 1 -N genic_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=GENIC,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh"
  qsub -pe smp 1 -N genic_annot_${currentSample} -hold_jid get_dijen_reports -o $ANALYSISDIR/${currentSample}/logs/ -e $ANALYSISDIR/${currentSample}/logs/ -v SAMPLE=$currentSample,TYPE=GENIC,LOGFILE=$ANALYSISDIR/${currentSample}/logs/nsssi.rnaseq.annotation.$(date +%F_%H-%M-%S).log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/common/splice/wrapper_annotate_report_with_rnaseq.sh
  dependency=${dependency},genic_annot_${currentSample}
done
echo "End : $(date +"%F_%H-%M-%S")"


# Compile .failed files to create log.
dependency="-hold_jid ${dependency#,}"
echo "### Compiling execution status ###"
echo "Start : $(date)"
echo "Command : qsub -q batch -N compile_status ${dependency} -pe smp 1 -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v STATUSFILE=$STATUSFILE,LOGFILE=$ANALYSISDIR/compile_rnaseq_status.$(date +%F_%H-%M-%S).log,ANALYSISDIR=$ANALYSISDIR $PIPELINEBASE/common/compile_rnaseq_preprocessing_status.sh"
qsub -q batch -N compile_status ${dependency} -pe smp 1 -o $ANALYSISDIR/ -e $ANALYSISDIR/ -v STATUSFILE=$STATUSFILE,LOGFILE=$ANALYSISDIR/compile_rnaseq_status.$(date +"%F_%H-%M-%S").log,ANALYSISDIR=$ANALYSISDIR $PIPELINEBASE/common/compile_rnaseq_preprocessing_status.sh
echo "End : $(date)"

echo "### END of pipeline execution with success but analysis jobs are still running ###"
echo "END OF PIPELINE : $(date)"