ccub_test

5f8e477e · Simon Verdez · 5e0a7672 · 5f8e477e · 5f8e477e · 5f8e477e
Commit 5f8e477e authored Aug 07, 2018 by Simon Verdez
11 changed files
--- a/Master.sh
+++ b/Master.sh
 #!/bin/bash
-set -e;
+### GAD PIPELINE ###
-true;
+## Master.sh
+## Version : 1.0
+## Description : process pharmacoAnnot for exome data
+## Usage : qsub -v ANALYSISDIR=<path to the analysis dir>,CONFIGFILE=<path to the config file>,PHARMCONFIG=<path to the config_pharm file> Master.sh
+## Output : no standard output
+## Requirements : Require pipeline scripts
-i=$1
+## Author : Simon.verdez@u-bourgogne.fr
+## Creation Date : 20180725
+## Last revision date : 20180725
+## Known bugs : None.
-if [ ! -d /work/gad/sv347413/epilepsie/tmp_res/$i ]
+#$ -N Master
+#$ -q batchbm
+#$ -pe smp 1
+#$ -V
+function jumpto
+{
+    label=$1
+    cmd=$(sed -n "/$label:/{:a;n;p;ba};" $0 | grep -v ':$')
+    eval "$cmd"
+    exit
+}
+#Logging
+logbasename=$(date +"%F_%H-%M-%S")
+vcfbasename=$(date +"%F")
+reportbasename=$(date +"%F")
+if [ -z ${LOGFILE} ]
 then
-mkdir /work/gad/sv347413/epilepsie/tmp_res/$i
+    LOGFILE=PharmAnnot.$logbasename.log
 fi
+exec 1>> $LOGFILE 2>&1
-if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf ]
+# Mandatory arguments
+if [ -z ${ANALYSISDIR} ]
 then
-echo "SNP"
+    echo "ANALYSISDIR was not defined : execution stopped"
-/work/gad/sv347413/jdk1.8.0_111/bin/java -jar /work/gad/shared/bin/snpEff/SnpSift.jar annotate pharmgkb_v2.vcf /work/gad/sv347413/epilepsie/vcf_tmp/$i.* > /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf
+    exit 1
 fi
-if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf ]
+if [ -z ${CONFIGFILE+x} ]
 then
-echo "CNV"
+    echo "Config file not provided by the user. You need it to run this script. Stopping execution."
-cp /work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf
+    exit
 fi
-if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf ]
+if [ -z ${PHARMCONFIG+x} ]
 then
-echo "CNV2"
+    echo "Config file not provided by the user. You need it to run this script. Stopping execution."
-python2.7 tsv2vcf.py /work/gad/sv347413/epilepsie/tsv_tmp/$i* /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf
+    exit
-mv /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf
 fi
-if [ ! -e /work/gad/sv347413/epilepsie/results_html/$i.html ]
+#Usefull variables
+PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
+TSVFILE=`grep "pharmagktab" $PHARMCONFIG | cut -f2`
+PHARMGKB=`grep "pharmagkbase" $PHARMCONFIG | cut -f2`
+OMIM=`grep "omim" $PHARMCONFIG | cut -f2`
+HLALIST=`grep "HLAlist" $PHARMCONFIG | cut -f2`
+HEADER=`grep "header" $PHARMCONFIG | cut -f2`
+# python path for proper execution of python
+PYTHONPATH=$PIPELINEBASE/common:/work/gad/shared/bin/lib/python_2.7/lib/python2.7/site-packages:/work/gad/shared/bin/miniconda2/lib/python2.7/site-packages/
+export PYTHONPATH
+# Varcall status file
+if [ -z ${STATUSFILE} ]
 then
-echo "html"
+   STATUSFILE=$ANALYSISDIR/status.tsv
-python2.7 create_html_v2.py /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf /work/gad/sv347413/HLAscan/results/$i/Report /work/gad/sv377413/HLAscan/bam_tmp/$i.bam > /work/gad/sv347413/epilepsie/results_html/$i.html
 fi
+# Sample list
+samples=`find -L $ANALYSISDIR/ -maxdepth 1 -mindepth 1 -type d | xargs -l basename`
+# Batch sample list
+if [ -e $ANALYSISDIR/batch.list ]
+then
+    rm $ANALYSISDIR/batch.list
+fi
+find -L $ANALYSISDIR/ -maxdepth 1 -mindepth 1 -type d | xargs -l basename >> $ANALYSISDIR/batch.list
+# 0 : Check status file
+if [ ! -e $STATUSFILE ]
+then
+    echo "No status file found : Jumping to configuration step"
+    jumpto configure
+else
+    echo "### Getting last analysis status ###"
+    # delete .failed files
+    failedfile=($ANALYSISDIR/*.failed)
+    if [ -e ${failedfile[0]} ]
+    then
+        rm $ANALYSISDIR/*.failed
+    fi
+    # get the status
+    REF=`grep "reference" $STATUSFILE | cut -f2`
+    HLAscan=`grep "process_HLAscan" $STATUSFILE | cut -f2`
+    varcallSNP=`grep "process_varcallSNP" $STATUSFILE | cut -f2`
+    varcallCNV=`grep "process_varcallCNV" $STATUSFILE | cut -f2`
+    createHTML=`grep "process_createHTML" $STATUSFILE | cut -f2`
+    if [ "$HLAscan" == "FAIL" ]
+    then
+	    echo "HLAscan step failed : jumping to HLAscan step"
+	    jumpto HLAscan
+    else
+	    echo "HLAscan step was OK"
+    fi
+    if [ "$varcallSNP" == "FAIL" ]
+    then
+	    echo "Variant calling SNP first step failed : jumping to variant calling SNP first step"
+	    jumpto varcallSNP
+    else
+	    echo "Variant calling SNP first step was OK"
+    fi
+    if [ "$varcallCNV" == "FAIL" ]
+    then
+	    echo "Variant calling CNV second step failed : jumping to variant calling CNV second step"
+	    jumpto varcallCNV
+    else
+	    echo "Variant calling CNV second step was OK"
+    fi
+    if [ "$createHTML" == "FAIL" ]
+    then
+      echo "CreateHTML step failed : jumping to variant create HTML step"
+      jumpto createHTML
+    else
+      echo "CreateHTML step was OK"
+    fi
+    echo "pipeline was correctly executed, no need to relaunch a step" 
+    exit 0 
+fi
+configure:
+# Configure variant calling analysis
+echo "### Configuration step ###"
+echo "Start : $(date +"%F_%H-%M-%S")"
+varcallfiles=($ANALYSISDIR/*/*.Master.list)
+if [ -e ${varcallfiles[0]} ]
+then
+    rm $ANALYSISDIR/*/*.Master.list
+fi
+situation=0
+# case no family file
+if [ "$situation" -eq "0" ]
+then
+    echo "No family file"
+    for currentSample in $samples
+    do
+        echo $currentSample >> $ANALYSISDIR/$currentSample/$currentSample.Master.list
+    done
+fi
+HLAscan:
+# HLA genotyping
+echo "### Execute HLAscan ###"
+echo "Start : $(date +"%F_%H-%M-%S")"
+for currentSample in $samples
+do
+    echo "Command : qsub -pe smp 1 -N HLAscan_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/HLAscan,GeneList=$GL,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_HLAscan.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/haplo_scan.sh"
+    qsub -pe smp 1 -N HLAscan_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v BAM=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,SORTIE=$ANALYSISDIR/$currentSample/HLAscan,GeneList=$GL,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_HLAscan.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/haplo_scan.sh
+done
+echo "End : $(date +"%F_%H-%M-%S")"
+varcallSNP:
+# Variant calling first step
+echo "### Variant calling SNP first step ###"
+echo "Start : $(date +"%F_%H-%M-%S")"
+for currentSample in $samples
+do
+    echo "Command : qsub -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -N varcallSNP_${currentSample} -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.raw.vcf,PHARMGKB=$PHARMGKB,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallSNP.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/annotate_variants_pharmAnnot.sh"
+    qsub -pe smp 1 -N varcallSNP_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.raw.vcf,PHARMGKB=$PHARMGKB,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallSNP.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/annotate_variants_pharmAnnot.sh
+done
+echo "End : $(date +"%F_%H-%M-%S")"
+varcallCNV:
+# Variant calling second step
+echo "### Variant calling CNV second step ###"
+echo "Start : $(date +"%F_%H-%M-%S")"
+for currentSample in $samples
+do
+    echo "Command : cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf"
+    cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf
+    cnvfile=`ls -lht $ANALYSISDIR/$currentSample/ | grep "\.annot.tsv" | head -1 | awk '{print $9}'`
+    echo "Command : qsub -pe smp 1 -N varcallCNV_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh"
+    qsub -pe smp 1 -N tsv2vcf_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasenameCNV.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh
+done
+echo "End : $(date +"%F_%H-%M-%S")"
+createHTML:
+# Generate a HTML report_vcf
+echo "### Generate HTML report ###"
+echo "Start : $(date +"%F_%H-%M-%S")"
+for currentSample in $samples
+do
+    echo "Command : qsub -pe smp 1 -N createHTML_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v VCF_SNP=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,VCF_CNV=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,REPORTFILE=$ANALYSISDIR/$currentSample/HLAscan/Report,BAMFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,TSVFILE=$TSVFILE,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.html,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_create_html.sh"
+    qsub -pe smp 1 -N createHTML_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v VCFFILE_SNP=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,VCFFILE_CNV=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.CNV.vcf,REPORTFILE=$ANALYSISDIR/$currentSample/$currentSample.report,BAMFILE=$ANALYSISDIR/$currentSample/$currentSample.bam,TSVFILE=$TSVFILE,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.html,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_create_html.sh
+done
+echo "End : $(date +"%F_%H-%M-%S")"
+MergeVCF:
+#merge vcf fies
+echo "### Merge VCF files"
+echo "Start : $(date +"%F_%H-%M-%S")"
+for currentSample in $samples
+do
+    echo "Command : qsub -pe smp 1 -N mergeVCF_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE1=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.CNV.vcf,INPUTFILE2=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,REF=$REF,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.merge.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/merge_GATK.sh"
+    qsub -pe smp 1 -N mergeVCF_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE1=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.CNV.vcf,INPUTFILE2=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,REF=$REF,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/merge_GATK.sh
+done
+echo "End : $(date +"%F_%H-%M-%S")"
--- a/VCF_tools.pyc
+++ b/VCF_tools.pyc
--- a/analysis_config.tsv
+++ b/analysis_config.tsv
+pipelinebase	/work/gad/shared/pipeline/pharmacogen_test/pharmAnnot
+pythonbin	/work/gad/shared/bin/miniconda2/bin/python
+GATKbase	/work/gad/shared/bin/GATK_3.7
+javacmd	/soft/java/jdk_x64/bin/java
+picardbase	/work/gad/shared/bin/PICARD_2.4.1
+temporary_dir	/work/gad/shared/tmp/
+reference	/work/gad/shared/pipeline/hg19/index/hg19_essential.fa
+refdict	/work/gad/shared/pipeline/hg19/index/hg19_essential.dict
+refdir	/work/gad/shared/pipeline/hg19/index
+dbsnp	/work/gad/shared/pipeline/hg19/clinvar/dbsnp.vcf
+mainqueue	batch
+autosubqueue	batchbm
+bwacmd	/work/gad/shared/bin/bwa-0.7.15
+fastqccmd	/work/gad/shared/bin/FastQC_0.11.4/fastqc
+fastqc_config	/work/gad/shared/bin/FastQC_0.11.4/Configuration/limits.txt
+trimmomatic	/work/gad/shared/bin/Trimmomatic-0.35/trimmomatic-0.35.jar
+adapters_file	/work/gad/shared/bin/Trimmomatic-0.35/adapters/TruSeq3-PE-All.fa
+samtools	/work/gad/shared/bin/samtools-1.5/
+igvtools	/work/gad/shared/bin/IGVTools_2.3.67/igvtools.jar
+targetlist	/work/gad/shared/pipeline/hg19/targets/
+refseqlist	/work/gad/shared/pipeline/hg19/refseq/refseq.cds.2bp.hg19.with_genes.interval_list
+refseqgenelist	/work/gad/shared/pipeline/hg19/refseq/genetrack.hg19.sorted.refseq
+controlfile	/work/gad/shared/pipeline/samples/control_100_samples.g.vcf
+email	emilie.tisserant@u-bourgogne.fr
+clinvar_vcf	/work/gad/shared/pipeline/hg19/clinvar/clinvar.vcf
+cosmic_vcf	/work/gad/shared/pipeline/hg19/cosmic/CosmicCodingMuts.vcf
+omim_list	/work/gad/shared/pipeline/hg19/omim/OMIM2.list
+acmg_list	/work/gad/shared/pipeline/hg19/omim/ACMG.list
+common_list	/work/gad/shared/pipeline/hg19/clinvar/common.list
+ssa_nb_files	10
+tabixpath	/work/gad/shared/bin/htslib-1.3/bin/tabix
+kaviar	/work/gad/shared/pipeline/hg19/kaviar/Kaviar-160204-Public-hg19.vcf.gz
+regulomedb	/work/gad/shared/pipeline/hg19/regulomeDB/RegulomeDB.dbSNP141.txt.gz
+targetscan	/work/gad/shared/pipeline/hg19/targetscan/Predicted_Targets.hg19.bed
+cytobands	/work/gad/shared/pipeline/hg19/cnv/cytobands.tsv
+dgv_variants	/work/gad/shared/pipeline/hg19/cnv/GRCh37_hg19_variants_2015-07-23.txt.gz
+cnv_map	/work/gad/shared/pipeline/hg19/cnv/Stringent.hg19.2015-02-03.txt.gz
+isca_benign_cnv	/work/gad/shared/pipeline/hg19/cnv/iscaBenign.txt.gz
+isca_pathogenic_cnv	/work/gad/shared/pipeline/hg19/cnv/iscaPathogenic.txt.gz
+devdelay_control_cnv	/work/gad/shared/pipeline/hg19/cnv/cnvDevDelayControl.txt.gz
+devdelay_case_cnv	/work/gad/shared/pipeline/hg19/cnv/cnvDevDelayCase.txt.gz
+clingen_dosage_sensitive_gene	/work/gad/shared/pipeline/hg19/cnv/ClinGen_gene_curation_list.tsv
+xhmmbin	/work/gad/shared/bin/xhmm_1.0/xhmm
+xhmmparams	/work/gad/shared/pipeline/hg19/cnv/xhmm_params.txt
+cnvtarget	/work/gad/shared/pipeline/hg19/cnv/cnv_targets_refseq_only_coding_exon_2bp.hg19.list
+targetgc	/work/gad/shared/pipeline/hg19/cnv/cnv_targets_refseq_only_coding_exon_2bp.hg19_GC.txt
+docmatrix	/work/gad/shared/pipeline/hg19/cnv/ALL.CNV.RD.txt
+extremegc	/work/gad/shared/pipeline/hg19/cnv/extreme_gc_targets.txt
+lowcomplexity	/work/gad/shared/pipeline/hg19/cnv/low_complexity_targets.txt
+controlfreec	/work/gad/shared/bin/ControlFreeC
+gemMappabilityFile	/work/gad/shared/pipeline/hg19/cnv/out100m1_hg19.gem
+chrLenFile	/work/gad/shared/pipeline/hg19/index/hg19.len
+chrFiles	/work/gad/shared/pipeline/hg19/index/chr/
+lumpybase	/work/gad/shared/bin/lumpy/lumpy-sv-master/
+mapabilitybedgraph	/work/gad/shared/pipeline/hg19/cnv/wgEncodeCrgMapabilityAlign100mer_no_uniq.bedgraph
+blacklistfile	/work/gad/shared/pipeline/hg19/cnv/wgEncodeDacMapabilityConsensusExcludable.txt
+repeatmasker	/work/gad/shared/pipeline/hg19/cnv/UCSC_rmsk.hg19.sort.bed.gz
+bedtools	/work/gad/shared/bin/bedtools
+nist_bed	/work/gad/shared/pipeline/hg19/nist/agilent_CRE.refseq_cds.NA12878_GIAB_highconf.intersect.hg19.bed
+nist_vcf	/work/gad/shared/pipeline/hg19/nist/NA12878_GIAB_highconf.agilent.refseq.intersect.vcf
+hg19gtf	/work/gad/shared/pipeline/hg19/refseq/hg19.refseq.with_genes.gtf
+star2	/work/gad/shared/bin/STAR-2.5.2b/source
+circosconfdir	/work/gad/shared/pipeline/hg19/circos/
+circosbin	/work/gad/shared/bin/circos-0.69-2/bin/circos
+rscript	/soft/R/3.2.3/intel/13.1.3/bin/Rscript
+snpeffbase	/work/gad/shared/bin/snpEff
+exacvcf	/work/gad/shared/pipeline/hg19/ExAC/ExAC.r0.3.1.sites.vep.vcf.gz
+exacscores	/work/gad/shared/pipeline/hg19/ExAC/fordist_cleaned_exac_r03_march16_z_pli_rec_null_data.txt
+dbnsfp	/work/gad/shared/bin/snpEff/data/dbNSFP/dbNSFP2.9.txt.gz
+gnomadex	/work/gad/shared/pipeline/hg19/ExAC/gnomad.exomes.r2.0.2.sites.vcf.gz
+gnomadge	/work/gad/shared/pipeline/hg19/ExAC/gnomad.genomes.r2.0.2.sites.vcf.gz
+gnomadchr	/work/gad/shared/pipeline/hg19/ExAC/gnomad.genomes.r2.0.2.sites
+hgmd_vcf	/work/gad/shared/pipeline/hg19/HGMD/HGMD_PRO_2017.2_hg19.vcf
+regulatoryfeatures	/work/gad/shared/pipeline/hg19/ensembl_regulation/regulatory_features.bed
+motiffeatures	/work/gad/shared/pipeline/hg19/ensembl_regulation/motif_features.bed
+encodednase	/work/gad/shared/pipeline/hg19/Encode/wgEncodeRegDnaseClusteredV3.bed
+encodetfbs	/work/gad/shared/pipeline/hg19/Encode/wgEncodeRegTfbsClusteredV3.bed
+linsight	/work/gad/shared/pipeline/hg19/LINSIGHT
+fantom	/work/gad/shared/pipeline/hg19/FANTOM/fantom_cage_peak.bed
+isgenic	/work/gad/shared/pipeline/hg19/FANTOM/isGenic.bed
+genelists	/work/gad/shared/pipeline/hg19/gene_lists/
+vtbase	/work/gad/shared/bin/vt
--- a/annotate_variants_pharmAnnot.sh
+++ b/annotate_variants_pharmAnnot.sh
+#!/bin/ksh
+### GAD PIPELINE ###
+## annotate_variants_snpeff.sh
+## Version : 1.0
+## Description : This script allows user to launch SnpEff and SNpSift for basic variant annotation, frequency annotation and score annotation
+## Usage : qsub -pe smp <nb thread> -v INPUTFILE=<raw/vcf/file>,OUTPUTFILE=</path/to/the/output/vcf/file>,PHARMGKB=<ref/file/pharmgkb>,[NOCHECK=yes|no],[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] annotate_variants_pharmAnnot.sh
+## Output : .vcf file containing annotations
+## Requirements : jdk 1.8.0
+## Author : Simon.Verdez@u-bourgogne.fr
+## Creation Date : 20180727
+## last revision date : 20180727
+## Known bugs : None
+#$ -q batch
+#$ -V
+# Log file path option
+if [ -z ${LOGFILE} ]
+then
+    LOGFILE=annotate_variants_snpeff.$(date +"%F_%H-%M-%S").log
+fi
+# Config file path option
+if [ -z ${CONFIGFILE} ]
+then
+    CONFIGFILE=analysis_config.tsv
+fi
+# Logging
+exec 1>> $LOGFILE 2>&1
+echo "$(date +"%F_%H-%M-%S"): START"
+# Check if config file exist
+if [ ! -f $CONFIGFILE ]
+then
+    echo "Config file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch annotate_variants_snpeff.failed
+    exit 1
+fi
+# Check if input file exist
+if [ ! -f $INPUTFILE ]
+then
+    echo "Input file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch annotate_variants_snpeff.failed
+    exit 1
+fi
+if [ ! -f $PHARMGKB ]
+then
+    echo "ref pharmgkb file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch annotate_variants_snpeff.failed
+    exit 1
+fi
+# Check if output file is specified
+if [ -z $OUTPUTFILE ]
+then
+    echo "Output file is not specified"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch annotate_variants_snpeff.failed
+    exit 1
+fi
+JAVA=$(grep "javacmd" $CONFIGFILE | awk '{print $2}')
+PATHSCRIPT=$(grep "pipelinebase" $CONFIGFILE | awk '{print $2}')
+SNPEFF=$(grep "snpeffbase" $CONFIGFILE | awk '{print $2}')
+# Launch Snpeff and SnpSift command and check exit code
+echo "Command: $JAVA -Xmx4g -jar $SNPEFF/SnpSift.jar Annotate $PHARMGKB $INPUTFILE > $OUTPUTFILE"
+bash -o pipefail  -c "$JAVA -Xmx4g -jar $SNPEFF/SnpSift.jar Annotate $PHARMGKB $INPUTFILE > $OUTPUTFILE"
+snpeff_exitcode=$?
+echo "snpeff exit code : $snpeff_exitcode"
+if [ $snpeff_exitcode != 0 ]
+then
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch annotate_variants_Annot.failed
+    exit 1
+fi
+if [ -z ${NOCHECK} ] || [ ${NOCHECK} == "no" ]
+then
+    chr=`tail -1 $OUTPUTFILE | cut -f1`
+    if [ $chr != "chrX" ] && [ $chr != "chrY" ] && [ $chr != "X" ] && [ $chr != "Y" ]
+    then
+        echo "vcf file does not seem complete"
+        echo "$(date +"%F_%H-%M-%S"): END"
+        touch annotate_variants_snpeff.failed
+        exit 1
+    fi
+fi
+echo "$(date +"%F_%H-%M-%S"): END"
--- a/create_html_v2.py
+++ b/create_html_v2.py
+#!/usr/bin/python
+### PharmAnnot PIPELINE ###
+## create_html_v2.py
+## Version : 1.0.0
+## Description : Create a html file with vcf file, BAM file and report HLAscan
+## Usage : python create_html_v2.py -v vcf_SNP -c vcf_CNV -h report_HLA -b BAM -t tsvfile -o html -e log
+## Output : Annotated vcf file
+## Requirements : python 2.7+
+## Author : Simon.Verdez@u-bourgogne.fr
+## Creation Date : 20180731
+## last revision date : 20180731
+## Known bugs : None
+#Example : python create_html_v2.py dijex2000.vcf report_HLA BAM
 import sys
 import subprocess
-from pyexcel_ods import *
+import csv
 from pysam import VariantFile
 import htmltag
+# Options
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'v:h:b:o:e:')
+    for opt, arg in opts:
+        if opt in ("-v"):
+            vcfSNP = arg
+        if opt in ("-c"):
+            vcfCNV = arg
+        if opt in ("-h"):
+            report = arg
+        if opt in ("-b"):
+            bam = arg
+        if opt in ("-t"):
+            tsvfile = arg
+        elif opt in ("-o"):
+            sys.stdout = open(arg, 'w')
+        elif opt in ("-e"):
+            logfile = arg
+except getopt.GetoptError:
+    print 'usage : '
+    sys.exit(1)
+# Logging
+logging.basicConfig(filename =  '%s' % (logfile), filemode = 'a', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
+logging.info('start')
+# Manage options
+if not vars().has_key('vcf_SNP'):
+    logging.exception('No vcf SNP file given')
+    sys.exit(1)
+if not vars().has_key('vcf_CNV'):
+    logging.exception('No vcf CNV file given')
+    sys.exit(1)
+if not vars().has_key('report'):
+    logging.exception('No report HLAscan given')
+    sys.exit(1)
+if not vars().has_key('bam'):
+    logging.exception('No bam file given')
+    sys.exit(1)
+if not vars().has_key('tsvfile'):
+    logging.exception('No tsv file given')
+    sys.exit(1)
+# Check files
+try:
+	f = open(vcf_SNP, 'r')
+	f.close()
+except IOError:
+    logging.exception('vcf_SNP file does not exist')
+    sys.exit(1)
+try:
+	f = open(vcf_CNV, 'r')
+	f.close()
+except IOError:
+    logging.exception('vcf_CNV file does not exist')
+    sys.exit(1)
+try:
+	f = open(report, 'r')
+	f.close()
+except IOError:
+    logging.exception('Report HLAscan file does not exist')
+    sys.exit(1)
+try:
+	f = open(bam, 'r')
+	f.close()
+except IOError:
+    logging.exception('BAM file does not exist')
+    sys.exit(1)
+try:
+	f = open(tsvfile, 'r')
+	f.close()
+except IOError:
+    logging.exception('Tsv file does not exist')
+    sys.exit(1)
 " construction du formulaire en premier "
 head = htmltag.HEAD()
 head = htmltag.append(htmltag.TITLE('pharmacogenetic'))
@@ -14,17 +107,16 @@ head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar li { backgro
 head = head.append(htmltag.STYLE(type="text/css").append("P1 {color: red }"))
 head = head.append(htmltag.STYLE(type="text/css").append("P2 { color: orange }"))
 body = htmltag.BODY()
 body = body.append(htmltag.UL(CLASS = "navbar").append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://www.pharmgkb.org").append("pharmgkb")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://cpicpgx.org").append("cpicpgx")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "http://www.pharmacogenetics.fr/").append("pharmacogenetics.fr")))))
 body = body.append(htmltag.H2('Variants pharmgkb'))
 """extraction des donnees """
-data = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau pharmgkb.ods")
+data = get_data(tsvfile)
-vcf_reader_SNP = VariantFile(sys.argv[1])
+vcf_reader_SNP = VariantFile(vcfSNP)
-vcf_reader_CNV = VariantFile(sys.argv[2])
+vcf_reader_CNV = VariantFile(vcfCNV)
-BAM_FILE = VariantFile(sys.argv[4])
+BAM_FILE = VariantFile(bam)
-Report_HLA = open(sys.argv[3],"r")
+Report_HLA = open(report,"r")
 for record in vcf_reader_SNP.fetch():
    try:
@@ -60,14 +152,13 @@ for record in vcf_reader_CNV.fetch():
            ListeM = []
            ListeD = []
            body= body.append(htmltag.P("Deletion detectee par xHMM sur le gene "+record.info["GENE"]))
-            for i in range(len(data["hg19"])-1):
+            with open(data) as tsvfile:
-                if not data["hg19"][i]:
+                reader = csv.DictReader(tsvfile, dialect='excel-tab')
-                    pass
+                for row in reader:
-                else:
+                    if row['gene'] != "":
-                    if data["hg19"][i][1] != "gene":
+                        if row['drug'] == record.info["GENE"]:
-                        if data["hg19"][i][1] == record.info["GENE"]:
+                            if row['drug'] not in ListeM:
-                            if data["hg19"][i][0] not in ListeM:
+                                ListeG.append(row['drug'])
-                                ListeM.append(data["hg19"][i][0])
            for Mol in ListeM:
                body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol))
            depth_ave = 0.0
@@ -93,14 +184,13 @@ for record in vcf_reader_CNV.fetch():
        if elm == "<DUP>":
 	    ListeM = []
            body= body.append(htmltag.P("Duplication detectee par xHMM sur le gene "+record.info["GENE"]))
-            for i in range(len(data["hg19"])-1):
+            with open(data) as tsvfile:
-                if not data["hg19"][i]:
+                reader = csv.DictReader(tsvfile, dialect='excel-tab')
-                    pass
+                for row in reader:
-                else:
+                    if row['gene'] != "":
-                    if data["hg19"][i][1] != "gene":
+                        if row['drug'] == record.info["GENE"]:
-                        if data["hg19"][i][1] == record.info["GENE"]:
+                            if row['drug'] not in ListeM:
-                            if data["hg19"][i][0] not in ListeM:
+                                ListeG.append(row['drug'])
-                                ListeM.append(data["hg19"][i][0])
            for Mol in ListeM:
                body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol))
 HLA = Report_HLA.readlines()
@@ -138,20 +228,19 @@ body = body.append(htmltag.P("HLA-C : "+ListeC[0]+"  / "+ListeC[1]))
 body = body.append(htmltag.P("HLA-DQB1 : "+ListeDQB1[0]+" /  "+ListeDQB1[1]))
 body = body.append(htmltag.P("HLA-DRB1 : "+ListeDRB1[0]+"  / "+ListeDRB1[1]))
-for i in range(len(data["hg19"])-1):
+with open(data) as tsvfile:
-    if not data["hg19"][i]:
+    reader = csv.DictReader(tsvfile, dialect='excel-tab')
-        pass
+    for row in reader:
-    else:
+        if row['gene'] != "":
-        if data["hg19"][i][1] != "gene":
+            if row['variants'].startswith("HLA-A"):
-            if data["hg19"][i][2].startswith("HLA-A"):
+                Adigit = row['variants'].split("*")[1]
-                Adigit = data["hg19"][i][2].split("*")[1]
                Adigit2 = Adigit.split(":")[0]+Adigit.split(":")[1]
                HL1 = ListeA[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1]
                HL2 = ListeA[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1]
                if HL1 == Adigit2:
-                    pheno = data["hg19"][i][4]
+                    pheno = row['effect']
-                    publi = data["hg19"][i][5]
+                    publi = row['article']
-                    level = str(data["hg19"][i][10])
+                    level = str(row['LEVEL'])
                    if level.startswith("1"):
                        body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
 			body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
@@ -162,9 +251,9 @@ for i in range(len(data["hg19"])-1):
                        body = body.append(htmltag.P(pheno)+htmltag.P(level))
 			body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
                if HL2 == Adigit2:
-                    pheno = data["hg19"][i][4]
+                    pheno = row['effect']
-                    publi = data["hg19"][i][5]
+                    publi = row['article']
-                    level = str(data["hg19"][i][10])
+                    level = str(row['LEVEL'])
                    if level.startswith("1"):
                        body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
 			body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
@@ -175,15 +264,15 @@ for i in range(len(data["hg19"])-1):
                        body = body.append(htmltag.P(pheno)+htmltag.P(level))
 			body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
-            if data["hg19"][i][2].startswith("HLA-B"):
+            if row['varaints'].startswith("HLA-B"):
-                Bdigit = data["hg19"][i][2].split("*")[1]
+                Bdigit = row['variants'].split("*")[1]
-                Bdigit2 = Adigit.split(":")[0]+Adigit.split(":")[1]
+                Bdigit2 = Bdigit.split(":")[0]+Bdigit.split(":")[1]
-                HL1 = ListeB[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1]
+                HL1 = ListeB[0].split("*")[1].split(":")[0]+ListeB[0].split("*")[1].split(":")[1]
-                HL2 = ListeB[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1]
+                HL2 = ListeB[1].split("*")[1].split(":")[0]+ListeB[1].split("*")[1].split(":")[1]
                if HL1 == Bdigit2:
-                    pheno = data["hg19"][i][4]
+                    pheno = row['effect']
-                    publi = data["hg19"][i][5]
+                    publi = row['article']
-                    level = str(data["hg19"][i][10])
+                    level = str(row['LEVEL'])
                    if level.startswith("1"):
                        body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
 			body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
@@ -194,9 +283,9 @@ for i in range(len(data["hg19"])-1):
                        body = body.append(htmltag.P(pheno)+htmltag.P(level))
 			body = body.append(htmltag.P(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
                if HL2 == Bdigit2:
-                    pheno = data["hg19"][i][4]
+                    pheno = row['effect']
-                    publi = data["hg19"][i][5]
+                    publi = row['article']
-                    level = str(data["hg19"][i][10])
+                    level = str(row['LEVEL'])
                    if level.startswith("1"):
                        body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
 			body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))

--- a/haplo_scan.sh
+++ b/haplo_scan.sh
+#!/bin/bash
+###GAD PIPELINE ###
+## haplo_scan.sh
+## Version : 1.0.0
+## Description : This scprit tke a bam and return a file text with HLA type
+## Usage : qsub -v BAM=<path to the bam>,SORTIE=<path to outdir>,GeneList=<path to gene list>,[LOGFILE=/path/to/the/log/file] haplo_scan.sh
+## Output : file text
+## Requirements : HLAscan
+## Author : Simon.Verdez@etu.u-bourgogne.fr
+## Creation Date : 20180425
+## last revision date : 20180425
+## Known bugs : None
+#$ -N hlascan
+#$ -q batch
+#$ -pe smp 1
+#$ -V
+# Log file path option
+if [ -z ${LOGFILE} ]
+then
+    LOGFILE=haplo_scan.$(date +"%F_%H-%M-%S").log
+fi
+# Logging
+exec 1>> $LOGFILE 2>&1
+echo "$(date +"%F_%H-%M-%S"): START"
+# Check if input file exist
+if [ ! -f $BAM ]
+then
+    echo "Bam file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch haplo_scan.failed
+    exit 1
+fi
+# Check if gene file exist
+if [ ! -f $GeneList ]
+then
+    echo "Gene file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch haplo_scan.failed
+    exit 1
+fi
+# Check if config file exist
+if [ ! -f $CONFIGFILE ]
+then
+    echo "Config file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch annotate_variants_snpeff.failed
+    exit 1
+fi
+# Check if output file is specified
+if [ -z $SORTIE ]
+then
+    echo "SORTIE file is not specified"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch haplo_scan.failed
+    exit 1
+fi
+SAMPLE=`basename ${BAM} | cut -d . -f1`
+RES=${SORTIE}
+GL=/work/gad/shared/analyse/test_HLA/HLAscan_v1.0/hla-ref-5gene/gene_list
+#Launch haplo_scan
+/work/gad/shared/bin/miniconda2/bin/python2.7 /work/gad/shared/bin/HLAscan_v1.0/hla-paper/haplo_scan_v4.0-sv.py $BAM $GL $RES
+hla_exitcode=$?
+echo "hla_scan exit code : $hla_exitcode"
+if [ $hla_exitcode != 0 ]
+then
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch hla_scan.failed
+    exit 1
+fi
+echo "$(date +"%F_%H-%M-%S"): END"
--- a/merge_GATK.sh
+++ b/merge_GATK.sh
+#!/bin/ksh
+### PharmAnnot PIPELINE ###
+## merge_GATK.sh
+## Version : 1.0.0
+## Description : This script merge two vcf file
+## Usage : qsub -pe smp <nb thread> -v INPUTFILE1=</path/to/the/input/vcf/file>,INPUTFILE2=</path/to/the/input/vcf/file>,REF=</path/to/the/ref/file>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] merge_GATK.sh
+## Output : .vcf file containing annotations
+## Requirements : jdk 1.8.0
+## Author : simon.verdez@chu-dijon.fr
+## Creation Date : 20180702
+## last revision date : 20180702
+## Known bugs : None
+#$ -q batch
+#$ -V
+# Log file path option
+if [ -z ${LOGFILE} ]
+then
+    LOGFILE=merge_GATK.$(date +"%F_%H-%M-%S").log
+fi
+# Config file path option
+if [ -z ${CONFIGFILE} ]
+then
+    CONFIGFILE=analysis_config.tsv
+fi
+# Logging
+exec 1>> $LOGFILE 2>&1
+echo "$(date +"%F_%H-%M-%S"): START"
+# Check if config file exist
+if [ ! -f $CONFIGFILE ]
+then
+    echo "Config file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch merge_GATK.failed
+    exit 1
+fi
+# Check if input1 file exist
+if [ ! -f $INPUTFILE1 ]
+then
+    echo "Input1 file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch merge_GATK.failed
+    exit 1
+fi
+# Check if input2 file exist
+if [ ! -f $INPUTFILE2 ]
+then
+    echo "Input2 file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch merge_GATK.failed
+    exit 1
+fi
+# Check if ref file exist
+if [ ! -f $REF ]
+then
+    echo "ref file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch merge_GATK.failed
+    exit 1
+fi
+# Check if output file is specified
+if [ -z $OUTPUTFILE ]
+then
+    echo "Output file is not specified"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch merge_GATK.failed
+    exit 1
+fi
+JAVA=$(grep "javacmd" $CONFIGFILE | awk '{print $2}')
+PATHSCRIPT=$(grep "pipelinebase" $CONFIGFILE | awk '{print $2}')
+GATK=$(grep "GATKbase" $CONFIGFILE | awk '{print $2}')
+echo "command : $JAVA -jar $GATK -T CombineVariants -R $REF --variant $INPUTFILE1 --variant $INPUTFILE2 -o merge.vcf -genotypeMergeOptions UNIQUIFY"
+$JAVA -jar $GATK -T CombineVariants -R $REF --variant $INPUTFILE2 --variant $INPUTFILE2 -o merge.vcf -genotypeMergeOptions UNIQUIFY > $OUTPUTFILE
+genotypeMerge_exitcode=$?
+echo "GenotypeMerge exit code : $genotypeMerge_exitcode"
+if [ $genotypeMerge_exitcode != 0 ]
+then
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch merge.failed
+    exit 1
+fi
+echo "$(date +"%F_%H-%M-%S"): END"
--- a/pharmconfig.tsv
+++ b/pharmconfig.tsv
+HLAlist	/work/gad/shared/analyse/test_HLA/HLAscan_v1.0/hla-ref-5gene/gene_list
+pharmagkbase	/work/gad/sv347413/epilepsie/Pipeline/pharmgkb_v2.vcf
+pharmagktab	/work/gad/sv347413/epilepsie/Pipeline/tableau_pharmgkb.tsv
+omim	/work/gad/sv347413/epilepsie/Pipeline/tableau_OMIM.tsv
+header	/work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf
--- a/tsv2vcf.py
+++ b/tsv2vcf.py
+#!/usr/bin/python
+### PharmAnnot PIPELINE ###
+## tsv2vcf.py
+## Version : 1.1.0
+## Description : Conversion a tsv file in vcf file.
+## Usage : python tsv2vcf.py -i <input/tsv/file> -p <input/tsv/file> -m<input/tsv/file> -o <output/vcf/file> -e <log/file>
+## Output : vcf file
+## Requirements : python 2.7+
+## Author : Simon.Verdez@chu-dijon.fr
+## Creation Date : 20180709
+## last revision date : 20180731
+## Known bugs : None
 import os
 import sys
 import vcf
 import csv
-from pyexcel_ods import *
 from VCF_tools import *
 import subprocess
 from operator import *
+import getopt
+import logging
+#Options
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'i:p:m:o:e:')
+    for opt, arg in opts:
+        if opt in ("-i"):
+            tsvfile = arg
+        if opt in ("-p"):
+            pharmgkb = arg
+        if opt in ("-m"):
+            OMIM = arg
+        elif opt in ("-o"):
+            out_vcf = arg
+        elif opt in ("-e"):
+            logfile = arg
+except getopt.GetoptError:
+    print 'usage : '
+    sys.exit(1)
+# Logging
+logging.basicConfig(filename =  '%s' % (logfile), filemode = 'a', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
+logging.info('start')
+# Manage options
+if not vars().has_key('tsvfile'):
+    logging.exception('No input file given')
+    sys.exit(1)
+if not vars().has_key('pharmgkb'):
+    logging.exception('No pharmgkb database given')
+    sys.exit(1)
+if not vars().has_key('OMIM'):
+    logging.exception('No OMIM database given')
+    sys.exit(1)
+# Check files
+try:
+	f = open(tsvfile, 'r')
+	f.close()
+except IOError:
+    logging.exception('Input file does not exist')
+    sys.exit(1)
+try:
+	f = open(pharmgkb, 'r')
+	f.close()
+except IOError:
+    logging.exception('Clinvar file does not exist')
+    sys.exit(1)
+try:
+	f = open(OMIM, 'r')
+	f.close()
+except IOError:
+    logging.exception('Hgmd file does not exist')
+    sys.exit(1)
-"""extraction du tableau """
-data = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau pharmgkb.ods")
-data2 = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau OMIM.ods")
 """Liste contenant les genes d'interets"""
 ListeG = []
 """recuperation des genes dans le fichier"""
-for i in range(len(data["hg19"])-1):
+with open(pharmgkb) as pharmfile:
-    if not data["hg19"][i]:
+    reader = csv.DictReader(pharmfile, dialect='excel-tab')
-        pass
+    for row in reader:
-    else:
+        if row['gene'] != "":
-        if data["hg19"][i][1] != "gene":
+            if row['gene'] not in ListeG:
-            """remplissage de la liste"""
+                ListeG.append(row['gene'])
-            if data["hg19"][i][1] not in ListeG:
+with open(OMIM) as OMIMfile:
-                ListeG.append((data["hg19"][i][1]))
+    reader = csv.DictReader(OMIMfile, dialect='excel-tab')
-for i in range(len(data2["hg19"])-1):
+    for row in reader:
-    if not data2["hg19"][i]:
+        if row['GENE'] != "":
-        pass
+            if row['GENE'] not in ListeG:
-    else:
+                ListeG.append(row['GENE'])
-        if data2["hg19"][i][1] != "GENE":
+#print ListeG
-            if data2["hg19"][i][1] not in ListeG:
-                ListeG.append((data2["hg19"][i][1]))
-#print(ListeG)
 """ecriture des lignes CNV dans le vcf"""
 ListeVCF=[]
 for gene in ListeG:
-    with open(sys.argv[1]) as tsvfile:
+    with open(tsvfile) as file:
-        reader = csv.DictReader(tsvfile, dialect='excel-tab')
+        reader = csv.DictReader(file, dialect='excel-tab')
        for row in reader:
            if gene in row['GeneList'] and gene != "":
                line_vcf = VCF_CNV()
                line_vcf.CHROM = row["#CHR"]
                line_vcf.POS = row["START"]
-                p = subprocess.Popen(["samtools", 'faidx',"index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
+                p = subprocess.Popen(["samtools", 'faidx',"/work/gad/sv347413/epilepsie/Pipeline/index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
                line_vcf.REF = p.stdout.readlines()[1].strip("\n")
                line_vcf.ALT = "<"+row["CNV"]+">"
                line_vcf.QUAL = "6"
@@ -61,12 +123,6 @@ for gene in ListeG:
 ListeVCF = sorted(ListeVCF,key=attrgetter('POS'))
-#print ListeVCF
 for line in ListeVCF:
-    with open (sys.argv[2],'a') as vcf_out:
+    with open (out_vcf,'a') as vcf_out:
        vcf_out.write(repr(line)+"\n")
-#test = vcf.Reader(filename = "header_CNV.vcf")
-#for record in test:
-#    print record
--- a/wrapper_create_html.sh
+++ b/wrapper_create_html.sh
+#!/bin/ksh
+### GAD PIPELINE ###
+## wrapper_create_html.sh
+## Version : 1.0.0
+## Description : a wrapper for qsubing create_html.py script
+## Usage : qsub -pe smp <nb thread> -v VCF_SNP=</path/to/the/input/vcf/file>,VCF_CNV=</path/to/the/input/vcf/file>,REPORTFILE=<path/to/the/HLAscan/report>,BAMFILE=</path/to/the/bam>,TSVFILE=</path/to/the/tsv>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_create_html.sh
+## Output : create_html.py output
+## Requirements : Recquire script create_html.py and VcfTools
+## Author : Simon.Verdez@u-bourgogne.fr
+## Creation Date : 20180802
+## last revision date : 20180802
+## Known bugs : None
+#$ -q batch
+#$ -V
+# Log file path option
+if [ -z ${LOGFILE} ]
+then
+    LOGFILE=annotate_variants.$(date +"%F_%H-%M-%S").log
+fi
+# Config file path option
+if [ -z ${CONFIGFILE} ]
+then
+    CONFIGFILE=analysis_config.tsv
+fi
+# Logging
+exec 1>> $LOGFILE 2>&1
+echo "$(date +"%F_%H-%M-%S"): START"
+# Check if config file exist
+if [ ! -f $VCF_SNP ]
+then
+    echo "Vcf SNP file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+if [ ! -f $VCF_CNV ]
+then
+    echo "Vcf CNV file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+if [ ! -f $REPORTFILE ]
+then
+    echo "Report HLAscan file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+if [ ! -f $BAMFILE ]
+then
+    echo "BAM file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+if [ ! -f $TSVFILE ]
+then
+    echo "TSV file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+# Check if output file is specified
+if [ -z $OUTPUTFILE ]
+then
+    echo "Output file is not specified"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
+PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
+# Launch python script command and check exit code
+echo "command : $PIPELINEBASE/create_html.py -v $VCF_SNP -c $VCF_CNV -h $REPORTFILE -b $BAMFILE -t $TSVFILE -o $OUTPUTFILE -e $LOGFILE"
+$PYTHONBIN $PIPELINEBASE/create_html.py -v $VCF_SNP -c $VCF_CNV -h $REPORTFILE -b $BAMFILE -t $TSVFILE -o $OUTPUTFILE -e $LOGFILE
+create_html_exitcode=$?
+echo "create_html exit code : $annotate_variants_exitcode"
+if [ $creaete_html_exitcode != 0 ]
+then
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch create_html.failed
+    exit 1
+fi
+echo "$(date +"%F_%H-%M-%S"): END"
--- a/wrapper_tsv2vcf.sh
+++ b/wrapper_tsv2vcf.sh
+#!/bin/ksh
+### GAD PIPELINE ###
+## wrapper_tsv2vcf.sh
+## Version : 1.0.0
+## Description : a wrapper for qsubing tsv2vcf.py script
+## Usage : qsub -pe smp <nb thread> -v INPUTFILE=</path/to/the/input/tsv/file>,PHARMGKB=<ref/file/pharmgkb>,OMIM=<ref/file/OMIM>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_tsv2vcf.sh
+## Output : annotate_variants.py output
+## Requirements : Recquire script tsv2vcf.py and VcfTools
+## Author : Simon.Verdez@u-bourgogne.fr
+## Creation Date : 20180702
+## last revision date : 20180702
+## Known bugs : None
+#$ -q batch
+#$ -V
+ Log file path option
+if [ -z ${LOGFILE} ]
+then
+    LOGFILE=annotate_variants.$(date +"%F_%H-%M-%S").log
+fi
+# Config file path option
+if [ -z ${CONFIGFILE} ]
+then
+    CONFIGFILE=analysis_config.tsv
+fi
+# Logging
+exec 1>> $LOGFILE 2>&1
+echo "$(date +"%F_%H-%M-%S"): START"
+# Check if config file exist
+if [ ! -f $CONFIGFILE ]
+then
+    echo "Config file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch tsv2vcf.failed
+    exit 1
+fi
+# Check if input file exist
+if [ ! -f $INPUTFILE ]
+then
+    echo "Input file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch tsv2vcf.failed
+    exit 1
+fi
+if [ ! -f $PHARMGKB ]
+then
+    echo "PHARMGKB reference file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch tsv2vcf.failed
+    exit 1
+fi
+if [ ! -f $OMIM ]
+then
+    echo "OMIM reference file does not exist"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch tsv2vcf.failed
+    exit 1
+fi
+# Check if output file is specified
+if [ -z $OUTPUTFILE ]
+then
+    echo "Output file is not specified"
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch tsv2vcf.failed
+    exit 1
+fi
+PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
+PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
+# Launch python script command and check exit code
+echo "command : $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE"
+$PYTHONBIN $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE
+tsv2vcf_exitcode=$?
+echo "tsv2vcf exit code : $tsv2vcf_exitcode"
+if [ $tsv2vcf_exitcode != 0 ]
+then
+    echo "$(date +"%F_%H-%M-%S"): END"
+    touch tsv2vcf.failed
+    exit 1
+fi
+echo "$(date +"%F_%H-%M-%S"): END"