Commit 5f8e477e authored by Simon Verdez's avatar Simon Verdez

ccub_test

parent 5e0a7672
#!/bin/bash #!/bin/bash
set -e; ### GAD PIPELINE ###
true; ## Master.sh
## Version : 1.0
## Description : process pharmacoAnnot for exome data
## Usage : qsub -v ANALYSISDIR=<path to the analysis dir>,CONFIGFILE=<path to the config file>,PHARMCONFIG=<path to the config_pharm file> Master.sh
## Output : no standard output
## Requirements : Require pipeline scripts
i=$1 ## Author : Simon.verdez@u-bourgogne.fr
## Creation Date : 20180725
## Last revision date : 20180725
## Known bugs : None.
if [ ! -d /work/gad/sv347413/epilepsie/tmp_res/$i ] #$ -N Master
#$ -q batchbm
#$ -pe smp 1
#$ -V
function jumpto
{
label=$1
cmd=$(sed -n "/$label:/{:a;n;p;ba};" $0 | grep -v ':$')
eval "$cmd"
exit
}
#Logging
logbasename=$(date +"%F_%H-%M-%S")
vcfbasename=$(date +"%F")
reportbasename=$(date +"%F")
if [ -z ${LOGFILE} ]
then then
mkdir /work/gad/sv347413/epilepsie/tmp_res/$i LOGFILE=PharmAnnot.$logbasename.log
fi fi
exec 1>> $LOGFILE 2>&1
if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf ] # Mandatory arguments
if [ -z ${ANALYSISDIR} ]
then then
echo "SNP" echo "ANALYSISDIR was not defined : execution stopped"
/work/gad/sv347413/jdk1.8.0_111/bin/java -jar /work/gad/shared/bin/snpEff/SnpSift.jar annotate pharmgkb_v2.vcf /work/gad/sv347413/epilepsie/vcf_tmp/$i.* > /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf exit 1
fi fi
if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf ] if [ -z ${CONFIGFILE+x} ]
then then
echo "CNV" echo "Config file not provided by the user. You need it to run this script. Stopping execution."
cp /work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf exit
fi fi
if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf ] if [ -z ${PHARMCONFIG+x} ]
then then
echo "CNV2" echo "Config file not provided by the user. You need it to run this script. Stopping execution."
python2.7 tsv2vcf.py /work/gad/sv347413/epilepsie/tsv_tmp/$i* /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf exit
mv /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf
fi fi
if [ ! -e /work/gad/sv347413/epilepsie/results_html/$i.html ]
#Usefull variables
PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
TSVFILE=`grep "pharmagktab" $PHARMCONFIG | cut -f2`
PHARMGKB=`grep "pharmagkbase" $PHARMCONFIG | cut -f2`
OMIM=`grep "omim" $PHARMCONFIG | cut -f2`
HLALIST=`grep "HLAlist" $PHARMCONFIG | cut -f2`
HEADER=`grep "header" $PHARMCONFIG | cut -f2`
# python path for proper execution of python
PYTHONPATH=$PIPELINEBASE/common:/work/gad/shared/bin/lib/python_2.7/lib/python2.7/site-packages:/work/gad/shared/bin/miniconda2/lib/python2.7/site-packages/
export PYTHONPATH
# Varcall status file
if [ -z ${STATUSFILE} ]
then then
echo "html" STATUSFILE=$ANALYSISDIR/status.tsv
python2.7 create_html_v2.py /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf /work/gad/sv347413/HLAscan/results/$i/Report /work/gad/sv377413/HLAscan/bam_tmp/$i.bam > /work/gad/sv347413/epilepsie/results_html/$i.html
fi fi
# Sample list
samples=`find -L $ANALYSISDIR/ -maxdepth 1 -mindepth 1 -type d | xargs -l basename`
# Batch sample list
if [ -e $ANALYSISDIR/batch.list ]
then
rm $ANALYSISDIR/batch.list
fi
find -L $ANALYSISDIR/ -maxdepth 1 -mindepth 1 -type d | xargs -l basename >> $ANALYSISDIR/batch.list
# 0 : Check status file
if [ ! -e $STATUSFILE ]
then
echo "No status file found : Jumping to configuration step"
jumpto configure
else
echo "### Getting last analysis status ###"
# delete .failed files
failedfile=($ANALYSISDIR/*.failed)
if [ -e ${failedfile[0]} ]
then
rm $ANALYSISDIR/*.failed
fi
# get the status
REF=`grep "reference" $STATUSFILE | cut -f2`
HLAscan=`grep "process_HLAscan" $STATUSFILE | cut -f2`
varcallSNP=`grep "process_varcallSNP" $STATUSFILE | cut -f2`
varcallCNV=`grep "process_varcallCNV" $STATUSFILE | cut -f2`
createHTML=`grep "process_createHTML" $STATUSFILE | cut -f2`
if [ "$HLAscan" == "FAIL" ]
then
echo "HLAscan step failed : jumping to HLAscan step"
jumpto HLAscan
else
echo "HLAscan step was OK"
fi
if [ "$varcallSNP" == "FAIL" ]
then
echo "Variant calling SNP first step failed : jumping to variant calling SNP first step"
jumpto varcallSNP
else
echo "Variant calling SNP first step was OK"
fi
if [ "$varcallCNV" == "FAIL" ]
then
echo "Variant calling CNV second step failed : jumping to variant calling CNV second step"
jumpto varcallCNV
else
echo "Variant calling CNV second step was OK"
fi
if [ "$createHTML" == "FAIL" ]
then
echo "CreateHTML step failed : jumping to variant create HTML step"
jumpto createHTML
else
echo "CreateHTML step was OK"
fi
echo "pipeline was correctly executed, no need to relaunch a step"
exit 0
fi
configure:
# Configure variant calling analysis
echo "### Configuration step ###"
echo "Start : $(date +"%F_%H-%M-%S")"
varcallfiles=($ANALYSISDIR/*/*.Master.list)
if [ -e ${varcallfiles[0]} ]
then
rm $ANALYSISDIR/*/*.Master.list
fi
situation=0
# case no family file
if [ "$situation" -eq "0" ]
then
echo "No family file"
for currentSample in $samples
do
echo $currentSample >> $ANALYSISDIR/$currentSample/$currentSample.Master.list
done
fi
HLAscan:
# HLA genotyping
echo "### Execute HLAscan ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
echo "Command : qsub -pe smp 1 -N HLAscan_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,OUTPUTFILE=$ANALYSISDIR/$currentSample/HLAscan,GeneList=$GL,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_HLAscan.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/haplo_scan.sh"
qsub -pe smp 1 -N HLAscan_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v BAM=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,SORTIE=$ANALYSISDIR/$currentSample/HLAscan,GeneList=$GL,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_HLAscan.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/haplo_scan.sh
done
echo "End : $(date +"%F_%H-%M-%S")"
varcallSNP:
# Variant calling first step
echo "### Variant calling SNP first step ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
echo "Command : qsub -pe smp 1 -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -N varcallSNP_${currentSample} -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.raw.vcf,PHARMGKB=$PHARMGKB,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallSNP.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/annotate_variants_pharmAnnot.sh"
qsub -pe smp 1 -N varcallSNP_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.raw.vcf,PHARMGKB=$PHARMGKB,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallSNP.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/annotate_variants_pharmAnnot.sh
done
echo "End : $(date +"%F_%H-%M-%S")"
varcallCNV:
# Variant calling second step
echo "### Variant calling CNV second step ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
echo "Command : cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf"
cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf
cnvfile=`ls -lht $ANALYSISDIR/$currentSample/ | grep "\.annot.tsv" | head -1 | awk '{print $9}'`
echo "Command : qsub -pe smp 1 -N varcallCNV_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh"
qsub -pe smp 1 -N tsv2vcf_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasenameCNV.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh
done
echo "End : $(date +"%F_%H-%M-%S")"
createHTML:
# Generate a HTML report_vcf
echo "### Generate HTML report ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
echo "Command : qsub -pe smp 1 -N createHTML_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v VCF_SNP=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,VCF_CNV=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,REPORTFILE=$ANALYSISDIR/$currentSample/HLAscan/Report,BAMFILE=$ANALYSISDIR/$currentSample/$currentSample.dedup.bam,TSVFILE=$TSVFILE,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.html,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_create_html.sh"
qsub -pe smp 1 -N createHTML_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v VCFFILE_SNP=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,VCFFILE_CNV=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.CNV.vcf,REPORTFILE=$ANALYSISDIR/$currentSample/$currentSample.report,BAMFILE=$ANALYSISDIR/$currentSample/$currentSample.bam,TSVFILE=$TSVFILE,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.html,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_create_html.sh
done
echo "End : $(date +"%F_%H-%M-%S")"
MergeVCF:
#merge vcf fies
echo "### Merge VCF files"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
echo "Command : qsub -pe smp 1 -N mergeVCF_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE1=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.CNV.vcf,INPUTFILE2=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,REF=$REF,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.merge.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/merge_GATK.sh"
qsub -pe smp 1 -N mergeVCF_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE1=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.CNV.vcf,INPUTFILE2=$ANALYSISDIR/$currentSample/$currentSample.pharmacoAnnot.SNP.vcf,REF=$REF,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcall.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/merge_GATK.sh
done
echo "End : $(date +"%F_%H-%M-%S")"
File added
pipelinebase /work/gad/shared/pipeline/pharmacogen_test/pharmAnnot
pythonbin /work/gad/shared/bin/miniconda2/bin/python
GATKbase /work/gad/shared/bin/GATK_3.7
javacmd /soft/java/jdk_x64/bin/java
picardbase /work/gad/shared/bin/PICARD_2.4.1
temporary_dir /work/gad/shared/tmp/
reference /work/gad/shared/pipeline/hg19/index/hg19_essential.fa
refdict /work/gad/shared/pipeline/hg19/index/hg19_essential.dict
refdir /work/gad/shared/pipeline/hg19/index
dbsnp /work/gad/shared/pipeline/hg19/clinvar/dbsnp.vcf
mainqueue batch
autosubqueue batchbm
bwacmd /work/gad/shared/bin/bwa-0.7.15
fastqccmd /work/gad/shared/bin/FastQC_0.11.4/fastqc
fastqc_config /work/gad/shared/bin/FastQC_0.11.4/Configuration/limits.txt
trimmomatic /work/gad/shared/bin/Trimmomatic-0.35/trimmomatic-0.35.jar
adapters_file /work/gad/shared/bin/Trimmomatic-0.35/adapters/TruSeq3-PE-All.fa
samtools /work/gad/shared/bin/samtools-1.5/
igvtools /work/gad/shared/bin/IGVTools_2.3.67/igvtools.jar
targetlist /work/gad/shared/pipeline/hg19/targets/
refseqlist /work/gad/shared/pipeline/hg19/refseq/refseq.cds.2bp.hg19.with_genes.interval_list
refseqgenelist /work/gad/shared/pipeline/hg19/refseq/genetrack.hg19.sorted.refseq
controlfile /work/gad/shared/pipeline/samples/control_100_samples.g.vcf
email emilie.tisserant@u-bourgogne.fr
clinvar_vcf /work/gad/shared/pipeline/hg19/clinvar/clinvar.vcf
cosmic_vcf /work/gad/shared/pipeline/hg19/cosmic/CosmicCodingMuts.vcf
omim_list /work/gad/shared/pipeline/hg19/omim/OMIM2.list
acmg_list /work/gad/shared/pipeline/hg19/omim/ACMG.list
common_list /work/gad/shared/pipeline/hg19/clinvar/common.list
ssa_nb_files 10
tabixpath /work/gad/shared/bin/htslib-1.3/bin/tabix
kaviar /work/gad/shared/pipeline/hg19/kaviar/Kaviar-160204-Public-hg19.vcf.gz
regulomedb /work/gad/shared/pipeline/hg19/regulomeDB/RegulomeDB.dbSNP141.txt.gz
targetscan /work/gad/shared/pipeline/hg19/targetscan/Predicted_Targets.hg19.bed
cytobands /work/gad/shared/pipeline/hg19/cnv/cytobands.tsv
dgv_variants /work/gad/shared/pipeline/hg19/cnv/GRCh37_hg19_variants_2015-07-23.txt.gz
cnv_map /work/gad/shared/pipeline/hg19/cnv/Stringent.hg19.2015-02-03.txt.gz
isca_benign_cnv /work/gad/shared/pipeline/hg19/cnv/iscaBenign.txt.gz
isca_pathogenic_cnv /work/gad/shared/pipeline/hg19/cnv/iscaPathogenic.txt.gz
devdelay_control_cnv /work/gad/shared/pipeline/hg19/cnv/cnvDevDelayControl.txt.gz
devdelay_case_cnv /work/gad/shared/pipeline/hg19/cnv/cnvDevDelayCase.txt.gz
clingen_dosage_sensitive_gene /work/gad/shared/pipeline/hg19/cnv/ClinGen_gene_curation_list.tsv
xhmmbin /work/gad/shared/bin/xhmm_1.0/xhmm
xhmmparams /work/gad/shared/pipeline/hg19/cnv/xhmm_params.txt
cnvtarget /work/gad/shared/pipeline/hg19/cnv/cnv_targets_refseq_only_coding_exon_2bp.hg19.list
targetgc /work/gad/shared/pipeline/hg19/cnv/cnv_targets_refseq_only_coding_exon_2bp.hg19_GC.txt
docmatrix /work/gad/shared/pipeline/hg19/cnv/ALL.CNV.RD.txt
extremegc /work/gad/shared/pipeline/hg19/cnv/extreme_gc_targets.txt
lowcomplexity /work/gad/shared/pipeline/hg19/cnv/low_complexity_targets.txt
controlfreec /work/gad/shared/bin/ControlFreeC
gemMappabilityFile /work/gad/shared/pipeline/hg19/cnv/out100m1_hg19.gem
chrLenFile /work/gad/shared/pipeline/hg19/index/hg19.len
chrFiles /work/gad/shared/pipeline/hg19/index/chr/
lumpybase /work/gad/shared/bin/lumpy/lumpy-sv-master/
mapabilitybedgraph /work/gad/shared/pipeline/hg19/cnv/wgEncodeCrgMapabilityAlign100mer_no_uniq.bedgraph
blacklistfile /work/gad/shared/pipeline/hg19/cnv/wgEncodeDacMapabilityConsensusExcludable.txt
repeatmasker /work/gad/shared/pipeline/hg19/cnv/UCSC_rmsk.hg19.sort.bed.gz
bedtools /work/gad/shared/bin/bedtools
nist_bed /work/gad/shared/pipeline/hg19/nist/agilent_CRE.refseq_cds.NA12878_GIAB_highconf.intersect.hg19.bed
nist_vcf /work/gad/shared/pipeline/hg19/nist/NA12878_GIAB_highconf.agilent.refseq.intersect.vcf
hg19gtf /work/gad/shared/pipeline/hg19/refseq/hg19.refseq.with_genes.gtf
star2 /work/gad/shared/bin/STAR-2.5.2b/source
circosconfdir /work/gad/shared/pipeline/hg19/circos/
circosbin /work/gad/shared/bin/circos-0.69-2/bin/circos
rscript /soft/R/3.2.3/intel/13.1.3/bin/Rscript
snpeffbase /work/gad/shared/bin/snpEff
exacvcf /work/gad/shared/pipeline/hg19/ExAC/ExAC.r0.3.1.sites.vep.vcf.gz
exacscores /work/gad/shared/pipeline/hg19/ExAC/fordist_cleaned_exac_r03_march16_z_pli_rec_null_data.txt
dbnsfp /work/gad/shared/bin/snpEff/data/dbNSFP/dbNSFP2.9.txt.gz
gnomadex /work/gad/shared/pipeline/hg19/ExAC/gnomad.exomes.r2.0.2.sites.vcf.gz
gnomadge /work/gad/shared/pipeline/hg19/ExAC/gnomad.genomes.r2.0.2.sites.vcf.gz
gnomadchr /work/gad/shared/pipeline/hg19/ExAC/gnomad.genomes.r2.0.2.sites
hgmd_vcf /work/gad/shared/pipeline/hg19/HGMD/HGMD_PRO_2017.2_hg19.vcf
regulatoryfeatures /work/gad/shared/pipeline/hg19/ensembl_regulation/regulatory_features.bed
motiffeatures /work/gad/shared/pipeline/hg19/ensembl_regulation/motif_features.bed
encodednase /work/gad/shared/pipeline/hg19/Encode/wgEncodeRegDnaseClusteredV3.bed
encodetfbs /work/gad/shared/pipeline/hg19/Encode/wgEncodeRegTfbsClusteredV3.bed
linsight /work/gad/shared/pipeline/hg19/LINSIGHT
fantom /work/gad/shared/pipeline/hg19/FANTOM/fantom_cage_peak.bed
isgenic /work/gad/shared/pipeline/hg19/FANTOM/isGenic.bed
genelists /work/gad/shared/pipeline/hg19/gene_lists/
vtbase /work/gad/shared/bin/vt
#!/bin/ksh
### GAD PIPELINE ###
## annotate_variants_snpeff.sh
## Version : 1.0
## Description : This script allows user to launch SnpEff and SNpSift for basic variant annotation, frequency annotation and score annotation
## Usage : qsub -pe smp <nb thread> -v INPUTFILE=<raw/vcf/file>,OUTPUTFILE=</path/to/the/output/vcf/file>,PHARMGKB=<ref/file/pharmgkb>,[NOCHECK=yes|no],[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] annotate_variants_pharmAnnot.sh
## Output : .vcf file containing annotations
## Requirements : jdk 1.8.0
## Author : Simon.Verdez@u-bourgogne.fr
## Creation Date : 20180727
## last revision date : 20180727
## Known bugs : None
#$ -q batch
#$ -V
# Log file path option
if [ -z ${LOGFILE} ]
then
LOGFILE=annotate_variants_snpeff.$(date +"%F_%H-%M-%S").log
fi
# Config file path option
if [ -z ${CONFIGFILE} ]
then
CONFIGFILE=analysis_config.tsv
fi
# Logging
exec 1>> $LOGFILE 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if config file exist
if [ ! -f $CONFIGFILE ]
then
echo "Config file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_snpeff.failed
exit 1
fi
# Check if input file exist
if [ ! -f $INPUTFILE ]
then
echo "Input file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_snpeff.failed
exit 1
fi
if [ ! -f $PHARMGKB ]
then
echo "ref pharmgkb file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_snpeff.failed
exit 1
fi
# Check if output file is specified
if [ -z $OUTPUTFILE ]
then
echo "Output file is not specified"
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_snpeff.failed
exit 1
fi
JAVA=$(grep "javacmd" $CONFIGFILE | awk '{print $2}')
PATHSCRIPT=$(grep "pipelinebase" $CONFIGFILE | awk '{print $2}')
SNPEFF=$(grep "snpeffbase" $CONFIGFILE | awk '{print $2}')
# Launch Snpeff and SnpSift command and check exit code
echo "Command: $JAVA -Xmx4g -jar $SNPEFF/SnpSift.jar Annotate $PHARMGKB $INPUTFILE > $OUTPUTFILE"
bash -o pipefail -c "$JAVA -Xmx4g -jar $SNPEFF/SnpSift.jar Annotate $PHARMGKB $INPUTFILE > $OUTPUTFILE"
snpeff_exitcode=$?
echo "snpeff exit code : $snpeff_exitcode"
if [ $snpeff_exitcode != 0 ]
then
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_Annot.failed
exit 1
fi
if [ -z ${NOCHECK} ] || [ ${NOCHECK} == "no" ]
then
chr=`tail -1 $OUTPUTFILE | cut -f1`
if [ $chr != "chrX" ] && [ $chr != "chrY" ] && [ $chr != "X" ] && [ $chr != "Y" ]
then
echo "vcf file does not seem complete"
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_snpeff.failed
exit 1
fi
fi
echo "$(date +"%F_%H-%M-%S"): END"
#!/usr/bin/python
### PharmAnnot PIPELINE ###
## create_html_v2.py
## Version : 1.0.0
## Description : Create a html file with vcf file, BAM file and report HLAscan
## Usage : python create_html_v2.py -v vcf_SNP -c vcf_CNV -h report_HLA -b BAM -t tsvfile -o html -e log
## Output : Annotated vcf file
## Requirements : python 2.7+
## Author : Simon.Verdez@u-bourgogne.fr
## Creation Date : 20180731
## last revision date : 20180731
## Known bugs : None
#Example : python create_html_v2.py dijex2000.vcf report_HLA BAM
import sys import sys
import subprocess import subprocess
from pyexcel_ods import * import csv
from pysam import VariantFile from pysam import VariantFile
import htmltag import htmltag
# Options
try:
opts, args = getopt.getopt(sys.argv[1:], 'v:h:b:o:e:')
for opt, arg in opts:
if opt in ("-v"):
vcfSNP = arg
if opt in ("-c"):
vcfCNV = arg
if opt in ("-h"):
report = arg
if opt in ("-b"):
bam = arg
if opt in ("-t"):
tsvfile = arg
elif opt in ("-o"):
sys.stdout = open(arg, 'w')
elif opt in ("-e"):
logfile = arg
except getopt.GetoptError:
print 'usage : '
sys.exit(1)
# Logging
logging.basicConfig(filename = '%s' % (logfile), filemode = 'a', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
logging.info('start')
# Manage options
if not vars().has_key('vcf_SNP'):
logging.exception('No vcf SNP file given')
sys.exit(1)
if not vars().has_key('vcf_CNV'):
logging.exception('No vcf CNV file given')
sys.exit(1)
if not vars().has_key('report'):
logging.exception('No report HLAscan given')
sys.exit(1)
if not vars().has_key('bam'):
logging.exception('No bam file given')
sys.exit(1)
if not vars().has_key('tsvfile'):
logging.exception('No tsv file given')
sys.exit(1)
# Check files
try:
f = open(vcf_SNP, 'r')
f.close()
except IOError:
logging.exception('vcf_SNP file does not exist')
sys.exit(1)
try:
f = open(vcf_CNV, 'r')
f.close()
except IOError:
logging.exception('vcf_CNV file does not exist')
sys.exit(1)
try:
f = open(report, 'r')
f.close()
except IOError:
logging.exception('Report HLAscan file does not exist')
sys.exit(1)
try:
f = open(bam, 'r')
f.close()
except IOError:
logging.exception('BAM file does not exist')
sys.exit(1)
try:
f = open(tsvfile, 'r')
f.close()
except IOError:
logging.exception('Tsv file does not exist')
sys.exit(1)
" construction du formulaire en premier " " construction du formulaire en premier "
head = htmltag.HEAD() head = htmltag.HEAD()
head = htmltag.append(htmltag.TITLE('pharmacogenetic')) head = htmltag.append(htmltag.TITLE('pharmacogenetic'))
...@@ -14,17 +107,16 @@ head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar li { backgro ...@@ -14,17 +107,16 @@ head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar li { backgro
head = head.append(htmltag.STYLE(type="text/css").append("P1 {color: red }")) head = head.append(htmltag.STYLE(type="text/css").append("P1 {color: red }"))
head = head.append(htmltag.STYLE(type="text/css").append("P2 { color: orange }")) head = head.append(htmltag.STYLE(type="text/css").append("P2 { color: orange }"))
body = htmltag.BODY() body = htmltag.BODY()
body = body.append(htmltag.UL(CLASS = "navbar").append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://www.pharmgkb.org").append("pharmgkb")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://cpicpgx.org").append("cpicpgx")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "http://www.pharmacogenetics.fr/").append("pharmacogenetics.fr"))))) body = body.append(htmltag.UL(CLASS = "navbar").append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://www.pharmgkb.org").append("pharmgkb")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://cpicpgx.org").append("cpicpgx")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "http://www.pharmacogenetics.fr/").append("pharmacogenetics.fr")))))
body = body.append(htmltag.H2('Variants pharmgkb')) body = body.append(htmltag.H2('Variants pharmgkb'))
"""extraction des donnees """ """extraction des donnees """
data = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau pharmgkb.ods") data = get_data(tsvfile)
vcf_reader_SNP = VariantFile(sys.argv[1]) vcf_reader_SNP = VariantFile(vcfSNP)
vcf_reader_CNV = VariantFile(sys.argv[2]) vcf_reader_CNV = VariantFile(vcfCNV)
BAM_FILE = VariantFile(sys.argv[4]) BAM_FILE = VariantFile(bam)
Report_HLA = open(sys.argv[3],"r") Report_HLA = open(report,"r")
for record in vcf_reader_SNP.fetch(): for record in vcf_reader_SNP.fetch():
try: try:
...@@ -60,14 +152,13 @@ for record in vcf_reader_CNV.fetch(): ...@@ -60,14 +152,13 @@ for record in vcf_reader_CNV.fetch():
ListeM = [] ListeM = []
ListeD = [] ListeD = []
body= body.append(htmltag.P("Deletion detectee par xHMM sur le gene "+record.info["GENE"])) body= body.append(htmltag.P("Deletion detectee par xHMM sur le gene "+record.info["GENE"]))
for i in range(len(data["hg19"])-1): with open(data) as tsvfile:
if not data["hg19"][i]: reader = csv.DictReader(tsvfile, dialect='excel-tab')
pass for row in reader:
else: if row['gene'] != "":
if data["hg19"][i][1] != "gene": if row['drug'] == record.info["GENE"]:
if data["hg19"][i][1] == record.info["GENE"]: if row['drug'] not in ListeM:
if data["hg19"][i][0] not in ListeM: ListeG.append(row['drug'])
ListeM.append(data["hg19"][i][0])
for Mol in ListeM: for Mol in ListeM:
body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol)) body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol))
depth_ave = 0.0 depth_ave = 0.0
...@@ -93,14 +184,13 @@ for record in vcf_reader_CNV.fetch(): ...@@ -93,14 +184,13 @@ for record in vcf_reader_CNV.fetch():
if elm == "<DUP>": if elm == "<DUP>":
ListeM = [] ListeM = []
body= body.append(htmltag.P("Duplication detectee par xHMM sur le gene "+record.info["GENE"])) body= body.append(htmltag.P("Duplication detectee par xHMM sur le gene "+record.info["GENE"]))
for i in range(len(data["hg19"])-1): with open(data) as tsvfile:
if not data["hg19"][i]: reader = csv.DictReader(tsvfile, dialect='excel-tab')
pass for row in reader:
else: if row['gene'] != "":
if data["hg19"][i][1] != "gene": if row['drug'] == record.info["GENE"]:
if data["hg19"][i][1] == record.info["GENE"]: if row['drug'] not in ListeM:
if data["hg19"][i][0] not in ListeM: ListeG.append(row['drug'])
ListeM.append(data["hg19"][i][0])
for Mol in ListeM: for Mol in ListeM:
body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol)) body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol))
HLA = Report_HLA.readlines() HLA = Report_HLA.readlines()
...@@ -138,20 +228,19 @@ body = body.append(htmltag.P("HLA-C : "+ListeC[0]+" / "+ListeC[1])) ...@@ -138,20 +228,19 @@ body = body.append(htmltag.P("HLA-C : "+ListeC[0]+" / "+ListeC[1]))
body = body.append(htmltag.P("HLA-DQB1 : "+ListeDQB1[0]+" / "+ListeDQB1[1])) body = body.append(htmltag.P("HLA-DQB1 : "+ListeDQB1[0]+" / "+ListeDQB1[1]))
body = body.append(htmltag.P("HLA-DRB1 : "+ListeDRB1[0]+" / "+ListeDRB1[1])) body = body.append(htmltag.P("HLA-DRB1 : "+ListeDRB1[0]+" / "+ListeDRB1[1]))
for i in range(len(data["hg19"])-1): with open(data) as tsvfile:
if not data["hg19"][i]: reader = csv.DictReader(tsvfile, dialect='excel-tab')
pass for row in reader:
else: if row['gene'] != "":
if data["hg19"][i][1] != "gene": if row['variants'].startswith("HLA-A"):
if data["hg19"][i][2].startswith("HLA-A"): Adigit = row['variants'].split("*")[1]
Adigit = data["hg19"][i][2].split("*")[1]
Adigit2 = Adigit.split(":")[0]+Adigit.split(":")[1] Adigit2 = Adigit.split(":")[0]+Adigit.split(":")[1]
HL1 = ListeA[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1] HL1 = ListeA[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1]
HL2 = ListeA[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1] HL2 = ListeA[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1]
if HL1 == Adigit2: if HL1 == Adigit2:
pheno = data["hg19"][i][4] pheno = row['effect']
publi = data["hg19"][i][5] publi = row['article']
level = str(data["hg19"][i][10]) level = str(row['LEVEL'])
if level.startswith("1"): if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level))) body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
...@@ -162,9 +251,9 @@ for i in range(len(data["hg19"])-1): ...@@ -162,9 +251,9 @@ for i in range(len(data["hg19"])-1):
body = body.append(htmltag.P(pheno)+htmltag.P(level)) body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if HL2 == Adigit2: if HL2 == Adigit2:
pheno = data["hg19"][i][4] pheno = row['effect']
publi = data["hg19"][i][5] publi = row['article']
level = str(data["hg19"][i][10]) level = str(row['LEVEL'])
if level.startswith("1"): if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level))) body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
...@@ -175,15 +264,15 @@ for i in range(len(data["hg19"])-1): ...@@ -175,15 +264,15 @@ for i in range(len(data["hg19"])-1):
body = body.append(htmltag.P(pheno)+htmltag.P(level)) body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if data["hg19"][i][2].startswith("HLA-B"): if row['varaints'].startswith("HLA-B"):
Bdigit = data["hg19"][i][2].split("*")[1] Bdigit = row['variants'].split("*")[1]
Bdigit2 = Adigit.split(":")[0]+Adigit.split(":")[1] Bdigit2 = Bdigit.split(":")[0]+Bdigit.split(":")[1]
HL1 = ListeB[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1] HL1 = ListeB[0].split("*")[1].split(":")[0]+ListeB[0].split("*")[1].split(":")[1]
HL2 = ListeB[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1] HL2 = ListeB[1].split("*")[1].split(":")[0]+ListeB[1].split("*")[1].split(":")[1]
if HL1 == Bdigit2: if HL1 == Bdigit2:
pheno = data["hg19"][i][4] pheno = row['effect']
publi = data["hg19"][i][5] publi = row['article']
level = str(data["hg19"][i][10]) level = str(row['LEVEL'])
if level.startswith("1"): if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level))) body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
...@@ -194,9 +283,9 @@ for i in range(len(data["hg19"])-1): ...@@ -194,9 +283,9 @@ for i in range(len(data["hg19"])-1):
body = body.append(htmltag.P(pheno)+htmltag.P(level)) body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if HL2 == Bdigit2: if HL2 == Bdigit2:
pheno = data["hg19"][i][4] pheno = row['effect']
publi = data["hg19"][i][5] publi = row['article']
level = str(data["hg19"][i][10]) level = str(row['LEVEL'])
if level.startswith("1"): if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level))) body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))) body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
......
#!/bin/bash
###GAD PIPELINE ###
## haplo_scan.sh
## Version : 1.0.0
## Description : This scprit tke a bam and return a file text with HLA type
## Usage : qsub -v BAM=<path to the bam>,SORTIE=<path to outdir>,GeneList=<path to gene list>,[LOGFILE=/path/to/the/log/file] haplo_scan.sh
## Output : file text
## Requirements : HLAscan
## Author : Simon.Verdez@etu.u-bourgogne.fr
## Creation Date : 20180425
## last revision date : 20180425
## Known bugs : None
#$ -N hlascan
#$ -q batch
#$ -pe smp 1
#$ -V
# Log file path option
if [ -z ${LOGFILE} ]
then
LOGFILE=haplo_scan.$(date +"%F_%H-%M-%S").log
fi
# Logging
exec 1>> $LOGFILE 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if input file exist
if [ ! -f $BAM ]
then
echo "Bam file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch haplo_scan.failed
exit 1
fi
# Check if gene file exist
if [ ! -f $GeneList ]
then
echo "Gene file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch haplo_scan.failed
exit 1
fi
# Check if config file exist
if [ ! -f $CONFIGFILE ]
then
echo "Config file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch annotate_variants_snpeff.failed
exit 1
fi
# Check if output file is specified
if [ -z $SORTIE ]
then
echo "SORTIE file is not specified"
echo "$(date +"%F_%H-%M-%S"): END"
touch haplo_scan.failed
exit 1
fi
SAMPLE=`basename ${BAM} | cut -d . -f1`
RES=${SORTIE}
GL=/work/gad/shared/analyse/test_HLA/HLAscan_v1.0/hla-ref-5gene/gene_list
#Launch haplo_scan
/work/gad/shared/bin/miniconda2/bin/python2.7 /work/gad/shared/bin/HLAscan_v1.0/hla-paper/haplo_scan_v4.0-sv.py $BAM $GL $RES
hla_exitcode=$?
echo "hla_scan exit code : $hla_exitcode"
if [ $hla_exitcode != 0 ]
then
echo "$(date +"%F_%H-%M-%S"): END"
touch hla_scan.failed
exit 1
fi
echo "$(date +"%F_%H-%M-%S"): END"
#!/bin/ksh
### PharmAnnot PIPELINE ###
## merge_GATK.sh
## Version : 1.0.0
## Description : This script merge two vcf file
## Usage : qsub -pe smp <nb thread> -v INPUTFILE1=</path/to/the/input/vcf/file>,INPUTFILE2=</path/to/the/input/vcf/file>,REF=</path/to/the/ref/file>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] merge_GATK.sh
## Output : .vcf file containing annotations
## Requirements : jdk 1.8.0
## Author : simon.verdez@chu-dijon.fr
## Creation Date : 20180702
## last revision date : 20180702
## Known bugs : None
#$ -q batch
#$ -V
# Log file path option
if [ -z ${LOGFILE} ]
then
LOGFILE=merge_GATK.$(date +"%F_%H-%M-%S").log
fi
# Config file path option
if [ -z ${CONFIGFILE} ]
then
CONFIGFILE=analysis_config.tsv
fi
# Logging
exec 1>> $LOGFILE 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if config file exist
if [ ! -f $CONFIGFILE ]
then
echo "Config file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch merge_GATK.failed
exit 1
fi
# Check if input1 file exist
if [ ! -f $INPUTFILE1 ]
then
echo "Input1 file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch merge_GATK.failed
exit 1
fi
# Check if input2 file exist
if [ ! -f $INPUTFILE2 ]
then
echo "Input2 file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch merge_GATK.failed
exit 1
fi
# Check if ref file exist
if [ ! -f $REF ]
then
echo "ref file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch merge_GATK.failed
exit 1
fi
# Check if output file is specified
if [ -z $OUTPUTFILE ]
then
echo "Output file is not specified"
echo "$(date +"%F_%H-%M-%S"): END"
touch merge_GATK.failed
exit 1
fi
JAVA=$(grep "javacmd" $CONFIGFILE | awk '{print $2}')
PATHSCRIPT=$(grep "pipelinebase" $CONFIGFILE | awk '{print $2}')
GATK=$(grep "GATKbase" $CONFIGFILE | awk '{print $2}')
echo "command : $JAVA -jar $GATK -T CombineVariants -R $REF --variant $INPUTFILE1 --variant $INPUTFILE2 -o merge.vcf -genotypeMergeOptions UNIQUIFY"
$JAVA -jar $GATK -T CombineVariants -R $REF --variant $INPUTFILE2 --variant $INPUTFILE2 -o merge.vcf -genotypeMergeOptions UNIQUIFY > $OUTPUTFILE
genotypeMerge_exitcode=$?
echo "GenotypeMerge exit code : $genotypeMerge_exitcode"
if [ $genotypeMerge_exitcode != 0 ]
then
echo "$(date +"%F_%H-%M-%S"): END"
touch merge.failed
exit 1
fi
echo "$(date +"%F_%H-%M-%S"): END"
HLAlist /work/gad/shared/analyse/test_HLA/HLAscan_v1.0/hla-ref-5gene/gene_list
pharmagkbase /work/gad/sv347413/epilepsie/Pipeline/pharmgkb_v2.vcf
pharmagktab /work/gad/sv347413/epilepsie/Pipeline/tableau_pharmgkb.tsv
omim /work/gad/sv347413/epilepsie/Pipeline/tableau_OMIM.tsv
header /work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf
#!/usr/bin/python
### PharmAnnot PIPELINE ###
## tsv2vcf.py
## Version : 1.1.0
## Description : Conversion a tsv file in vcf file.
## Usage : python tsv2vcf.py -i <input/tsv/file> -p <input/tsv/file> -m<input/tsv/file> -o <output/vcf/file> -e <log/file>
## Output : vcf file
## Requirements : python 2.7+
## Author : Simon.Verdez@chu-dijon.fr
## Creation Date : 20180709
## last revision date : 20180731
## Known bugs : None
import os import os
import sys import sys
import vcf import vcf
import csv import csv
from pyexcel_ods import *
from VCF_tools import * from VCF_tools import *
import subprocess import subprocess
from operator import * from operator import *
import getopt
import logging
#Options
try:
opts, args = getopt.getopt(sys.argv[1:], 'i:p:m:o:e:')
for opt, arg in opts:
if opt in ("-i"):
tsvfile = arg
if opt in ("-p"):
pharmgkb = arg
if opt in ("-m"):
OMIM = arg
elif opt in ("-o"):
out_vcf = arg
elif opt in ("-e"):
logfile = arg
except getopt.GetoptError:
print 'usage : '
sys.exit(1)
# Logging
logging.basicConfig(filename = '%s' % (logfile), filemode = 'a', level = logging.INFO, format = '%(asctime)s %(levelname)s - %(message)s')
logging.info('start')
# Manage options
if not vars().has_key('tsvfile'):
logging.exception('No input file given')
sys.exit(1)
if not vars().has_key('pharmgkb'):
logging.exception('No pharmgkb database given')
sys.exit(1)
if not vars().has_key('OMIM'):
logging.exception('No OMIM database given')
sys.exit(1)
# Check files
try:
f = open(tsvfile, 'r')
f.close()
except IOError:
logging.exception('Input file does not exist')
sys.exit(1)
try:
f = open(pharmgkb, 'r')
f.close()
except IOError:
logging.exception('Clinvar file does not exist')
sys.exit(1)
try:
f = open(OMIM, 'r')
f.close()
except IOError:
logging.exception('Hgmd file does not exist')
sys.exit(1)
"""extraction du tableau """
data = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau pharmgkb.ods")
data2 = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau OMIM.ods")
"""Liste contenant les genes d'interets""" """Liste contenant les genes d'interets"""
ListeG = [] ListeG = []
"""recuperation des genes dans le fichier""" """recuperation des genes dans le fichier"""
for i in range(len(data["hg19"])-1): with open(pharmgkb) as pharmfile:
if not data["hg19"][i]: reader = csv.DictReader(pharmfile, dialect='excel-tab')
pass for row in reader:
else: if row['gene'] != "":
if data["hg19"][i][1] != "gene": if row['gene'] not in ListeG:
"""remplissage de la liste""" ListeG.append(row['gene'])
if data["hg19"][i][1] not in ListeG: with open(OMIM) as OMIMfile:
ListeG.append((data["hg19"][i][1])) reader = csv.DictReader(OMIMfile, dialect='excel-tab')
for i in range(len(data2["hg19"])-1): for row in reader:
if not data2["hg19"][i]: if row['GENE'] != "":
pass if row['GENE'] not in ListeG:
else: ListeG.append(row['GENE'])
if data2["hg19"][i][1] != "GENE": #print ListeG
if data2["hg19"][i][1] not in ListeG:
ListeG.append((data2["hg19"][i][1]))
#print(ListeG)
"""ecriture des lignes CNV dans le vcf""" """ecriture des lignes CNV dans le vcf"""
ListeVCF=[] ListeVCF=[]
for gene in ListeG: for gene in ListeG:
with open(sys.argv[1]) as tsvfile: with open(tsvfile) as file:
reader = csv.DictReader(tsvfile, dialect='excel-tab') reader = csv.DictReader(file, dialect='excel-tab')
for row in reader: for row in reader:
if gene in row['GeneList'] and gene != "": if gene in row['GeneList'] and gene != "":
line_vcf = VCF_CNV() line_vcf = VCF_CNV()
line_vcf.CHROM = row["#CHR"] line_vcf.CHROM = row["#CHR"]
line_vcf.POS = row["START"] line_vcf.POS = row["START"]
p = subprocess.Popen(["samtools", 'faidx',"index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE) p = subprocess.Popen(["samtools", 'faidx',"/work/gad/sv347413/epilepsie/Pipeline/index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
line_vcf.REF = p.stdout.readlines()[1].strip("\n") line_vcf.REF = p.stdout.readlines()[1].strip("\n")
line_vcf.ALT = "<"+row["CNV"]+">" line_vcf.ALT = "<"+row["CNV"]+">"
line_vcf.QUAL = "6" line_vcf.QUAL = "6"
...@@ -61,12 +123,6 @@ for gene in ListeG: ...@@ -61,12 +123,6 @@ for gene in ListeG:
ListeVCF = sorted(ListeVCF,key=attrgetter('POS')) ListeVCF = sorted(ListeVCF,key=attrgetter('POS'))
#print ListeVCF
for line in ListeVCF: for line in ListeVCF:
with open (sys.argv[2],'a') as vcf_out: with open (out_vcf,'a') as vcf_out:
vcf_out.write(repr(line)+"\n") vcf_out.write(repr(line)+"\n")
#test = vcf.Reader(filename = "header_CNV.vcf")
#for record in test:
# print record
#!/bin/ksh
### GAD PIPELINE ###
## wrapper_create_html.sh
## Version : 1.0.0
## Description : a wrapper for qsubing create_html.py script
## Usage : qsub -pe smp <nb thread> -v VCF_SNP=</path/to/the/input/vcf/file>,VCF_CNV=</path/to/the/input/vcf/file>,REPORTFILE=<path/to/the/HLAscan/report>,BAMFILE=</path/to/the/bam>,TSVFILE=</path/to/the/tsv>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_create_html.sh
## Output : create_html.py output
## Requirements : Recquire script create_html.py and VcfTools
## Author : Simon.Verdez@u-bourgogne.fr
## Creation Date : 20180802
## last revision date : 20180802
## Known bugs : None
#$ -q batch
#$ -V
# Log file path option
if [ -z ${LOGFILE} ]
then
LOGFILE=annotate_variants.$(date +"%F_%H-%M-%S").log
fi
# Config file path option
if [ -z ${CONFIGFILE} ]
then
CONFIGFILE=analysis_config.tsv
fi
# Logging
exec 1>> $LOGFILE 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if config file exist
if [ ! -f $VCF_SNP ]
then
echo "Vcf SNP file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
if [ ! -f $VCF_CNV ]
then
echo "Vcf CNV file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
if [ ! -f $REPORTFILE ]
then
echo "Report HLAscan file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
if [ ! -f $BAMFILE ]
then
echo "BAM file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
if [ ! -f $TSVFILE ]
then
echo "TSV file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
# Check if output file is specified
if [ -z $OUTPUTFILE ]
then
echo "Output file is not specified"
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
# Launch python script command and check exit code
echo "command : $PIPELINEBASE/create_html.py -v $VCF_SNP -c $VCF_CNV -h $REPORTFILE -b $BAMFILE -t $TSVFILE -o $OUTPUTFILE -e $LOGFILE"
$PYTHONBIN $PIPELINEBASE/create_html.py -v $VCF_SNP -c $VCF_CNV -h $REPORTFILE -b $BAMFILE -t $TSVFILE -o $OUTPUTFILE -e $LOGFILE
create_html_exitcode=$?
echo "create_html exit code : $annotate_variants_exitcode"
if [ $creaete_html_exitcode != 0 ]
then
echo "$(date +"%F_%H-%M-%S"): END"
touch create_html.failed
exit 1
fi
echo "$(date +"%F_%H-%M-%S"): END"
#!/bin/ksh
### GAD PIPELINE ###
## wrapper_tsv2vcf.sh
## Version : 1.0.0
## Description : a wrapper for qsubing tsv2vcf.py script
## Usage : qsub -pe smp <nb thread> -v INPUTFILE=</path/to/the/input/tsv/file>,PHARMGKB=<ref/file/pharmgkb>,OMIM=<ref/file/OMIM>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_tsv2vcf.sh
## Output : annotate_variants.py output
## Requirements : Recquire script tsv2vcf.py and VcfTools
## Author : Simon.Verdez@u-bourgogne.fr
## Creation Date : 20180702
## last revision date : 20180702
## Known bugs : None
#$ -q batch
#$ -V
Log file path option
if [ -z ${LOGFILE} ]
then
LOGFILE=annotate_variants.$(date +"%F_%H-%M-%S").log
fi
# Config file path option
if [ -z ${CONFIGFILE} ]
then
CONFIGFILE=analysis_config.tsv
fi
# Logging
exec 1>> $LOGFILE 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if config file exist
if [ ! -f $CONFIGFILE ]
then
echo "Config file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
# Check if input file exist
if [ ! -f $INPUTFILE ]
then
echo "Input file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
if [ ! -f $PHARMGKB ]
then
echo "PHARMGKB reference file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
if [ ! -f $OMIM ]
then
echo "OMIM reference file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
# Check if output file is specified
if [ -z $OUTPUTFILE ]
then
echo "Output file is not specified"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
# Launch python script command and check exit code
echo "command : $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE"
$PYTHONBIN $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE
tsv2vcf_exitcode=$?
echo "tsv2vcf exit code : $tsv2vcf_exitcode"
if [ $tsv2vcf_exitcode != 0 ]
then
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
echo "$(date +"%F_%H-%M-%S"): END"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment