Commit 8957573e authored by Simon Verdez's avatar Simon Verdez

integration du header

parent 3c5a5a82
...@@ -138,7 +138,6 @@ else ...@@ -138,7 +138,6 @@ else
else else
echo "CreateHTML step was OK" echo "CreateHTML step was OK"
fi fi
if [ "$mergeVCF" == "FAIL" ] if [ "$mergeVCF" == "FAIL" ]
then then
echo "MergeVCF step failed : jumping to merge step" echo "MergeVCF step failed : jumping to merge step"
...@@ -147,8 +146,8 @@ else ...@@ -147,8 +146,8 @@ else
echo "MerfeVCF step was OK" echo "MerfeVCF step was OK"
fi fi
echo "pipeline was correctly executed, no need to relaunch a step" echo "pipeline was correctly executed, no need to relaunch a step"
exit 0 exit 0
fi fi
configure: configure:
...@@ -202,11 +201,9 @@ echo "### Variant calling CNV second step ###" ...@@ -202,11 +201,9 @@ echo "### Variant calling CNV second step ###"
echo "Start : $(date +"%F_%H-%M-%S")" echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples for currentSample in $samples
do do
echo "Command : cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf"
cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf
cnvfile=`ls -lht $ANALYSISDIR/$currentSample/ | grep "\.annot.tsv" | head -1 | awk '{print $9}'` cnvfile=`ls -lht $ANALYSISDIR/$currentSample/ | grep "\.annot.tsv" | head -1 | awk '{print $9}'`
echo "Command : qsub -pe smp 1 -N varcallCNV_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh" echo "Command : qsub -pe smp 1 -N varcallCNV_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh"
qsub -pe smp 1 -N tsv2vcf_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasenameCNV.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh qsub -pe smp 1 -N tsv2vcf_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,REF=$REF,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasenameCNV.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh
done done
echo "End : $(date +"%F_%H-%M-%S")" echo "End : $(date +"%F_%H-%M-%S")"
......
...@@ -20,3 +20,59 @@ class VCF_CNV: ...@@ -20,3 +20,59 @@ class VCF_CNV:
def __repr__(self): def __repr__(self):
vcf = "{}\t{}\t{}\t{}\t{}\t{}\t{}\tSVTYPE={};END={};SVLEN={};CIPOS={};CIEND={};GENE={}\t{}\t{}".format(self.CHROM,self.POS,self.ID,self.REF,self.ALT,self.QUAL,self.FILTER,self.SVTYPE,self.END,self.SVLEN,self.CIPOS,self.CIEND,self.GENE,self.FORMAT,self.SAMPLE) vcf = "{}\t{}\t{}\t{}\t{}\t{}\t{}\tSVTYPE={};END={};SVLEN={};CIPOS={};CIEND={};GENE={}\t{}\t{}".format(self.CHROM,self.POS,self.ID,self.REF,self.ALT,self.QUAL,self.FILTER,self.SVTYPE,self.END,self.SVLEN,self.CIPOS,self.CIEND,self.GENE,self.FORMAT,self.SAMPLE)
return vcf return vcf
def create_header():
header="""##fileformat=VCFv4.2
##reference=hg19
##INFO=<ID=BKPTID,Number=1,Type=String,Description="ID of the assembled alternate allele in the assembly file">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
##INFO=<ID=HOMSEQ,Number=1,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=MEINFO,Number=4,Type=String,Description="Mobile element info of the form NAME,START,END,POLARITY">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=GENE,Number=1,Type=String,Description="Involved genes">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
##ALT=<ID=DEL:ME:L1,Description="Deletion of L1 element">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=DUP:TANDEM,Description="Tandem Duplication">
##ALT=<ID=INS,Description="Insertion of novel sequence">
##ALT=<ID=INS:ME:ALU,Description="Insertion of ALU element">
##ALT=<ID=INS:ME:L1,Description="Insertion of L1 element">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=CNV,Description="Copy number variable region">
##FORMAT=<ID=GT,Number=1,Type=Integer,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
##contig=<ID=chrM,length=16571>
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chrY,length=59373566>
"""
return header
No preview for this file type
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
## tsv2vcf.py ## tsv2vcf.py
## Version : 1.1.0 ## Version : 1.1.0
## Description : Conversion a tsv file in vcf file. ## Description : Conversion a tsv file in vcf file.
## Usage : python tsv2vcf.py -i <input/tsv/file> -p <input/tsv/file> -m<input/tsv/file> -o <output/vcf/file> -e <log/file> ## Usage : python tsv2vcf.py -i <input/tsv/file> -p <input/tsv/file> -m<input/tsv/file> -r <reference/bam/fie> -o <output/vcf/file> -e <log/file>
## Output : vcf file ## Output : vcf file
## Requirements : python 2.7+ ## Requirements : python 2.7+
...@@ -25,7 +25,7 @@ import logging ...@@ -25,7 +25,7 @@ import logging
#Options #Options
try: try:
opts, args = getopt.getopt(sys.argv[1:], 'i:p:m:o:e:') opts, args = getopt.getopt(sys.argv[1:], 'i:p:m:r:o:e:')
for opt, arg in opts: for opt, arg in opts:
if opt in ("-i"): if opt in ("-i"):
tsvfile = arg tsvfile = arg
...@@ -33,6 +33,8 @@ try: ...@@ -33,6 +33,8 @@ try:
pharmgkb = arg pharmgkb = arg
if opt in ("-m"): if opt in ("-m"):
OMIM = arg OMIM = arg
if opt in ("-r"):
REFERENCE = arg
elif opt in ("-o"): elif opt in ("-o"):
out_vcf = arg out_vcf = arg
elif opt in ("-e"): elif opt in ("-e"):
...@@ -55,6 +57,9 @@ if not vars().has_key('pharmgkb'): ...@@ -55,6 +57,9 @@ if not vars().has_key('pharmgkb'):
if not vars().has_key('OMIM'): if not vars().has_key('OMIM'):
logging.exception('No OMIM database given') logging.exception('No OMIM database given')
sys.exit(1) sys.exit(1)
if not vars().has_key('REFERENCE'):
logging.exception('No REFERENCE bam given')
sys.exit(1)
# Check files # Check files
try: try:
...@@ -75,7 +80,12 @@ try: ...@@ -75,7 +80,12 @@ try:
except IOError: except IOError:
logging.exception('Hgmd file does not exist') logging.exception('Hgmd file does not exist')
sys.exit(1) sys.exit(1)
try:
f = open(REFERENCE, 'r')
f.close()
except IOError:
logging.exception('reference bam file does not exist')
sys.exit(1)
"""Liste contenant les genes d'interets""" """Liste contenant les genes d'interets"""
...@@ -108,7 +118,7 @@ for gene in ListeG: ...@@ -108,7 +118,7 @@ for gene in ListeG:
line_vcf = VCF_CNV() line_vcf = VCF_CNV()
line_vcf.CHROM = row["#CHR"] line_vcf.CHROM = row["#CHR"]
line_vcf.POS = row["START"] line_vcf.POS = row["START"]
p = subprocess.Popen(["samtools", 'faidx',"/work/gad/sv347413/epilepsie/Pipeline/index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE) p = subprocess.Popen(["samtools", 'faidx',REFERENCE, row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
line_vcf.REF = p.stdout.readlines()[1].strip("\n") line_vcf.REF = p.stdout.readlines()[1].strip("\n")
line_vcf.ALT = "<"+row["CNV"]+">" line_vcf.ALT = "<"+row["CNV"]+">"
line_vcf.QUAL = "6" line_vcf.QUAL = "6"
...@@ -123,6 +133,11 @@ for gene in ListeG: ...@@ -123,6 +133,11 @@ for gene in ListeG:
ListeVCF = sorted(ListeVCF,key=attrgetter('POS')) ListeVCF = sorted(ListeVCF,key=attrgetter('POS'))
header = create_header()
with open (out_vcf,'w') as vcf_out:
vcf_out.write(header)
for line in ListeVCF: for line in ListeVCF:
with open (out_vcf,'a') as vcf_out: with open (out_vcf,'a') as vcf_out:
vcf_out.write(repr(line)+"\n") vcf_out.write(repr(line)+"\n")
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
## wrapper_tsv2vcf.sh ## wrapper_tsv2vcf.sh
## Version : 1.0.0 ## Version : 1.0.0
## Description : a wrapper for qsubing tsv2vcf.py script ## Description : a wrapper for qsubing tsv2vcf.py script
## Usage : qsub -pe smp <nb thread> -v INPUTFILE=</path/to/the/input/tsv/file>,PHARMGKB=<ref/file/pharmgkb>,OMIM=<ref/file/OMIM>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_tsv2vcf.sh ## Usage : qsub -pe smp <nb thread> -v INPUTFILE=</path/to/the/input/tsv/file>,PHARMGKB=<ref/file/pharmgkb>,OMIM=<ref/file/OMIM>,REF=<ref/bam/file>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_tsv2vcf.sh
## Output : annotate_variants.py output ## Output : annotate_variants.py output
## Requirements : Recquire script tsv2vcf.py and VcfTools ## Requirements : Recquire script tsv2vcf.py and VcfTools
...@@ -66,6 +66,14 @@ then ...@@ -66,6 +66,14 @@ then
exit 1 exit 1
fi fi
if [ ! -f $REF ]
then
echo "reference bam file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
# Check if output file is specified # Check if output file is specified
if [ -z $OUTPUTFILE ] if [ -z $OUTPUTFILE ]
then then
...@@ -79,8 +87,8 @@ PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2` ...@@ -79,8 +87,8 @@ PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2` PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
# Launch python script command and check exit code # Launch python script command and check exit code
echo "command : $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE" echo "command : $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -r $REF -o $OUTPUTFILE -e $LOGFILE"
$PYTHONBIN $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE $PYTHONBIN $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -r $REF -o $OUTPUTFILE -e $LOGFILE
tsv2vcf_exitcode=$? tsv2vcf_exitcode=$?
echo "tsv2vcf exit code : $tsv2vcf_exitcode" echo "tsv2vcf exit code : $tsv2vcf_exitcode"
if [ $tsv2vcf_exitcode != 0 ] if [ $tsv2vcf_exitcode != 0 ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment