Commit 8957573e authored by Simon Verdez's avatar Simon Verdez

integration du header

parent 3c5a5a82
......@@ -138,7 +138,6 @@ else
else
echo "CreateHTML step was OK"
fi
if [ "$mergeVCF" == "FAIL" ]
then
echo "MergeVCF step failed : jumping to merge step"
......@@ -147,8 +146,8 @@ else
echo "MerfeVCF step was OK"
fi
echo "pipeline was correctly executed, no need to relaunch a step"
exit 0
echo "pipeline was correctly executed, no need to relaunch a step"
exit 0
fi
configure:
......@@ -202,11 +201,9 @@ echo "### Variant calling CNV second step ###"
echo "Start : $(date +"%F_%H-%M-%S")"
for currentSample in $samples
do
echo "Command : cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf"
cp $HEADER $ANALYSISDIR/$currentSample/$currentSample.CNV.vcf
cnvfile=`ls -lht $ANALYSISDIR/$currentSample/ | grep "\.annot.tsv" | head -1 | awk '{print $9}'`
echo "Command : qsub -pe smp 1 -N varcallCNV_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasename.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh"
qsub -pe smp 1 -N tsv2vcf_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasenameCNV.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh
qsub -pe smp 1 -N tsv2vcf_${currentSample} -o $ANALYSISDIR/$currentSample/logs/ -e $ANALYSISDIR/$currentSample/logs/ -v INPUTFILE=$ANALYSISDIR/$currentSample/$cnvfile,PHARMGKB=$TSVFILE,OMIM=$OMIM,REF=$REF,OUTPUTFILE=$ANALYSISDIR/$currentSample/$currentSample.CNV.vcf,LOGFILE=$ANALYSISDIR/$currentSample/logs/process_varcallCNV.$logbasenameCNV.log,CONFIGFILE=$CONFIGFILE $PIPELINEBASE/wrapper_tsv2vcf.sh
done
echo "End : $(date +"%F_%H-%M-%S")"
......
......@@ -20,3 +20,59 @@ class VCF_CNV:
def __repr__(self):
vcf = "{}\t{}\t{}\t{}\t{}\t{}\t{}\tSVTYPE={};END={};SVLEN={};CIPOS={};CIEND={};GENE={}\t{}\t{}".format(self.CHROM,self.POS,self.ID,self.REF,self.ALT,self.QUAL,self.FILTER,self.SVTYPE,self.END,self.SVLEN,self.CIPOS,self.CIEND,self.GENE,self.FORMAT,self.SAMPLE)
return vcf
def create_header():
header="""##fileformat=VCFv4.2
##reference=hg19
##INFO=<ID=BKPTID,Number=1,Type=String,Description="ID of the assembled alternate allele in the assembly file">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
##INFO=<ID=HOMSEQ,Number=1,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=MEINFO,Number=4,Type=String,Description="Mobile element info of the form NAME,START,END,POLARITY">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=GENE,Number=1,Type=String,Description="Involved genes">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
##ALT=<ID=DEL:ME:L1,Description="Deletion of L1 element">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=DUP:TANDEM,Description="Tandem Duplication">
##ALT=<ID=INS,Description="Insertion of novel sequence">
##ALT=<ID=INS:ME:ALU,Description="Insertion of ALU element">
##ALT=<ID=INS:ME:L1,Description="Insertion of L1 element">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=CNV,Description="Copy number variable region">
##FORMAT=<ID=GT,Number=1,Type=Integer,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
##contig=<ID=chrM,length=16571>
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chrY,length=59373566>
"""
return header
No preview for this file type
......@@ -4,7 +4,7 @@
## tsv2vcf.py
## Version : 1.1.0
## Description : Conversion a tsv file in vcf file.
## Usage : python tsv2vcf.py -i <input/tsv/file> -p <input/tsv/file> -m<input/tsv/file> -o <output/vcf/file> -e <log/file>
## Usage : python tsv2vcf.py -i <input/tsv/file> -p <input/tsv/file> -m<input/tsv/file> -r <reference/bam/fie> -o <output/vcf/file> -e <log/file>
## Output : vcf file
## Requirements : python 2.7+
......@@ -25,7 +25,7 @@ import logging
#Options
try:
opts, args = getopt.getopt(sys.argv[1:], 'i:p:m:o:e:')
opts, args = getopt.getopt(sys.argv[1:], 'i:p:m:r:o:e:')
for opt, arg in opts:
if opt in ("-i"):
tsvfile = arg
......@@ -33,6 +33,8 @@ try:
pharmgkb = arg
if opt in ("-m"):
OMIM = arg
if opt in ("-r"):
REFERENCE = arg
elif opt in ("-o"):
out_vcf = arg
elif opt in ("-e"):
......@@ -55,6 +57,9 @@ if not vars().has_key('pharmgkb'):
if not vars().has_key('OMIM'):
logging.exception('No OMIM database given')
sys.exit(1)
if not vars().has_key('REFERENCE'):
logging.exception('No REFERENCE bam given')
sys.exit(1)
# Check files
try:
......@@ -75,7 +80,12 @@ try:
except IOError:
logging.exception('Hgmd file does not exist')
sys.exit(1)
try:
f = open(REFERENCE, 'r')
f.close()
except IOError:
logging.exception('reference bam file does not exist')
sys.exit(1)
"""Liste contenant les genes d'interets"""
......@@ -108,7 +118,7 @@ for gene in ListeG:
line_vcf = VCF_CNV()
line_vcf.CHROM = row["#CHR"]
line_vcf.POS = row["START"]
p = subprocess.Popen(["samtools", 'faidx',"/work/gad/sv347413/epilepsie/Pipeline/index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
p = subprocess.Popen(["samtools", 'faidx',REFERENCE, row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
line_vcf.REF = p.stdout.readlines()[1].strip("\n")
line_vcf.ALT = "<"+row["CNV"]+">"
line_vcf.QUAL = "6"
......@@ -123,6 +133,11 @@ for gene in ListeG:
ListeVCF = sorted(ListeVCF,key=attrgetter('POS'))
header = create_header()
with open (out_vcf,'w') as vcf_out:
vcf_out.write(header)
for line in ListeVCF:
with open (out_vcf,'a') as vcf_out:
vcf_out.write(repr(line)+"\n")
......@@ -4,7 +4,7 @@
## wrapper_tsv2vcf.sh
## Version : 1.0.0
## Description : a wrapper for qsubing tsv2vcf.py script
## Usage : qsub -pe smp <nb thread> -v INPUTFILE=</path/to/the/input/tsv/file>,PHARMGKB=<ref/file/pharmgkb>,OMIM=<ref/file/OMIM>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_tsv2vcf.sh
## Usage : qsub -pe smp <nb thread> -v INPUTFILE=</path/to/the/input/tsv/file>,PHARMGKB=<ref/file/pharmgkb>,OMIM=<ref/file/OMIM>,REF=<ref/bam/file>,OUTPUTFILE=</path/to/the/output/vcf/file>,[LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] wrapper_tsv2vcf.sh
## Output : annotate_variants.py output
## Requirements : Recquire script tsv2vcf.py and VcfTools
......@@ -66,6 +66,14 @@ then
exit 1
fi
if [ ! -f $REF ]
then
echo "reference bam file does not exist"
echo "$(date +"%F_%H-%M-%S"): END"
touch tsv2vcf.failed
exit 1
fi
# Check if output file is specified
if [ -z $OUTPUTFILE ]
then
......@@ -79,8 +87,8 @@ PYTHONBIN=`grep pythonbin $CONFIGFILE | cut -f2`
PIPELINEBASE=`grep pipelinebase $CONFIGFILE | cut -f2`
# Launch python script command and check exit code
echo "command : $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE"
$PYTHONBIN $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -o $OUTPUTFILE -e $LOGFILE
echo "command : $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -r $REF -o $OUTPUTFILE -e $LOGFILE"
$PYTHONBIN $PIPELINEBASE/tsv2vcf.py -i $INPUTFILE -p $PHARMGKB -m $OMIM -r $REF -o $OUTPUTFILE -e $LOGFILE
tsv2vcf_exitcode=$?
echo "tsv2vcf exit code : $tsv2vcf_exitcode"
if [ $tsv2vcf_exitcode != 0 ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment