Commit 6bc7cbaf authored by Yannis Duffourd's avatar Yannis Duffourd

Initial commit

parents
#!/bin/bash
if [ ! -e /work/gad/sv347413/epilepsie/vcf_tmp/$i*.vcf ]
then
for i in $(cat /work/gad/sv347413/HLAscan/liste_patients_epileptiques.txt)
do
cp /work/gad/shared/vcf/individu/$i/*.raw.vcf /work/gad/sv347413/epilepsie/vcf_tmp/.
done
fi
if [ ! -e /work/gad/sv347413/epilepsie/tsv_tmp/$i*.tsv ]
then
for i in $(cat /work/gad/sv347413/HLAscan/liste_patients_epileptiques.txt)
do
cp /work/gad/shared/vcf/individu/$i/*.cnv.annot.tsv /work/gad/sv347413/epilepsie/tsv_tmp/.
done
fi
#!/bin/bash
set -e;
true;
i=$1
if [ ! -d /work/gad/sv347413/epilepsie/tmp_res/$i ]
then
mkdir /work/gad/sv347413/epilepsie/tmp_res/$i
fi
if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf ]
then
echo "SNP"
/work/gad/sv347413/jdk1.8.0_111/bin/java -jar /work/gad/shared/bin/snpEff/SnpSift.jar annotate pharmgkb_v2.vcf /work/gad/sv347413/epilepsie/vcf_tmp/$i.* > /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf
fi
if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf ]
then
echo "CNV"
cp /work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf
fi
if [ ! -e /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf ]
then
echo "CNV2"
python2.7 tsv2vcf.py /work/gad/sv347413/epilepsie/tsv_tmp/$i* /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf
mv /work/gad/sv347413/epilepsie/tmp_res/$i/header_CNV.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf
fi
if [ ! -e /work/gad/sv347413/epilepsie/results_html/$i.html ]
then
echo "html"
python2.7 create_html_v2.py /work/gad/sv347413/epilepsie/tmp_res/$i/$i.SNP.vcf /work/gad/sv347413/epilepsie/tmp_res/$i/$i.CNV.vcf /work/gad/sv347413/HLAscan/results/$i/Report > /work/gad/sv347413/epilepsie/results_html/$i.html
fi
class VCF_CNV:
def __init__(self):
self.CHROM = 0
self.POS = 0
self.ID = "."
self.REF = "."
self.ALT = "."
self.QUAL = 6
self.FILTER = "PASS"
self.SVTYPE = None
self.END = 0
self.SVLEN = 0
self.CIPOS = "0,0"
self.CIEND = "0,0"
self.GENE = "."
self.FORMAT = "GT:GQ"
self.SAMPLE = ".:0"
def __repr__(self):
vcf = "{}\t{}\t{}\t{}\t{}\t{}\t{}\tSVTYPE={};END={};SVLEN={};CIPOS={};CIEND={};GENE={}\t{}\t{}".format(self.CHROM,self.POS,self.ID,self.REF,self.ALT,self.QUAL,self.FILTER,self.SVTYPE,self.END,self.SVLEN,self.CIPOS,self.CIEND,self.GENE,self.FORMAT,self.SAMPLE)
return vcf
import sys
from pyexcel_ods import *
from pysam import VariantFile
import htmltag
" construction du formulaire en premier "
head = htmltag.HEAD()
head = htmltag.append(htmltag.TITLE('pharmacogenetic'))
head = head.append(htmltag.STYLE(type="text/css").append("body { padding-left: 15em; font-family: Georgia; color:black }"))
head = head.append(htmltag.STYLE(type="text/css").append("h2 { padding-left: 15em;font-family: Georgia;color:blue }"))
head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar { list-style-type: none; padding: 0; margin: 0; position: absolute; top: 2em; left: 1em; width: 13em }"))
head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar li { background: yellow; margin: 0.5em 0; padding: 1em; border-left: 0.5em solid black }"))
head = head.append(htmltag.STYLE(type="text/css").append("P1 {color: red }"))
head = head.append(htmltag.STYLE(type="text/css").append("P2 { color: orange }"))
body = htmltag.BODY()
body = body.append(htmltag.UL(CLASS = "navbar").append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://www.pharmgkb.org").append("pharmgkb")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://cpicpgx.org").append("cpicpgx")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "http://www.pharmacogenetics.fr/").append("pharmacogenetics.fr")))))
body = body.append(htmltag.H2('Variants pharmgkb'))
"""extraction des donnees """
data = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau pharmgkb.ods")
vcf_reader_SNP = VariantFile(sys.argv[1])
vcf_reader_CNV = VariantFile(sys.argv[2])
Report_HLA = open(sys.argv[3],"r")
for record in vcf_reader_SNP.fetch():
try:
for i in range(len(record.info["Pharmaco_Annot"])/12):
try:
gene = repr(record.info["GENEINFO"])
except:
gene = "intron"
alt = record.info["Pharmaco_Annot"][(i*12)+1].replace(")","").replace("'","").replace("-","")
pheno = record.info["Pharmaco_Annot"][(i*12)+3].replace(")","").replace("'","").replace("-"," ")
publi = record.info["Pharmaco_Annot"][(i*12)+5].replace(")","").replace("'","").replace("-"," ")
level = record.info["Pharmaco_Annot"][(i*12)+7].replace(")","").replace("'","").replace("-","")
patho = record.info["Pharmaco_Annot"][(i*12)+9].replace(")","").replace("'","").replace("-","")
molecule = record.info["Pharmaco_Annot"][(i*12)+11].replace(")","").replace("'","").replace("-"," ").replace("]","")
if level.startswith("1"):
body= body.append(htmltag.P(htmltag.P1("pathologie: "+patho))+htmltag.P(htmltag.P1("molecule: "+molecule))+htmltag.P(htmltag.P1("gene: "+gene))+htmltag.P(htmltag.P1("variant: "+repr(record.pos)+" "+repr(record.ref)+">"+repr(record.alts)+" "+repr(record.id)))+htmltag.P(htmltag.P1("base alternative: "+alt))+htmltag.P(htmltag.P1("niveau: "+level))+htmltag.P(htmltag.P1("phenotype: "+pheno)))
body= body.append(htmltag.P(htmltag.P1(htmltag.A(href = publi).append("publication: "+publi))))
elif level.startswith("2"):
body= body.append(htmltag.P(htmltag.P2("pathologie: "+patho))+htmltag.P(htmltag.P2("molecule: "+molecule))+htmltag.P(htmltag.P2("gene: "+gene))+htmltag.P(htmltag.P2("variant: "+repr(record.pos)+" "+repr(record.ref)+">"+repr(record.alts)+" "+repr(record.id)))+htmltag.P(htmltag.P2("base alternative: "+alt))+htmltag.P(htmltag.P2("niveau: "+level))+htmltag.P(htmltag.P2("phenotype: "+pheno)))
body= body.append(htmltag.P(htmltag.P2(htmltag.A(href = publi).append("publication: "+publi))))
else :
body= body.append(htmltag.P("pathologie: "+patho)+htmltag.P("molecule: "+molecule)+htmltag.P("gene: "+gene)+htmltag.P("variant: "+repr(record.pos)+" "+repr(record.ref)+">"+repr(record.alts)+" "+repr(record.id))+htmltag.P("base alternative: "+alt)+htmltag.P("niveau: "+level)+htmltag.P(htmltag.P("phenotype: "+pheno)))
body= body.append(htmltag.P(htmltag.A(href = publi).append("publication: "+publi)))
body= body.append(htmltag.P("#######################################################################"))
except:
pass
body = body.append(htmltag.H2('Deletion ou duplication'))
for record in vcf_reader_CNV.fetch():
for elm in record.alts:
if elm == "<DEL>":
ListeM = []
body= body.append(htmltag.P("Deletion detectee par xHMM sur le gene "+record.info["GENE"]))
for i in range(len(data["hg19"])-1):
if not data["hg19"][i]:
pass
else:
if data["hg19"][i][1] != "gene":
if data["hg19"][i][1] == record.info["GENE"]:
if data["hg19"][i][0] not in ListeM:
ListeM.append(data["hg19"][i][0])
for Mol in ListeM:
body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol))
if elm == "<DUP>":
ListeM = []
body= body.append(htmltag.P("Duplication detectee par xHMM sur le gene "+record.info["GENE"]))
for i in range(len(data["hg19"])-1):
if not data["hg19"][i]:
pass
else:
if data["hg19"][i][1] != "gene":
if data["hg19"][i][1] == record.info["GENE"]:
if data["hg19"][i][0] not in ListeM:
ListeM.append(data["hg19"][i][0])
for Mol in ListeM:
body= body.append(htmltag.P("######Gene ayant une relation avec :"+Mol))
HLA = Report_HLA.readlines()
ListeA = []
ListeB = []
ListeC = []
ListeDQB1 = []
ListeDRB1 = []
for lines in HLA :
if lines.startswith("A"):
for ligne in lines.split("\t"):
if ligne.startswith("A*"):
ListeA.append(ligne)
if lines.startswith("B"):
for ligne in lines.split("\t"):
if ligne.startswith("B*"):
ListeB.append(ligne)
if lines.startswith("C"):
for ligne in lines.split("\t"):
if ligne.startswith("C*"):
ListeC.append(ligne)
if lines.startswith("DQB1"):
for ligne in lines.split("\t"):
if ligne.startswith("DQB1*"):
ListeDQB1.append(ligne)
if lines.startswith("DRB1"):
for ligne in lines.split("\t"):
if ligne.startswith("DRB1*"):
ListeDRB1.append(ligne)
body = body.append(htmltag.H2('Resultat genotypage HLA'))
body = body.append(htmltag.P("HLA-A : "+ListeA[0]+" / "+ListeA[1]))
body = body.append(htmltag.P("HLA-B : "+ListeB[0]+" / "+ListeB[1]))
body = body.append(htmltag.P("HLA-C : "+ListeC[0]+" / "+ListeC[1]))
body = body.append(htmltag.P("HLA-DQB1 : "+ListeDQB1[0]+" / "+ListeDQB1[1]))
body = body.append(htmltag.P("HLA-DRB1 : "+ListeDRB1[0]+" / "+ListeDRB1[1]))
for i in range(len(data["hg19"])-1):
if not data["hg19"][i]:
pass
else:
if data["hg19"][i][1] != "gene":
if data["hg19"][i][2].startswith("HLA-A"):
Adigit = data["hg19"][i][2].split("*")[1]
Adigit2 = Adigit.split(":")[0]+Adigit.split(":")[1]
HL1 = ListeA[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1]
HL2 = ListeA[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1]
if HL1 == Adigit2:
pheno = data["hg19"][i][4]
publi = data["hg19"][i][5]
level = str(data["hg19"][i][10])
if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if level.startswith("2"):
body = body.append(htmltag.P2(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
else:
body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if HL2 == Adigit2:
pheno = data["hg19"][i][4]
publi = data["hg19"][i][5]
level = str(data["hg19"][i][10])
if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if level.startswith("2"):
body = body.append(htmltag.P2(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
else:
body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if data["hg19"][i][2].startswith("HLA-B"):
Bdigit = data["hg19"][i][2].split("*")[1]
Bdigit2 = Adigit.split(":")[0]+Adigit.split(":")[1]
HL1 = ListeB[0].split("*")[1].split(":")[0]+ListeA[0].split("*")[1].split(":")[1]
HL2 = ListeB[1].split("*")[1].split(":")[0]+ListeA[1].split("*")[1].split(":")[1]
if HL1 == Bdigit2:
pheno = data["hg19"][i][4]
publi = data["hg19"][i][5]
level = str(data["hg19"][i][10])
if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if level.startswith("2"):
body = body.append(htmltag.P2(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P2(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
else:
body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if HL2 == Bdigit2:
pheno = data["hg19"][i][4]
publi = data["hg19"][i][5]
level = str(data["hg19"][i][10])
if level.startswith("1"):
body = body.append(htmltag.P1(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P1(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
if level.startswith("2"):
body = body.append(htmltag.P2(htmltag.P(pheno)+htmltag.P(level)))
body = body.append(htmltag.P2(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
else:
body = body.append(htmltag.P(pheno)+htmltag.P(level))
body = body.append(htmltag.P(htmltag.P(htmltag.A(href = publi).append("publication: "+publi))))
print htmltag.HTML(head+body)
#!/bin/bash
set -e;
true;
for i in $(cat /work/gad/sv347413/HLAscan/liste_patients_epileptiques.txt)
do
echo $i
qsub -V -q batch /work/gad/sv347413/epilepsie/Pipeline/Master.sh ${i}
done
import os
import sys
import vcf
import csv
from pyexcel_ods import *
from VCF_tools import *
import subprocess
from operator import *
"""extraction du tableau """
data = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau pharmgkb.ods")
data2 = get_data("/work/gad/sv347413/epilepsie/Pipeline/tableau OMIM.ods")
"""Liste contenant les genes d'interets"""
ListeG = []
"""recuperation des genes dans le fichier"""
for i in range(len(data["hg19"])-1):
if not data["hg19"][i]:
pass
else:
if data["hg19"][i][1] != "gene":
"""remplissage de la liste"""
if data["hg19"][i][1] not in ListeG:
ListeG.append((data["hg19"][i][1]))
for i in range(len(data2["hg19"])-1):
if not data2["hg19"][i]:
pass
else:
if data2["hg19"][i][1] != "GENE":
if data2["hg19"][i][1] not in ListeG:
ListeG.append((data2["hg19"][i][1]))
#print(ListeG)
"""ecriture des lignes CNV dans le vcf"""
ListeVCF=[]
for gene in ListeG:
with open(sys.argv[1]) as tsvfile:
reader = csv.DictReader(tsvfile, dialect='excel-tab')
for row in reader:
if gene in row['GeneList'] and gene != "":
line_vcf = VCF_CNV()
line_vcf.CHROM = row["#CHR"]
line_vcf.POS = row["START"]
p = subprocess.Popen(["samtools", 'faidx',"index/hg19_essential.fa", row["#CHR"]+":"+row["START"]+"-"+row["START"]], stdout = subprocess.PIPE)
line_vcf.REF = p.stdout.readlines()[1].strip("\n")
line_vcf.ALT = "<"+row["CNV"]+">"
line_vcf.QUAL = "6"
line_vcf.SVTYPE = row["CNV"]
line_vcf.END = row["END"]
if row["CNV"] == "DEL":
line_vcf.SVLEN = "-"+row["CNVLength"]
else :
line_vcf.SVLEN =row["CNVLength"]
line_vcf.GENE = str(gene)
ListeVCF.append(line_vcf)
ListeVCF = sorted(ListeVCF,key=attrgetter('POS'))
#print ListeVCF
for line in ListeVCF:
with open (sys.argv[2],'a') as vcf_out:
vcf_out.write(repr(line)+"\n")
#test = vcf.Reader(filename = "header_CNV.vcf")
#for record in test:
# print record
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment