Commit 187abbf2 authored by simon verdez's avatar simon verdez

relecture code YD

parent 5531345a
#!/bin/bash
PHARMGKB=""
......@@ -6,7 +5,6 @@ HLASCAN=""
BAM=""
VCF=""
OUTPUTDIR=""
PRINT_HELP=0
while [[ "$#" -gt 0 ]]; do
......@@ -17,17 +15,16 @@ while [[ "$#" -gt 0 ]]; do
-v|--vcf) VCF="$2" ; shift ;;
-o|--out-directory) OUTPUTDIR="$2" ; shift ;;
-h|--help) PRINT_HELP=1 ;;
*) echo "Unknown parameter passed: $1"; exit 1 ;;
*) echo "Unknown parameter passed: $1"; exit 0 ;;
esac
shift
done
if [ ${PRINT_HELP} -eq 1 ]
then
echo "Usage:"
echo ""
echo "bash -p <PharmGKB file> -b <bam file> -s <path to hlascan files> -v <vcf file> -o <output dir> -h [show this help]"
echo "bash -p <PharmGKB file> -b <bam file> -s <path to hlascan files> -v <vcf file> -o <output dir> -h [show this help] Extract_PharmGKB_Variants.sh"
echo ""
echo "<Mandatory>: "
echo "-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site."
......@@ -36,8 +33,7 @@ then
echo "-v <vcf file>: vcf file provided by variant calling"
echo "-o <OUTPUT_PATH>: Output path."
echo ""
exit 0
exit 1
fi
# Check pharmgkb file is provided and exists
......@@ -112,8 +108,7 @@ echo "Output HLAscan: ${OUTPUTDIR}/HLAscan_result/"
echo "Output HTML file: $OUTPUTDIR/Report.html"
#HLASCAN
python2.7 $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py $BAM $HLASCAN/hla-ref-5gene/gene_list $OUTPUTDIR/HLAscan_result/ 1>> $OUTPUTDIR/HLAscan_log.txt 2>> $OUTPUTDIR/HLAscan_log.txt
python $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py $BAM $HLASCAN/hla-ref-5gene/gene_list $OUTPUTDIR/HLAscan_result/ 1>> $OUTPUTDIR/HLAscan_log.txt 2>> $OUTPUTDIR/HLAscan_log.txt
#create HTML
python2.7 create_html.py -v $VCF -h $OUTPUTDIR/HLAscan_result/Report -t $PHARMGKB -o $OUTPUTDIR/Report.html
python create_html.py -v $VCF -s $OUTPUTDIR/HLAscan_result/Report -t $PHARMGKB -o $OUTPUTDIR/Report.html
\ No newline at end of file
......@@ -19,9 +19,10 @@ pysam library. On output, you have a HTML file readable with any web navigator (
Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan: genotyping of the HLA region using next-generation sequencing data. BMC bioinformatics, 18(1), 258.
* Python2.7:
* Python:
* Tested with python2.7 and python3.9
* library: pysam , htmltag (required sphinx)
* [download python2.7](https://www.python.org/download/releases/2.7/)
* [download python](https://www.python.org/download/releases/)
* PharmGKB csv file
* [download csv file](https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip)
......@@ -32,9 +33,9 @@ Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan:
## Installation
* First install library with pip:
* pip2.7 install pysam
* pip2.7 install sphinx
* pip2.7 install htmltag
* pip install pysam
* pip install sphinx
* pip install htmltag
* Clone this repository
......@@ -50,8 +51,16 @@ https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/NA12878/Nebraska_NA12878_HG001_
### Execution
bash -p <PharmGKB file> -b <bam file> -s <path to HLAscan files> -v <vcf file> -o <output dir>
bash -p <PharmGKB file> -b <bam file> -s <path to HLAscan files> -v <vcf file> -o <output dir> Extract_PharmGKB_Variants.sh
Options:
-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site.
-b <bam file>: Bam file provided by alignement.
-s <path to hlascan files>: Path to HLAscan tools download and unzipped.
-v <vcf file>: vcf file provided by variant calling
-o <OUTPUT_PATH>: Output path.
-h print help
#### Data test
bash -p data_test/PharmGKB_Epilepsy.tsv -b data_test/NA12878.5gene.bam -s <path to HLAscan files> -v data_test/NIST-hg001-7001-b-gatk.vcf -o data_test/
\ No newline at end of file
bash -p data_test/PharmGKB_Epilepsy.tsv -b data_test/NA12878.5gene.bam -s <path to HLAscan files> -v data_test/NIST-hg001-7001-b-gatk.vcf -o data_test/ Extract_PharmGKB_Variants.sh
\ No newline at end of file
#!/usr/bin/python
### PharmAnnot PIPELINE ###
## create_html_v2.py
## create_html.py
## Version : 1.0.0
## Description : Create a html file with vcf file, BAM file and report from HLAscan
## Usage : python create_html_v2.py -v vcf_SNP -c vcf_CNV -h report_HLA -b BAM -t tsvfile -o html -e log
## Usage : python create_html.py -v vcf_SNP -s report_HLA -b BAM -t tsvfile -o html -e log
## Output : Annotated vcf file
## Requirements : python 2.7+ , vcf , htmltag
## Author : Simon.Verdez@u-bourgogne.fr
## Known bugs : None
import sys
import getopt
import subprocess
......@@ -21,65 +17,63 @@ import htmltag
import logging
from operator import *
def Parse_PharmGKB(tsvfile):
def parsePharmGKB(tsvfile):
""" Parse PharmGKB file """
DictRs=dict()
DictHLA=dict()
DictRs = dict()
DictHLA = dict()
with open(tsvfile,'r') as PharmGKB:
reader = csv.DictReader(PharmGKB, delimiter='\t')
for row in reader:
if row['Variant/Haplotypes'].startswith('rs'):
variant = row['Variant/Haplotypes']
gene = row['Gene']
LevelEvi= row['Level of Evidence']
LevelModi= row['Level Modifiers']
Drug=row['Drug(s)']
Pheno=row['Phenotype(s)']
Publi=row['URL']
LevelEvi = row['Level of Evidence']
LevelModi = row['Level Modifiers']
Drug = row['Drug(s)']
Pheno = row['Phenotype(s)']
Publi = row['URL']
DictRs.setdefault(variant,{})
DictRs[variant].setdefault(Drug,{})
DictRs[variant][Drug].setdefault(Pheno,{})
DictRs[variant][Drug][Pheno]["LevelEvi"]=LevelEvi
DictRs[variant][Drug][Pheno]["LevelMod"]=LevelModi
DictRs[variant][Drug][Pheno]["Publi"]=Publi
DictRs[variant][Drug][Pheno]["LevelEvi"] = LevelEvi
DictRs[variant][Drug][Pheno]["LevelMod"] = LevelModi
DictRs[variant][Drug][Pheno]["Publi"] = Publi
if gene == "":
DictRs[variant][Drug][Pheno]["Gene"]="intron"
DictRs[variant][Drug][Pheno]["Gene"] = "intron"
else:
DictRs[variant][Drug][Pheno]["Gene"]=gene
DictRs[variant][Drug][Pheno]["Gene"] = gene
elif row['Variant/Haplotypes'].startswith('HLA'):
allele = row['Variant/Haplotypes'].split(":")[0]+":"+row['Variant/Haplotypes'].split(":")[1]
gene = row['Gene']
LevelEvi= row['Level of Evidence']
LevelModi= row['Level Modifiers']
Drug=row['Drug(s)']
Pheno=row['Phenotype(s)']
Publi=row['URL']
LevelEvi = row['Level of Evidence']
LevelModi = row['Level Modifiers']
Drug = row['Drug(s)']
Pheno = row['Phenotype(s)']
Publi = row['URL']
DictHLA.setdefault(allele,{})
DictHLA[allele].setdefault(Drug,{})
DictHLA[allele][Drug].setdefault(Pheno,{})
DictHLA[allele][Drug][Pheno]["LevelEvi"]=LevelEvi
DictHLA[allele][Drug][Pheno]["LevelMod"]=LevelModi
DictHLA[allele][Drug][Pheno]["Publi"]=Publi
DictHLA[allele][Drug][Pheno]["LevelEvi"] = LevelEvi
DictHLA[allele][Drug][Pheno]["LevelMod"]= LevelModi
DictHLA[allele][Drug][Pheno]["Publi"] = Publi
if gene == "":
DictHLA[allele][Drug][Pheno]["Gene"]="intron"
DictHLA[allele][Drug][Pheno]["Gene"] = "intron"
else:
DictHLA[allele][Drug][Pheno]["Gene"]=gene
DictHLA[allele][Drug][Pheno]["Gene"] = gene
return DictRs,DictHLA
def contruct_header():
def contructHeader():
""" Header construction """
head = htmltag.HEAD()
head = htmltag.append(htmltag.TITLE('pharmacogenetic'))
head = head.append(htmltag.STYLE(type="text/css").append("body { padding-left: 15em; font-family: Georgia; color:black }"))
head = head.append(htmltag.STYLE(type="text/css").append("h2 { padding-left: 15em;font-family: Georgia;color:blue }"))
head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar { list-style-type: none; padding: 0; margin: 0; position: absolute; top: 2em; left: 1em; width: 13em }"))
head = head.append(htmltag.STYLE(type="text/css").append("ul.navbar li { background: yellow; margin: 0.5em 0; padding: 1em; border-left: 0.5em solid black }"))
head = head.append(htmltag.STYLE(type="text/css").append("P1 { color: red }"))
head = head.append(htmltag.STYLE(type="text/css").append("P2 { color: orange }"))
head = head.append(htmltag.STYLE(type = "text/css").append("body { padding-left: 15em; font-family: Georgia; color:black }"))
head = head.append(htmltag.STYLE(type = "text/css").append("h2 { padding-left: 15em;font-family: Georgia;color:blue }"))
head = head.append(htmltag.STYLE(type = "text/css").append("ul.navbar { list-style-type: none; padding: 0; margin: 0; position: absolute; top: 2em; left: 1em; width: 13em }"))
head = head.append(htmltag.STYLE(type = "text/css").append("ul.navbar li { background: yellow; margin: 0.5em 0; padding: 1em; border-left: 0.5em solid black }"))
head = head.append(htmltag.STYLE(type = "text/css").append("P1 { color: red }"))
head = head.append(htmltag.STYLE(type = "text/css").append("P2 { color: orange }"))
body = htmltag.BODY()
body = body.append(htmltag.UL(CLASS = "navbar").append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://www.pharmgkb.org").append("pharmgkb")))).append(htmltag.UL(htmltag.LI(htmltag.A(href = "https://cpicpgx.org").append("cpicpgx")))))
......@@ -88,58 +82,54 @@ def contruct_header():
return head,body
def write_body_rs(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,variant,body):
"""function for facilitate rs extraction """
def writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,variant,body):
"""to facilitate rs extraction """
if LevelEvi == "1A" or LevelEvi == "1B":
body= body.append(htmltag.P(htmltag.P1("Phenotype: "+Pheno))+htmltag.P(htmltag.P1("Drug: "+Drug))+htmltag.P(htmltag.P1("gene: "+gene))+htmltag.P(htmltag.P1("variant: "+variant))+htmltag.P(htmltag.P1("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P1("Publication: "+Publi))+htmltag.P(htmltag.P1("Level Modifier: "+LevelModi)))
body = body.append(htmltag.P(htmltag.P1("Phenotype: "+Pheno))+htmltag.P(htmltag.P1("Drug: "+Drug))+htmltag.P(htmltag.P1("gene: "+gene))+htmltag.P(htmltag.P1("variant: "+variant))+htmltag.P(htmltag.P1("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P1("Publication: "+Publi))+htmltag.P(htmltag.P1("Level Modifier: "+LevelModi)))
elif LevelEvi == "2A" or LevelEvi == "2B":
body= body.append(htmltag.P(htmltag.P2("Phenotype: "+Pheno))+htmltag.P(htmltag.P2("Drug: "+Drug))+htmltag.P(htmltag.P2("gene: "+gene))+htmltag.P(htmltag.P2("variant: "+variant))+htmltag.P(htmltag.P2("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P2("Publication: "+Publi))+htmltag.P(htmltag.P2("Level Modifier: "+LevelModi)))
body = body.append(htmltag.P(htmltag.P2("Phenotype: "+Pheno))+htmltag.P(htmltag.P2("Drug: "+Drug))+htmltag.P(htmltag.P2("gene: "+gene))+htmltag.P(htmltag.P2("variant: "+variant))+htmltag.P(htmltag.P2("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P2("Publication: "+Publi))+htmltag.P(htmltag.P2("Level Modifier: "+LevelModi)))
else :
body= body.append(htmltag.P(htmltag.P("Phenotype: "+Pheno))+htmltag.P(htmltag.P("Drug: "+Drug))+htmltag.P(htmltag.P("gene: "+gene))+htmltag.P(htmltag.P("variant: "+variant))+htmltag.P(htmltag.P("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P("Publication: "+Publi))+htmltag.P(htmltag.P("Level Modifier: "+LevelModi)))
body= body.append(htmltag.P("#######################################################################"))
body = body.append(htmltag.P(htmltag.P("Phenotype: "+Pheno))+htmltag.P(htmltag.P("Drug: "+Drug))+htmltag.P(htmltag.P("gene: "+gene))+htmltag.P(htmltag.P("variant: "+variant))+htmltag.P(htmltag.P("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P("Publication: "+Publi))+htmltag.P(htmltag.P("Level Modifier: "+LevelModi)))
body = body.append(htmltag.hr())
return body
def extract_rs(data,vcfSNP,DictRs,body):
def extractRS(data,vcfSNP,DictRs,body):
""" rs information extraction """
vcf_reader_SNP = VariantFile(vcfSNP)
for record in vcf_reader_SNP.fetch():
if record.id in DictRs:
for Drug in DictRs[record.id]:
for Pheno in DictRs[record.id][Drug]:
gene = DictRs[record.id][Drug][Pheno]["Gene"]
LevelEvi= DictRs[record.id][Drug][Pheno]["LevelEvi"]
LevelModi= DictRs[record.id][Drug][Pheno]["LevelMod"]
Publi= DictRs[record.id][Drug][Pheno]["Publi"]
body=write_body_rs(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,record.id,body)
LevelEvi = DictRs[record.id][Drug][Pheno]["LevelEvi"]
LevelModi = DictRs[record.id][Drug][Pheno]["LevelMod"]
Publi = DictRs[record.id][Drug][Pheno]["Publi"]
body =writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,record.id,body)
return body
def write_body_HLA(HLA,DictHLA,body):
"""function for facilitate HLA extraction """
def writeBodyHLA(HLA,DictHLA,body):
"""to facilitate HLA extraction """
if HLA in DictHLA:
for Drug in DictHLA[HLA]:
for Pheno in DictHLA[HLA][Drug]:
gene = DictHLA[HLA][Drug][Pheno]["Gene"]
LevelEvi= DictHLA[HLA][Drug][Pheno]["LevelEvi"]
LevelModi=DictHLA[HLA][Drug][Pheno]["LevelMod"]
Publi=DictHLA[HLA][Drug][Pheno]["Publi"]
body= body.append(htmltag.P("#######################################################################"))
LevelEvi = DictHLA[HLA][Drug][Pheno]["LevelEvi"]
LevelModi = DictHLA[HLA][Drug][Pheno]["LevelMod"]
Publi = DictHLA[HLA][Drug][Pheno]["Publi"]
body = body.append(htmltag.hr())
if LevelEvi == "1A" or LevelEvi == "1B":
body= body.append(htmltag.P(htmltag.P1("Phenotype: "+Pheno))+htmltag.P(htmltag.P1("Drug: "+Drug))+htmltag.P(htmltag.P1("allele: "+HLA))+htmltag.P(htmltag.P1("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P1("Publication: "+Publi))+htmltag.P(htmltag.P1("Level Modifier: "+LevelModi)))
body = body.append(htmltag.P(htmltag.P1("Phenotype: "+Pheno))+htmltag.P(htmltag.P1("Drug: "+Drug))+htmltag.P(htmltag.P1("allele: "+HLA))+htmltag.P(htmltag.P1("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P1("Publication: "+Publi))+htmltag.P(htmltag.P1("Level Modifier: "+LevelModi)))
elif LevelEvi == "2A" or LevelEvi == "2B":
body= body.append(htmltag.P(htmltag.P2("Phenotype: "+Pheno))+htmltag.P(htmltag.P2("Drug: "+Drug))+htmltag.P(htmltag.P2("allele: "+HLA))+htmltag.P(htmltag.P2("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P2("Publication: "+Publi))+htmltag.P(htmltag.P2("Level Modifier: "+LevelModi)))
body = body.append(htmltag.P(htmltag.P2("Phenotype: "+Pheno))+htmltag.P(htmltag.P2("Drug: "+Drug))+htmltag.P(htmltag.P2("allele: "+HLA))+htmltag.P(htmltag.P2("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P2("Publication: "+Publi))+htmltag.P(htmltag.P2("Level Modifier: "+LevelModi)))
else :
body= body.append(htmltag.P(htmltag.P("Phenotype: "+Pheno))+htmltag.P(htmltag.P("Drug: "+Drug))+htmltag.P(htmltag.P("allele: "+HLA))+htmltag.P(htmltag.P("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P("Publication: "+Publi))+htmltag.P(htmltag.P("Level Modifier: "+LevelModi)))
body = body.append(htmltag.P(htmltag.P("Phenotype: "+Pheno))+htmltag.P(htmltag.P("Drug: "+Drug))+htmltag.P(htmltag.P("allele: "+HLA))+htmltag.P(htmltag.P("Level of Evidence: "+LevelEvi))+htmltag.P(htmltag.P("Publication: "+Publi))+htmltag.P(htmltag.P("Level Modifier: "+LevelModi)))
return body
def extract_HLA(report,DictHLA,body):
def extractHLA(report,DictHLA,body):
""" HLA information extraction """
with open(report,'r') as Report_HLA:
HLA = Report_HLA.readlines()
......@@ -177,27 +167,26 @@ def extract_HLA(report,DictHLA,body):
body = body.append(htmltag.P("HLA-DQB1 : "+ListeDQB1[0]+" / "+ListeDQB1[1]))
body = body.append(htmltag.P("HLA-DRB1 : "+ListeDRB1[0]+" / "+ListeDRB1[1]))
HL1A = "HLA-"+ListeA[0].split(":")[0]+":"+ListeA[0].split(":")[1]
HL2A = "HLA-"+ListeA[1].split(":")[0]+":"+ListeA[1].split(":")[1]
HL1B = "HLA-"+ListeB[0].split(":")[0]+":"+ListeB[0].split(":")[1]
HL2B = "HLA-"+ListeB[1].split(":")[0]+":"+ListeB[1].split(":")[1]
body = write_body_HLA(HL1A,DictHLA,body)
body = write_body_HLA(HL2A,DictHLA,body)
body = write_body_HLA(HL1B,DictHLA,body)
body = write_body_HLA(HL2B,DictHLA,body)
body = writeBodyHLA(HL1A,DictHLA,body)
body = writeBodyHLA(HL2A,DictHLA,body)
body = writeBodyHLA(HL1B,DictHLA,body)
body = writeBodyHLA(HL2B,DictHLA,body)
return body
def main():
# Options
try:
opts, args = getopt.getopt(sys.argv[1:],"v:h:t:o:")
opts, args = getopt.getopt(sys.argv[1:],"v:s:t:o:")
for opt, arg in opts:
if opt in ("-v"):
vcfSNP = arg
if opt in ("-h"):
if opt in ("-s"):
report = arg
if opt in ("-t"):
tsvfile = arg
......@@ -205,26 +194,26 @@ def main():
outputfile = arg
except getopt.GetoptError:
print("please use this command as follow: ")
print('usage : python2.7 create_html.py -v <vcf/file> -h <report/from/HLAscan> -t tableau_pharmgkb.tsv -o <path/to/file/results>')
print('usage : python2.7 create_html.py -v <path/to/vcf/file> -s <path/to/report/from/HLAscan> -t <path/to/pharmgkb/table> -o <path/to/file/results>')
sys.exit(1)
# Manage options
if not vars().has_key('vcfSNP'):
if not 'vcfSNP' in vars():
print('No vcf SNP file given')
print('usage : python2.7 create_html.py -v <vcf/file> -h <report/from/HLAscan> -t tableau_pharmgkb.tsv -o <path/to/file/results>')
print('usage : python2.7 create_html.py -v <path/to/vcf/file> -s <path/to/report/from/HLAscan> -t <path/to/pharmgkb/table> -o <path/to/file/results>')
sys.exit(1)
if not vars().has_key('report'):
if not 'report' in vars():
print('No report HLAscan given')
print('usage : python2.7 create_html.py -v <vcf/file> -h <report/from/HLAscan> -t tableau_pharmgkb.tsv -o <path/to/file/results>')
print('usage : python2.7 create_html.py -v <path/to/vcf/file> -s <path/to/report/from/HLAscan> -t <path/to/pharmgkb/table> -o <path/to/file/results>')
sys.exit(1)
if not vars().has_key('tsvfile'):
if not 'tsvfile' in vars():
print('No tsv file given')
print('usage : python2.7 create_html.py -v <vcf/file> -h <report/from/HLAscan> -t tableau_pharmgkb.tsv -o <path/to/file/results>')
print('usage : python2.7 create_html.py -v <path/to/vcf/file> -s <path/to/report/from/HLAscan> -t <path/to/pharmgkb/table> -o <path/to/file/results>')
sys.exit(1)
if not vars().has_key('outputfile'):
if not 'outputfile' in vars():
print('No output file given')
print('usage : python2.7 create_html.py -v <vcf/file> -h <report/from/HLAscan> -t tableau_pharmgkb.tsv -o <path/to/file/results>')
print('usage : python2.7 create_html.py -v <path/to/vcf/file> -s <path/to/report/from/HLAscan> -t <path/to/pharmgkb/table> -o <path/to/file/results>')
sys.exit(1)
# Check files
......@@ -254,13 +243,10 @@ def main():
sys.exit(1)
#execute
DictRs,DictHLA=Parse_PharmGKB(tsvfile)
head,body=contruct_header()
body=extract_rs(tsvfile,vcfSNP,DictRs,body)
body=extract_HLA(report,DictHLA,body)
dictRs,dictHLA = parsePharmGKB(tsvfile)
head,body = contructHeader()
body = extractRS(tsvfile,vcfSNP,dictRs,body)
body = extractHLA(report,dictHLA,body)
with open(outputfile,'w') as output:
output.write(htmltag.HTML(head+body))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment