Commit fd649755 authored by simon verdez's avatar simon verdez

code correction

parent 187abbf2
...@@ -24,7 +24,7 @@ if [ ${PRINT_HELP} -eq 1 ] ...@@ -24,7 +24,7 @@ if [ ${PRINT_HELP} -eq 1 ]
then then
echo "Usage:" echo "Usage:"
echo "" echo ""
echo "bash -p <PharmGKB file> -b <bam file> -s <path to hlascan files> -v <vcf file> -o <output dir> -h [show this help] Extract_PharmGKB_Variants.sh" echo "bash -p <path/to/PharmGKB/file> -b <path/to/bam/file> -s <path/to/hlascan/files> -v <path/to/vcf/file> -o <path/to/output/dir> -h [print/this/help] Extract_PharmGKB_Variants.sh"
echo "" echo ""
echo "<Mandatory>: " echo "<Mandatory>: "
echo "-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site." echo "-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site."
...@@ -41,7 +41,6 @@ if [ "${PHARMGKB}" == "" ] ...@@ -41,7 +41,6 @@ if [ "${PHARMGKB}" == "" ]
then then
1>&2 echo "Missing input pharmgkb tsv file (-p). Abort." 1>&2 echo "Missing input pharmgkb tsv file (-p). Abort."
exit 2 exit 2
elif [ ! -f "${PHARMGKB}" ] elif [ ! -f "${PHARMGKB}" ]
then then
1>&2 echo "pharmgkb tsv file not found. Abort." 1>&2 echo "pharmgkb tsv file not found. Abort."
...@@ -53,7 +52,6 @@ if [ "${BAM}" == "" ] ...@@ -53,7 +52,6 @@ if [ "${BAM}" == "" ]
then then
1>&2 echo "Missing input BAMs (-b). Abort." 1>&2 echo "Missing input BAMs (-b). Abort."
exit 4 exit 4
elif [ ! -f "${BAM}" ] elif [ ! -f "${BAM}" ]
then then
1>&2 echo "BAM file not found. Abort." 1>&2 echo "BAM file not found. Abort."
...@@ -66,7 +64,6 @@ if [ "${HLASCAN}" == "" ] ...@@ -66,7 +64,6 @@ if [ "${HLASCAN}" == "" ]
then then
1>&2 echo "Missing input HLAscan path (-s). Abort." 1>&2 echo "Missing input HLAscan path (-s). Abort."
exit 6 exit 6
elif [ ! -d "${HLASCAN}" ] elif [ ! -d "${HLASCAN}" ]
then then
1>&2 echo "HLAscan path not found. Abort." 1>&2 echo "HLAscan path not found. Abort."
...@@ -78,7 +75,6 @@ if [ "${VCF}" == "" ] ...@@ -78,7 +75,6 @@ if [ "${VCF}" == "" ]
then then
1>&2 echo "Missing input vcf (-v). Abort." 1>&2 echo "Missing input vcf (-v). Abort."
exit 8 exit 8
elif [ ! -f "${VCF}" ] elif [ ! -f "${VCF}" ]
then then
1>&2 echo "vcf file not found. Abort." 1>&2 echo "vcf file not found. Abort."
...@@ -90,13 +86,31 @@ if [ "${OUTPUTDIR}" == "" ] ...@@ -90,13 +86,31 @@ if [ "${OUTPUTDIR}" == "" ]
then then
1>&2 echo "Missing input output dir path (-s). Abort." 1>&2 echo "Missing input output dir path (-s). Abort."
exit 10 exit 10
elif [ ! -d "${OUTPUTDIR}" ] elif [ ! -d "${OUTPUTDIR}" ]
then then
1>&2 echo "Output dir path not found. Abort." 1>&2 echo "Output dir path not found. Abort."
exit 11 exit 11
fi fi
# Check python version and PATH
pythonversion=$(python2.7 -V 2>&1)
PythonExitCode=$?
if [ $PythonExitCode -eq 0 ]
then
echo "python2.7 is in PATH, continue"
else
echo "missing python2.7 in PATH, use alias if necessary"
exit 12
fi
version=$(echo $pythonversion | cut -f2 -d" " | cut -f1,2 -d.)
if [ $version != '2.7' ]
then
echo "not the python version supported, please use python2.7"
exit 13
fi
echo "Input pharmgkb file: $PHARMGKB" echo "Input pharmgkb file: $PHARMGKB"
echo "Path to HLAscan: $HLASCAN" echo "Path to HLAscan: $HLASCAN"
echo "HLAscan executable: $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py" echo "HLAscan executable: $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py"
...@@ -108,7 +122,7 @@ echo "Output HLAscan: ${OUTPUTDIR}/HLAscan_result/" ...@@ -108,7 +122,7 @@ echo "Output HLAscan: ${OUTPUTDIR}/HLAscan_result/"
echo "Output HTML file: $OUTPUTDIR/Report.html" echo "Output HTML file: $OUTPUTDIR/Report.html"
#HLASCAN #HLASCAN
python $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py $BAM $HLASCAN/hla-ref-5gene/gene_list $OUTPUTDIR/HLAscan_result/ 1>> $OUTPUTDIR/HLAscan_log.txt 2>> $OUTPUTDIR/HLAscan_log.txt python2.7 $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py $BAM $HLASCAN/hla-ref-5gene/gene_list $OUTPUTDIR/HLAscan_result/ 1>> $OUTPUTDIR/HLAscan_log.txt 2>> $OUTPUTDIR/HLAscan_log.txt
#create HTML #create HTML
python create_html.py -v $VCF -s $OUTPUTDIR/HLAscan_result/Report -t $PHARMGKB -o $OUTPUTDIR/Report.html python2.7 create_html.py -v $VCF -s $OUTPUTDIR/HLAscan_result/Report -t $PHARMGKB -o $OUTPUTDIR/Report.html
\ No newline at end of file \ No newline at end of file
...@@ -19,10 +19,10 @@ pysam library. On output, you have a HTML file readable with any web navigator ( ...@@ -19,10 +19,10 @@ pysam library. On output, you have a HTML file readable with any web navigator (
Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan: genotyping of the HLA region using next-generation sequencing data. BMC bioinformatics, 18(1), 258. Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan: genotyping of the HLA region using next-generation sequencing data. BMC bioinformatics, 18(1), 258.
* Python: * Python2.7:
* Tested with python2.7 and python3.9 * Tested with python 2.7.18
* library: pysam , htmltag (required sphinx) * library: pysam , htmltag (required sphinx)
* [download python](https://www.python.org/download/releases/) * [download python](https://www.python.org/download/releases/python-2718/)
* PharmGKB csv file * PharmGKB csv file
* [download csv file](https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip) * [download csv file](https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip)
...@@ -33,9 +33,9 @@ Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan: ...@@ -33,9 +33,9 @@ Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan:
## Installation ## Installation
* First install library with pip: * First install library with pip:
* pip install pysam * pip2.7 install pysam
* pip install sphinx * pip2.7 install sphinx
* pip install htmltag * pip2.7 install htmltag
* Clone this repository * Clone this repository
...@@ -54,11 +54,11 @@ https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/NA12878/Nebraska_NA12878_HG001_ ...@@ -54,11 +54,11 @@ https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/NA12878/Nebraska_NA12878_HG001_
bash -p <PharmGKB file> -b <bam file> -s <path to HLAscan files> -v <vcf file> -o <output dir> Extract_PharmGKB_Variants.sh bash -p <PharmGKB file> -b <bam file> -s <path to HLAscan files> -v <vcf file> -o <output dir> Extract_PharmGKB_Variants.sh
Options: Options:
-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site. -p <path to PharmGKB file> : Path to PharmGKB tsv file download at PharmGKB site.
-b <bam file>: Bam file provided by alignement. -b <path to bam file> : Path to bam file provided by alignement.
-s <path to hlascan files>: Path to HLAscan tools download and unzipped. -s <path to hlascan files> : Path to HLAscan tools download and unzipped.
-v <vcf file>: vcf file provided by variant calling -v <path to vcf file> : Path to vcf file provided by variant calling
-o <OUTPUT_PATH>: Output path. -o <output_path>: Output path.
-h print help -h print help
#### Data test #### Data test
......
...@@ -96,9 +96,11 @@ def writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,variant,body): ...@@ -96,9 +96,11 @@ def writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,variant,body):
def extractRS(data,vcfSNP,DictRs,body): def extractRS(data,vcfSNP,DictRs,body):
""" rs information extraction """ """ rs information extraction """
countSNV=0
vcf_reader_SNP = VariantFile(vcfSNP) vcf_reader_SNP = VariantFile(vcfSNP)
for record in vcf_reader_SNP.fetch(): for record in vcf_reader_SNP.fetch():
if record.id in DictRs: if record.id in DictRs:
countSNV+=1
for Drug in DictRs[record.id]: for Drug in DictRs[record.id]:
for Pheno in DictRs[record.id][Drug]: for Pheno in DictRs[record.id][Drug]:
gene = DictRs[record.id][Drug][Pheno]["Gene"] gene = DictRs[record.id][Drug][Pheno]["Gene"]
...@@ -107,6 +109,7 @@ def extractRS(data,vcfSNP,DictRs,body): ...@@ -107,6 +109,7 @@ def extractRS(data,vcfSNP,DictRs,body):
Publi = DictRs[record.id][Drug][Pheno]["Publi"] Publi = DictRs[record.id][Drug][Pheno]["Publi"]
body =writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,record.id,body) body =writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,record.id,body)
body = body.append(htmltag.H3("Number of SNV found = "+str(countSNV)))
return body return body
...@@ -244,6 +247,12 @@ def main(): ...@@ -244,6 +247,12 @@ def main():
#execute #execute
dictRs,dictHLA = parsePharmGKB(tsvfile) dictRs,dictHLA = parsePharmGKB(tsvfile)
if len(dictRs.keys()) == 0:
print("WARNING : no rs variant found in PharmGKB file")
if len(dictHLA.keys()) == 0:
print("WARNING : no HLA variant found in PharmGKB file")
head,body = contructHeader() head,body = contructHeader()
body = extractRS(tsvfile,vcfSNP,dictRs,body) body = extractRS(tsvfile,vcfSNP,dictRs,body)
body = extractHLA(report,dictHLA,body) body = extractHLA(report,dictHLA,body)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment