Commit fd649755 authored by simon verdez's avatar simon verdez

code correction

parent 187abbf2
......@@ -24,7 +24,7 @@ if [ ${PRINT_HELP} -eq 1 ]
then
echo "Usage:"
echo ""
echo "bash -p <PharmGKB file> -b <bam file> -s <path to hlascan files> -v <vcf file> -o <output dir> -h [show this help] Extract_PharmGKB_Variants.sh"
echo "bash -p <path/to/PharmGKB/file> -b <path/to/bam/file> -s <path/to/hlascan/files> -v <path/to/vcf/file> -o <path/to/output/dir> -h [print/this/help] Extract_PharmGKB_Variants.sh"
echo ""
echo "<Mandatory>: "
echo "-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site."
......@@ -41,7 +41,6 @@ if [ "${PHARMGKB}" == "" ]
then
1>&2 echo "Missing input pharmgkb tsv file (-p). Abort."
exit 2
elif [ ! -f "${PHARMGKB}" ]
then
1>&2 echo "pharmgkb tsv file not found. Abort."
......@@ -53,7 +52,6 @@ if [ "${BAM}" == "" ]
then
1>&2 echo "Missing input BAMs (-b). Abort."
exit 4
elif [ ! -f "${BAM}" ]
then
1>&2 echo "BAM file not found. Abort."
......@@ -66,7 +64,6 @@ if [ "${HLASCAN}" == "" ]
then
1>&2 echo "Missing input HLAscan path (-s). Abort."
exit 6
elif [ ! -d "${HLASCAN}" ]
then
1>&2 echo "HLAscan path not found. Abort."
......@@ -78,7 +75,6 @@ if [ "${VCF}" == "" ]
then
1>&2 echo "Missing input vcf (-v). Abort."
exit 8
elif [ ! -f "${VCF}" ]
then
1>&2 echo "vcf file not found. Abort."
......@@ -90,13 +86,31 @@ if [ "${OUTPUTDIR}" == "" ]
then
1>&2 echo "Missing input output dir path (-s). Abort."
exit 10
elif [ ! -d "${OUTPUTDIR}" ]
then
1>&2 echo "Output dir path not found. Abort."
exit 11
fi
# Check python version and PATH
pythonversion=$(python2.7 -V 2>&1)
PythonExitCode=$?
if [ $PythonExitCode -eq 0 ]
then
echo "python2.7 is in PATH, continue"
else
echo "missing python2.7 in PATH, use alias if necessary"
exit 12
fi
version=$(echo $pythonversion | cut -f2 -d" " | cut -f1,2 -d.)
if [ $version != '2.7' ]
then
echo "not the python version supported, please use python2.7"
exit 13
fi
echo "Input pharmgkb file: $PHARMGKB"
echo "Path to HLAscan: $HLASCAN"
echo "HLAscan executable: $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py"
......@@ -108,7 +122,7 @@ echo "Output HLAscan: ${OUTPUTDIR}/HLAscan_result/"
echo "Output HTML file: $OUTPUTDIR/Report.html"
#HLASCAN
python $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py $BAM $HLASCAN/hla-ref-5gene/gene_list $OUTPUTDIR/HLAscan_result/ 1>> $OUTPUTDIR/HLAscan_log.txt 2>> $OUTPUTDIR/HLAscan_log.txt
python2.7 $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py $BAM $HLASCAN/hla-ref-5gene/gene_list $OUTPUTDIR/HLAscan_result/ 1>> $OUTPUTDIR/HLAscan_log.txt 2>> $OUTPUTDIR/HLAscan_log.txt
#create HTML
python create_html.py -v $VCF -s $OUTPUTDIR/HLAscan_result/Report -t $PHARMGKB -o $OUTPUTDIR/Report.html
\ No newline at end of file
python2.7 create_html.py -v $VCF -s $OUTPUTDIR/HLAscan_result/Report -t $PHARMGKB -o $OUTPUTDIR/Report.html
\ No newline at end of file
......@@ -19,10 +19,10 @@ pysam library. On output, you have a HTML file readable with any web navigator (
Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan: genotyping of the HLA region using next-generation sequencing data. BMC bioinformatics, 18(1), 258.
* Python:
* Tested with python2.7 and python3.9
* Python2.7:
* Tested with python 2.7.18
* library: pysam , htmltag (required sphinx)
* [download python](https://www.python.org/download/releases/)
* [download python](https://www.python.org/download/releases/python-2718/)
* PharmGKB csv file
* [download csv file](https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip)
......@@ -33,9 +33,9 @@ Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan:
## Installation
* First install library with pip:
* pip install pysam
* pip install sphinx
* pip install htmltag
* pip2.7 install pysam
* pip2.7 install sphinx
* pip2.7 install htmltag
* Clone this repository
......@@ -54,11 +54,11 @@ https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/NA12878/Nebraska_NA12878_HG001_
bash -p <PharmGKB file> -b <bam file> -s <path to HLAscan files> -v <vcf file> -o <output dir> Extract_PharmGKB_Variants.sh
Options:
-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site.
-b <bam file>: Bam file provided by alignement.
-s <path to hlascan files>: Path to HLAscan tools download and unzipped.
-v <vcf file>: vcf file provided by variant calling
-o <OUTPUT_PATH>: Output path.
-p <path to PharmGKB file> : Path to PharmGKB tsv file download at PharmGKB site.
-b <path to bam file> : Path to bam file provided by alignement.
-s <path to hlascan files> : Path to HLAscan tools download and unzipped.
-v <path to vcf file> : Path to vcf file provided by variant calling
-o <output_path>: Output path.
-h print help
#### Data test
......
......@@ -96,9 +96,11 @@ def writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,variant,body):
def extractRS(data,vcfSNP,DictRs,body):
""" rs information extraction """
countSNV=0
vcf_reader_SNP = VariantFile(vcfSNP)
for record in vcf_reader_SNP.fetch():
if record.id in DictRs:
countSNV+=1
for Drug in DictRs[record.id]:
for Pheno in DictRs[record.id][Drug]:
gene = DictRs[record.id][Drug][Pheno]["Gene"]
......@@ -107,6 +109,7 @@ def extractRS(data,vcfSNP,DictRs,body):
Publi = DictRs[record.id][Drug][Pheno]["Publi"]
body =writeBodyRS(Pheno,Drug,gene,LevelEvi,Publi,LevelModi,record.id,body)
body = body.append(htmltag.H3("Number of SNV found = "+str(countSNV)))
return body
......@@ -244,6 +247,12 @@ def main():
#execute
dictRs,dictHLA = parsePharmGKB(tsvfile)
if len(dictRs.keys()) == 0:
print("WARNING : no rs variant found in PharmGKB file")
if len(dictHLA.keys()) == 0:
print("WARNING : no HLA variant found in PharmGKB file")
head,body = contructHeader()
body = extractRS(tsvfile,vcfSNP,dictRs,body)
body = extractHLA(report,dictHLA,body)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment