Commit 769b466c authored by simon verdez's avatar simon verdez

create the README

parent fc81204a
# Default (empty) locations of the external tools.
# HLASCAN is reset below and can be supplied with -s/--hlascan.
# NOTE(review): no command-line option in the visible parser assigns SNPEFF,
# so it stays empty unless it is set elsewhere — confirm.
HLASCAN=''
SNPEFF=''
# TODO: download the PharmGKB clinical annotations archive:
#https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip
# TODO: replace star-allele notation with dbSNP notation, using the CPIC recommendations or the PharmVar website
# NOTE(review): a shebang is only honoured on the very first line of a file;
# at this position it is an ordinary comment — confirm whether the lines above
# really belong to the script.
#!/bin/bash
# Test data:
#wget https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/NA12878/Nebraska_NA12878_HG001_TruSeq_Exome/NIST-hg001-7001-b-gatk.vcf
# Inputs; all start empty and are filled in by the command-line option parser.
PHARMGKB=""
HLASCAN=""
BAM=""
VCF=""
OUTPUTDIR=""
# Run HLAscan on the BAM file with the 5-gene reference list, writing to HLAscan_result/.
# NOTE(review): this command runs before the command-line options are parsed,
# so HLASCAN is still empty at this point, and BAMFILE is not assigned anywhere
# in the visible script (the option parser fills BAM, not BAMFILE). This looks
# like a leftover from an earlier revision — confirm and remove if so.
python2.7 $HLASCAN/HLAscan.v1.0.Files/hla-paper/haplo_scan_v4.0-hla.py $BAMFILE $HLASCAN/HLAscan.v1.0.Files/hla-ref-5gene/gene_list HLAscan_result/
# Command-line option parsing.
# Every option except -h/--help takes its value from the following word; the
# branch shifts once for the value and the loop shifts once more for the flag.
# An unrecognised option prints a message and exits with status 1.
PRINT_HELP=0
while (( $# > 0 )); do
  case "$1" in
    -p | --pharmgkb)
      PHARMGKB="$2"
      shift
      ;;
    -s | --hlascan)
      HLASCAN="$2"
      shift
      ;;
    -b | --bam)
      BAM="$2"
      shift
      ;;
    -v | --vcf)
      VCF="$2"
      shift
      ;;
    -o | --out-directory)
      OUTPUTDIR="$2"
      shift
      ;;
    -h | --help)
      PRINT_HELP=1
      ;;
    *)
      echo "Unknown parameter passed: $1"
      exit 1
      ;;
  esac
  shift
done
# Print the usage text and exit with status 0 when -h/--help was requested.
# The usage line now contains the script path ($0): the previous text,
# "bash -p <PharmGKB file> ...", would make bash treat -p as its own
# privileged-mode option and try to run the PharmGKB file as the script.
# PRINT_HELP is tested inside [[ ]] so an empty value cannot break the test.
if [[ "${PRINT_HELP}" -eq 1 ]]; then
  echo "Usage:"
  echo ""
  echo "bash $0 -p <PharmGKB file> -b <bam file> -s <path to hlascan files> -v <vcf file> -o <output dir> -h [show this help]"
  echo ""
  echo "<Mandatory>: "
  echo "-p <PharmGKB file>: Path to PharmGKB tsv file download at PharmGKB site."
  echo "-b <bam file>: Bam file provided by alignement."
  echo "-s <path to hlascan files>: Path to HLAscan tools download and unzipped."
  echo "-v <vcf file>: vcf file provided by variant calling"
  echo "-o <OUTPUT_PATH>: Output path."
  echo ""
  exit 0
fi
# PharmGKB tsv: must be supplied (exit 2) and must be a regular file (exit 3).
if [[ -z "${PHARMGKB}" ]]; then
  echo "Missing input pharmgkb tsv file (-p). Abort." >&2
  exit 2
fi
if [[ ! -f "${PHARMGKB}" ]]; then
  echo "pharmgkb tsv file not found. Abort." >&2
  exit 3
fi

# BAM file: must be supplied (exit 4) and must be a regular file (exit 5).
if [[ -z "${BAM}" ]]; then
  echo "Missing input BAMs (-b). Abort." >&2
  exit 4
fi
if [[ ! -f "${BAM}" ]]; then
  echo "BAM file not found. Abort." >&2
  exit 5
fi
# Annotate the input VCF with SnpSift against pharmgkb_v2.vcf (looked up in the
# current working directory) and write the result to "${VCF}_annot".
# NOTE(review): this runs before VCF has been validated, and SNPEFF is only ever
# assigned the empty string in the visible script, so the jar path expands to
# "/SnpSift.jar". This looks like a leftover from an earlier revision — confirm
# and remove, or add an option that sets SNPEFF.
bash -o pipefail -c "java -Xmx4g -jar $SNPEFF/SnpSift.jar annotate pharmgkb_v2.vcf $VCF > ${VCF}_annot"
# HLAscan location: must be supplied (exit 6) and must be a directory (exit 7).
if [[ -z "${HLASCAN}" ]]; then
  echo "Missing input HLAscan path (-s). Abort." >&2
  exit 6
fi
if [[ ! -d "${HLASCAN}" ]]; then
  echo "HLAscan path not found. Abort." >&2
  exit 7
fi

# VCF file: must be supplied (exit 8) and must be a regular file (exit 9).
if [[ -z "${VCF}" ]]; then
  echo "Missing input vcf (-v). Abort." >&2
  exit 8
fi
if [[ ! -f "${VCF}" ]]; then
  echo "vcf file not found. Abort." >&2
  exit 9
fi
# Output directory: must be supplied (exit 10) and must already exist as a
# directory (exit 11). The "missing" message names -o, the flag that actually
# sets OUTPUTDIR; it previously pointed the user at -s (the HLAscan path).
if [[ -z "${OUTPUTDIR}" ]]; then
  echo "Missing input output dir path (-o). Abort." >&2
  exit 10
elif [[ ! -d "${OUTPUTDIR}" ]]; then
  echo "Output dir path not found. Abort." >&2
  exit 11
fi
# Summarise the resolved inputs and outputs on stdout, one item per line.
printf '%s\n' \
  "Input pharmgkb file: $PHARMGKB" \
  "Path to HLAscan: $HLASCAN" \
  "HLAscan executable: $HLASCAN/hla-paper/haplo_scan_v4.0-hla.py" \
  "Gene list for HLAscan: $HLASCAN/hla-ref-5gene/gene_list" \
  "Input bam file: $BAM" \
  "Input vcf file: $VCF" \
  "Output directory: ${OUTPUTDIR}" \
  "Output HLAscan: ${OUTPUTDIR}/HLAscan_result/" \
  "Output HTML file: $OUTPUTDIR/Report.html"
# Run HLAscan on the BAM file with the 5-gene reference list. Results go to
# "$OUTPUTDIR/HLAscan_result/"; stdout and stderr are both appended to
# "$OUTPUTDIR/HLAscan_log.txt".
# Every path is double-quoted so that directories or files containing spaces
# or glob characters are passed as single arguments.
python2.7 "$HLASCAN/hla-paper/haplo_scan_v4.0-hla.py" \
  "$BAM" \
  "$HLASCAN/hla-ref-5gene/gene_list" \
  "$OUTPUTDIR/HLAscan_result/" \
  >> "$OUTPUTDIR/HLAscan_log.txt" 2>&1
# Build the HTML report from the annotated VCF and the HLAscan report.
# NOTE(review): BAMFILE is not assigned anywhere in the visible script, so the
# unquoted $BAMFILE expands to nothing and -b is followed directly by -t. The
# input and output paths here are also relative to the current directory
# rather than OUTPUTDIR. This looks like a leftover from an earlier revision —
# confirm and remove if so.
python2.7 create_html.py -v ${VCF}_annot -h HLAscan_result/Report -b $BAMFILE -t tableau_pharmgkb.tsv -o Report.html
# Build the HTML report from the input VCF, the HLAscan report and the
# PharmGKB tsv, writing it to "$OUTPUTDIR/Report.html".
# Arguments are double-quoted so paths containing spaces stay single words.
python2.7 create_html.py \
  -v "$VCF" \
  -h "$OUTPUTDIR/HLAscan_result/Report" \
  -t "$PHARMGKB" \
  -o "$OUTPUTDIR/Report.html"
# Extract PharmGKB variants
### A simple way to extract SNV and HLA alleles from exome data
This script extracts known PharmGKB variants from exome data. It works in two steps: HLA genotyping with HLAscan, followed by SNV extraction with the
pysam library. The output is an HTML file that can be opened in any web browser (Internet Explorer, Firefox or Chrome).
## Table of Contents
* [Requirements](#requirements)
* [Installation](#installation)
* [Usage](#usage)
## Requirements
* HLAscan v1.0:
* [download zip file](https://github.com/SyntekabioTools/HLAscan/releases/download/v1.0.0/HLAscan.v1.0.Files.zip)
* [Publication](https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-017-1671-3)
Reference : Ka, S., Lee, S., Hong, J., Cho, Y., ... & Jung, J. (2017). HLAscan: genotyping of the HLA region using next-generation sequencing data. BMC bioinformatics, 18(1), 258.
* Python2.7:
* libraries: pysam, htmltag (requires sphinx)
* [download python2.7](https://www.python.org/download/releases/2.7/)
* PharmGKB tsv file
* [download zip archive](https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip)
* we will use the **clinical_annotations.tsv** for the next step
* Replace all star alleles with dbSNP variant IDs where possible; for example, CYP2C9*2 becomes rs1799853
* Use the [CPIC](https://cpicpgx.org/) recommendations or the [PharmVar](https://www.pharmvar.org/) website for the conversion
## Installation
* First install the libraries with pip:
* pip2.7 install pysam
* pip2.7 install sphinx
* pip2.7 install htmltag
* Clone this repository
## Usage
### Data test
* NA12878 data test :
* You can download the VCF file produced by exome sequencing from this link:
https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/NA12878/Nebraska_NA12878_HG001_TruSeq_Exome/NIST-hg001-7001-b-gatk.vcf
* Bam file is provided with HLAscan
### Execution
bash <path to this script> -p <PharmGKB file> -b <bam file> -s <path to hlascan files> -v <vcf file> -o <output dir>
......@@ -240,7 +240,7 @@ def main():
f = open(tsvfile, 'r')
f.close()
except IOError:
print('Tsv file does not exist')
print('PharmGKB tsv file does not exist')
sys.exit(1)
#execute
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment