Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
pharmAnnot
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
gad-public
pharmAnnot
Commits
3c5a5a82
Commit
3c5a5a82
authored
6 years ago
by
Simon Verdez
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
pipeline works
parent
5f8e477e
master
dev
public
No related merge requests found
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
321 additions
and
10 deletions
+321
-10
Master.sh
Master.sh
+15
-7
create_html.py
create_html.py
+302
-0
merge_GATK.sh
merge_GATK.sh
+3
-3
pharmconfig.tsv
pharmconfig.tsv
+1
-0
No files found.
Master.sh
View file @
3c5a5a82
...
@@ -66,7 +66,7 @@ PHARMGKB=`grep "pharmagkbase" $PHARMCONFIG | cut -f2`
...
@@ -66,7 +66,7 @@ PHARMGKB=`grep "pharmagkbase" $PHARMCONFIG | cut -f2`
OMIM
=
`
grep
"omim"
$PHARMCONFIG
|
cut
-f2
`
OMIM
=
`
grep
"omim"
$PHARMCONFIG
|
cut
-f2
`
HLALIST
=
`
grep
"HLAlist"
$PHARMCONFIG
|
cut
-f2
`
HLALIST
=
`
grep
"HLAlist"
$PHARMCONFIG
|
cut
-f2
`
HEADER
=
`
grep
"header"
$PHARMCONFIG
|
cut
-f2
`
HEADER
=
`
grep
"header"
$PHARMCONFIG
|
cut
-f2
`
REF
=
`
grep
"reference"
$PHARMCONFIG
|
cut
-f2
`
# python path for proper execution of python
# python path for proper execution of python
PYTHONPATH
=
$PIPELINEBASE
/common:/work/gad/shared/bin/lib/python_2.7/lib/python2.7/site-packages:/work/gad/shared/bin/miniconda2/lib/python2.7/site-packages/
PYTHONPATH
=
$PIPELINEBASE
/common:/work/gad/shared/bin/lib/python_2.7/lib/python2.7/site-packages:/work/gad/shared/bin/miniconda2/lib/python2.7/site-packages/
...
@@ -104,11 +104,11 @@ else
...
@@ -104,11 +104,11 @@ else
fi
fi
# get the status
# get the status
REF
=
`
grep
"reference"
$STATUSFILE
|
cut
-f2
`
HLAscan
=
`
grep
"process_HLAscan"
$STATUSFILE
|
cut
-f2
`
HLAscan
=
`
grep
"process_HLAscan"
$STATUSFILE
|
cut
-f2
`
varcallSNP
=
`
grep
"process_varcallSNP"
$STATUSFILE
|
cut
-f2
`
varcallSNP
=
`
grep
"process_varcallSNP"
$STATUSFILE
|
cut
-f2
`
varcallCNV
=
`
grep
"process_varcallCNV"
$STATUSFILE
|
cut
-f2
`
varcallCNV
=
`
grep
"process_varcallCNV"
$STATUSFILE
|
cut
-f2
`
createHTML
=
`
grep
"process_createHTML"
$STATUSFILE
|
cut
-f2
`
createHTML
=
`
grep
"process_createHTML"
$STATUSFILE
|
cut
-f2
`
mergeVCF
=
`
grep
"mergeVCF"
$STATUSFILE
|
cut
-f2
`
if
[
"
$HLAscan
"
==
"FAIL"
]
if
[
"
$HLAscan
"
==
"FAIL"
]
then
then
...
@@ -139,6 +139,14 @@ else
...
@@ -139,6 +139,14 @@ else
echo
"CreateHTML step was OK"
echo
"CreateHTML step was OK"
fi
fi
if
[
"
$mergeVCF
"
==
"FAIL"
]
then
echo
"MergeVCF step failed : jumping to merge step"
jumpto MergeVCF
else
echo
"MerfeVCF step was OK"
fi
echo
"pipeline was correctly executed, no need to relaunch a step"
echo
"pipeline was correctly executed, no need to relaunch a step"
exit
0
exit
0
fi
fi
...
@@ -172,7 +180,7 @@ echo "### Execute HLAscan ###"
...
@@ -172,7 +180,7 @@ echo "### Execute HLAscan ###"
echo
"Start :
$(
date
+
"%F_%H-%M-%S"
)
"
echo
"Start :
$(
date
+
"%F_%H-%M-%S"
)
"
for
currentSample
in
$samples
for
currentSample
in
$samples
do
do
echo
"Command : qsub -pe smp
1
-N HLAscan_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/ -e
$ANALYSISDIR
/
$currentSample
/logs/ -v INPUTFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,OUTPUTFILE=
$ANALYSISDIR
/
$currentSample
/HLAscan,GeneList=
$GL
,LOGFILE=
$ANALYSISDIR
/
$currentSample
/logs/process_HLAscan.
$logbasename
.log,CONFIGFILE=
$CONFIGFILE
$PIPELINEBASE
/haplo_scan.sh"
echo
"Command : qsub -pe smp
2
-N HLAscan_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/ -e
$ANALYSISDIR
/
$currentSample
/logs/ -v INPUTFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,OUTPUTFILE=
$ANALYSISDIR
/
$currentSample
/HLAscan,GeneList=
$GL
,LOGFILE=
$ANALYSISDIR
/
$currentSample
/logs/process_HLAscan.
$logbasename
.log,CONFIGFILE=
$CONFIGFILE
$PIPELINEBASE
/haplo_scan.sh"
qsub
-pe
smp 1
-N
HLAscan_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/
-e
$ANALYSISDIR
/
$currentSample
/logs/
-v
BAM
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,SORTIE
=
$ANALYSISDIR
/
$currentSample
/HLAscan,GeneList
=
$GL
,LOGFILE
=
$ANALYSISDIR
/
$currentSample
/logs/process_HLAscan.
$logbasename
.log,CONFIGFILE
=
$CONFIGFILE
$PIPELINEBASE
/haplo_scan.sh
qsub
-pe
smp 1
-N
HLAscan_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/
-e
$ANALYSISDIR
/
$currentSample
/logs/
-v
BAM
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,SORTIE
=
$ANALYSISDIR
/
$currentSample
/HLAscan,GeneList
=
$GL
,LOGFILE
=
$ANALYSISDIR
/
$currentSample
/logs/process_HLAscan.
$logbasename
.log,CONFIGFILE
=
$CONFIGFILE
$PIPELINEBASE
/haplo_scan.sh
done
done
echo
"End :
$(
date
+
"%F_%H-%M-%S"
)
"
echo
"End :
$(
date
+
"%F_%H-%M-%S"
)
"
...
@@ -208,8 +216,8 @@ echo "### Generate HTML report ###"
...
@@ -208,8 +216,8 @@ echo "### Generate HTML report ###"
echo
"Start :
$(
date
+
"%F_%H-%M-%S"
)
"
echo
"Start :
$(
date
+
"%F_%H-%M-%S"
)
"
for
currentSample
in
$samples
for
currentSample
in
$samples
do
do
echo
"Command : qsub -pe smp 1 -
N createHTML_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/ -e
$ANALYSISDIR
/
$currentSample
/logs/ -v VCF_SNP=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,VCF_CNV=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.CNV.vcf,REPORTFILE=
$ANALYSISDIR
/
$currentSample
/HLAscan/Report,BAMFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,TSVFILE=
$TSVFILE
,OUTPUTFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.html,LOGFILE=
$ANALYSISDIR
/
$currentSample
/logs/process_varcal
l.
$logbasename
.log,CONFIGFILE=
$CONFIGFILE
$PIPELINEBASE
/wrapper_create_html.sh"
echo
"Command : qsub -pe smp 1 -
hold_jid varcallCNV_
${
currentSample
}
,varcallSNP_
${
currentSample
}
,HLAscan_
${
currentSample
}
-N createHTML_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/ -e
$ANALYSISDIR
/
$currentSample
/logs/ -v VCF_SNP=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,VCF_CNV=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.CNV.vcf,REPORTFILE=
$ANALYSISDIR
/
$currentSample
/HLAscan/Report,BAMFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,TSVFILE=
$TSVFILE
,OUTPUTFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.html,LOGFILE=
$ANALYSISDIR
/
$currentSample
/logs/create_htm
l.
$logbasename
.log,CONFIGFILE=
$CONFIGFILE
$PIPELINEBASE
/wrapper_create_html.sh"
qsub
-pe
smp 1
-
N
createHTML_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/
-e
$ANALYSISDIR
/
$currentSample
/logs/
-v
VCFFILE_SNP
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,VCFFILE_CNV
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.CNV.vcf,REPORTFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.report,BAMFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.bam,TSVFILE
=
$TSVFILE
,OUTPUTFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.html,LOGFILE
=
$ANALYSISDIR
/
$currentSample
/logs/process_varcal
l.
$logbasename
.log,CONFIGFILE
=
$CONFIGFILE
$PIPELINEBASE
/wrapper_create_html.sh
qsub
-pe
smp 1
-
hold_jid
varcallCNV_
${
currentSample
}
,varcallSNP_
${
currentSample
}
,HLAscan_
${
currentSample
}
-N
createHTML_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/
-e
$ANALYSISDIR
/
$currentSample
/logs/
-v
VCF_SNP
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,VCF_CNV
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.CNV.vcf,REPORTFILE
=
$ANALYSISDIR
/
$currentSample
/HLAscan/Report,BAMFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.dedup.bam,TSVFILE
=
$TSVFILE
,OUTPUTFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.html,LOGFILE
=
$ANALYSISDIR
/
$currentSample
/logs/create_htm
l.
$logbasename
.log,CONFIGFILE
=
$CONFIGFILE
$PIPELINEBASE
/wrapper_create_html.sh
done
done
echo
"End :
$(
date
+
"%F_%H-%M-%S"
)
"
echo
"End :
$(
date
+
"%F_%H-%M-%S"
)
"
...
@@ -219,7 +227,7 @@ echo "### Merge VCF files"
...
@@ -219,7 +227,7 @@ echo "### Merge VCF files"
echo
"Start :
$(
date
+
"%F_%H-%M-%S"
)
"
echo
"Start :
$(
date
+
"%F_%H-%M-%S"
)
"
for
currentSample
in
$samples
for
currentSample
in
$samples
do
do
echo
"Command : qsub -pe smp 1 -
N mergeVCF_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/ -e
$ANALYSISDIR
/
$currentSample
/logs/ -v INPUTFILE1=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.CNV.vcf,INPUTFILE2=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,REF=
$REF
,OUTPUTFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.merge.vcf,LOGFILE=
$ANALYSISDIR
/
$currentSample
/logs/process_varcall
.
$logbasename
.log,CONFIGFILE=
$CONFIGFILE
$PIPELINEBASE
/merge_GATK.sh"
echo
"Command : qsub -pe smp 1 -
hold_jid varcallCNV_
${
currentSample
}
,varcallSNP_
${
currentSample
}
-N mergeVCF_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/ -e
$ANALYSISDIR
/
$currentSample
/logs/ -v INPUTFILE1=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.CNV.vcf,INPUTFILE2=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,REF=
$REF
,OUTPUTFILE=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.merge.vcf,LOGFILE=
$ANALYSISDIR
/
$currentSample
/logs/merge_file
.
$logbasename
.log,CONFIGFILE=
$CONFIGFILE
$PIPELINEBASE
/merge_GATK.sh"
qsub
-pe
smp 1
-
N
mergeVCF_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/
-e
$ANALYSISDIR
/
$currentSample
/logs/
-v
INPUTFILE1
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.CNV.vcf,INPUTFILE2
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,REF
=
$REF
,OUTPUTFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.vcf,LOGFILE
=
$ANALYSISDIR
/
$currentSample
/logs/process_varcall
.
$logbasename
.log,CONFIGFILE
=
$CONFIGFILE
$PIPELINEBASE
/merge_GATK.sh
qsub
-pe
smp 1
-
hold_jid
varcallCNV_
${
currentSample
}
,varcallSNP_
${
currentSample
}
-N
mergeVCF_
${
currentSample
}
-o
$ANALYSISDIR
/
$currentSample
/logs/
-e
$ANALYSISDIR
/
$currentSample
/logs/
-v
INPUTFILE1
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.CNV.vcf,INPUTFILE2
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.pharmacoAnnot.SNP.vcf,REF
=
$REF
,OUTPUTFILE
=
$ANALYSISDIR
/
$currentSample
/
$currentSample
.merge.vcf,LOGFILE
=
$ANALYSISDIR
/
$currentSample
/logs/merge_file
.
$logbasename
.log,CONFIGFILE
=
$CONFIGFILE
$PIPELINEBASE
/merge_GATK.sh
done
done
echo
"End :
$(
date
+
"%F_%H-%M-%S"
)
"
echo
"End :
$(
date
+
"%F_%H-%M-%S"
)
"
This diff is collapsed.
Click to expand it.
create_html.py
0 → 100755
View file @
3c5a5a82
#!/usr/bin/python
### PharmAnnot PIPELINE ###
## create_html_v2.py
## Version : 1.0.0
## Description : Create a html file with vcf file, BAM file and report HLAscan
## Usage : python create_html_v2.py -v vcf_SNP -c vcf_CNV -h report_HLA -b BAM -t tsvfile -o html -e log
## Output : Annotated vcf file
## Requirements : python 2.7+
## Author : Simon.Verdez@u-bourgogne.fr
## Creation Date : 20180731
## last revision date : 20180731
## Known bugs : None
#Example : python create_html_v2.py dijex2000.vcf report_HLA BAM
import
sys
import
getopt
import
subprocess
import
csv
from
pysam
import
VariantFile
import
htmltag
import
logging
from
operator
import
*
# Options
try
:
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
'v:c:h:b:t:o:e'
)
for
opt
,
arg
in
opts
:
if
opt
in
(
"-v"
):
vcfSNP
=
arg
if
opt
in
(
"-c"
):
vcfCNV
=
arg
if
opt
in
(
"-h"
):
report
=
arg
if
opt
in
(
"-b"
):
bam
=
arg
if
opt
in
(
"-t"
):
tsvfile
=
arg
elif
opt
in
(
"-o"
):
sys
.
stdout
=
open
(
arg
,
'w'
)
elif
opt
in
(
"-e"
):
logfile
=
arg
except
getopt
.
GetoptError
:
print
'usage : '
sys
.
exit
(
1
)
# Logging
logging
.
basicConfig
(
filename
=
'
%
s'
%
(
logfile
),
filemode
=
'a'
,
level
=
logging
.
INFO
,
format
=
'
%(asctime)
s
%(levelname)
s -
%(message)
s'
)
logging
.
info
(
'start'
)
# Manage options
if
not
vars
()
.
has_key
(
'vcfSNP'
):
logging
.
exception
(
'No vcf SNP file given'
)
sys
.
exit
(
1
)
if
not
vars
()
.
has_key
(
'vcfCNV'
):
logging
.
exception
(
'No vcf CNV file given'
)
sys
.
exit
(
1
)
if
not
vars
()
.
has_key
(
'report'
):
logging
.
exception
(
'No report HLAscan given'
)
sys
.
exit
(
1
)
if
not
vars
()
.
has_key
(
'bam'
):
logging
.
exception
(
'No bam file given'
)
sys
.
exit
(
1
)
if
not
vars
()
.
has_key
(
'tsvfile'
):
logging
.
exception
(
'No tsv file given'
)
sys
.
exit
(
1
)
# Check files
try
:
f
=
open
(
vcfSNP
,
'r'
)
f
.
close
()
except
IOError
:
logging
.
exception
(
'vcfSNP file does not exist'
)
sys
.
exit
(
1
)
try
:
f
=
open
(
vcfCNV
,
'r'
)
f
.
close
()
except
IOError
:
logging
.
exception
(
'vcfCNV file does not exist'
)
sys
.
exit
(
1
)
try
:
f
=
open
(
report
,
'r'
)
f
.
close
()
except
IOError
:
logging
.
exception
(
'Report HLAscan file does not exist'
)
sys
.
exit
(
1
)
try
:
f
=
open
(
bam
,
'r'
)
f
.
close
()
except
IOError
:
logging
.
exception
(
'BAM file does not exist'
)
sys
.
exit
(
1
)
try
:
f
=
open
(
tsvfile
,
'r'
)
f
.
close
()
except
IOError
:
logging
.
exception
(
'Tsv file does not exist'
)
sys
.
exit
(
1
)
""" construction du formulaire en premier"""
head
=
htmltag
.
HEAD
()
head
=
htmltag
.
append
(
htmltag
.
TITLE
(
'pharmacogenetic'
))
head
=
head
.
append
(
htmltag
.
STYLE
(
type
=
"text/css"
)
.
append
(
"body { padding-left: 15em; font-family: Georgia; color:black }"
))
head
=
head
.
append
(
htmltag
.
STYLE
(
type
=
"text/css"
)
.
append
(
"h2 { padding-left: 15em;font-family: Georgia;color:blue }"
))
head
=
head
.
append
(
htmltag
.
STYLE
(
type
=
"text/css"
)
.
append
(
"ul.navbar { list-style-type: none; padding: 0; margin: 0; position: absolute; top: 2em; left: 1em; width: 13em }"
))
head
=
head
.
append
(
htmltag
.
STYLE
(
type
=
"text/css"
)
.
append
(
"ul.navbar li { background: yellow; margin: 0.5em 0; padding: 1em; border-left: 0.5em solid black }"
))
head
=
head
.
append
(
htmltag
.
STYLE
(
type
=
"text/css"
)
.
append
(
"P1 {color: red }"
))
head
=
head
.
append
(
htmltag
.
STYLE
(
type
=
"text/css"
)
.
append
(
"P2 { color: orange }"
))
body
=
htmltag
.
BODY
()
body
=
body
.
append
(
htmltag
.
UL
(
CLASS
=
"navbar"
)
.
append
(
htmltag
.
UL
(
htmltag
.
LI
(
htmltag
.
A
(
href
=
"https://www.pharmgkb.org"
)
.
append
(
"pharmgkb"
))))
.
append
(
htmltag
.
UL
(
htmltag
.
LI
(
htmltag
.
A
(
href
=
"https://cpicpgx.org"
)
.
append
(
"cpicpgx"
))))
.
append
(
htmltag
.
UL
(
htmltag
.
LI
(
htmltag
.
A
(
href
=
"http://www.pharmacogenetics.fr/"
)
.
append
(
"pharmacogenetics.fr"
)))))
body
=
body
.
append
(
htmltag
.
H2
(
'Variants pharmgkb'
))
"""extraction des donnees """
data
=
tsvfile
vcf_reader_SNP
=
VariantFile
(
vcfSNP
)
vcf_reader_CNV
=
VariantFile
(
vcfCNV
)
BAM_FILE
=
bam
Report_HLA
=
open
(
report
,
"r"
)
for
record
in
vcf_reader_SNP
.
fetch
():
try
:
for
i
in
range
(
len
(
record
.
info
[
"Pharmaco_Annot"
])
/
12
):
try
:
gene
=
repr
(
record
.
info
[
"GENEINFO"
])
except
:
gene
=
"intron"
alt
=
record
.
info
[
"Pharmaco_Annot"
][(
i
*
12
)
+
1
]
.
replace
(
")"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
"-"
,
""
)
pheno
=
record
.
info
[
"Pharmaco_Annot"
][(
i
*
12
)
+
3
]
.
replace
(
")"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
"-"
,
" "
)
publi
=
record
.
info
[
"Pharmaco_Annot"
][(
i
*
12
)
+
5
]
.
replace
(
")"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
"-"
,
" "
)
level
=
record
.
info
[
"Pharmaco_Annot"
][(
i
*
12
)
+
7
]
.
replace
(
")"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
"-"
,
""
)
patho
=
record
.
info
[
"Pharmaco_Annot"
][(
i
*
12
)
+
9
]
.
replace
(
")"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
"-"
,
""
)
molecule
=
record
.
info
[
"Pharmaco_Annot"
][(
i
*
12
)
+
11
]
.
replace
(
")"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
"-"
,
" "
)
.
replace
(
"]"
,
""
)
if
level
.
startswith
(
"1"
):
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
P1
(
"pathologie: "
+
patho
))
+
htmltag
.
P
(
htmltag
.
P1
(
"molecule: "
+
molecule
))
+
htmltag
.
P
(
htmltag
.
P1
(
"gene: "
+
gene
))
+
htmltag
.
P
(
htmltag
.
P1
(
"variant: "
+
repr
(
record
.
pos
)
+
" "
+
repr
(
record
.
ref
)
+
">"
+
repr
(
record
.
alts
)
+
" "
+
repr
(
record
.
id
)))
+
htmltag
.
P
(
htmltag
.
P1
(
"base alternative: "
+
alt
))
+
htmltag
.
P
(
htmltag
.
P1
(
"niveau: "
+
level
))
+
htmltag
.
P
(
htmltag
.
P1
(
"phenotype: "
+
pheno
)))
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
P1
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
elif
level
.
startswith
(
"2"
):
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
P2
(
"pathologie: "
+
patho
))
+
htmltag
.
P
(
htmltag
.
P2
(
"molecule: "
+
molecule
))
+
htmltag
.
P
(
htmltag
.
P2
(
"gene: "
+
gene
))
+
htmltag
.
P
(
htmltag
.
P2
(
"variant: "
+
repr
(
record
.
pos
)
+
" "
+
repr
(
record
.
ref
)
+
">"
+
repr
(
record
.
alts
)
+
" "
+
repr
(
record
.
id
)))
+
htmltag
.
P
(
htmltag
.
P2
(
"base alternative: "
+
alt
))
+
htmltag
.
P
(
htmltag
.
P2
(
"niveau: "
+
level
))
+
htmltag
.
P
(
htmltag
.
P2
(
"phenotype: "
+
pheno
)))
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
P2
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
else
:
body
=
body
.
append
(
htmltag
.
P
(
"pathologie: "
+
patho
)
+
htmltag
.
P
(
"molecule: "
+
molecule
)
+
htmltag
.
P
(
"gene: "
+
gene
)
+
htmltag
.
P
(
"variant: "
+
repr
(
record
.
pos
)
+
" "
+
repr
(
record
.
ref
)
+
">"
+
repr
(
record
.
alts
)
+
" "
+
repr
(
record
.
id
))
+
htmltag
.
P
(
"base alternative: "
+
alt
)
+
htmltag
.
P
(
"niveau: "
+
level
)
+
htmltag
.
P
(
htmltag
.
P
(
"phenotype: "
+
pheno
)))
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
)))
body
=
body
.
append
(
htmltag
.
P
(
"#######################################################################"
))
except
:
pass
body
=
body
.
append
(
htmltag
.
H2
(
'Deletion ou duplication'
))
for
record
in
vcf_reader_CNV
.
fetch
():
for
elm
in
record
.
alts
:
if
elm
==
"<DEL>"
:
ListeM
=
[]
ListeD
=
[]
body
=
body
.
append
(
htmltag
.
P
(
"Deletion detectee par xHMM sur le gene "
+
record
.
info
[
"GENE"
]))
with
open
(
data
)
as
tsvfile
:
reader
=
csv
.
DictReader
(
tsvfile
,
dialect
=
'excel-tab'
)
for
row
in
reader
:
if
row
[
'gene'
]
!=
""
:
if
row
[
'drug'
]
==
record
.
info
[
"GENE"
]:
if
row
[
'drug'
]
not
in
ListeM
:
ListeG
.
append
(
row
[
'drug'
])
for
Mol
in
ListeM
:
body
=
body
.
append
(
htmltag
.
P
(
"######Gene ayant une relation avec :"
+
Mol
))
depth_ave
=
0.0
pos_len
=
0
depth_new
=
0
p
=
subprocess
.
Popen
([
"samtools"
,
'depth'
,
BAM_FILE
,
'-r'
,
record
.
chrom
+
':'
+
str
(
record
.
pos
)
+
'-'
\
+
str
(
record
.
stop
)],
stdout
=
subprocess
.
PIPE
)
for
line
in
p
.
stdout
.
readlines
():
item
=
line
.
split
()
if
len
(
item
)
==
3
:
ListeD
.
append
(
item
[
2
])
for
elm
in
ListeD
:
depth_new
=
depth_new
+
int
(
elm
)
depth_ave
=
depth_ave
+
float
(
item
[
2
])
pos_len
=
pos_len
+
(
int
(
record
.
stop
)
-
int
(
record
.
pos
)
+
1
)
depth_ave
=
depth_new
/
float
(
pos_len
)
if
depth_ave
>
40
:
body
=
body
.
append
(
htmltag
.
P
(
"######Detecte heterozygote, profondeur moyenne :"
+
str
(
depth_ave
)))
if
depth_ave
<
10
:
body
=
body
.
append
(
htmltag
.
P
(
"######Detecte homozygote, profondeur moyenne :"
+
str
(
depth_ave
)))
else
:
body
=
body
.
append
(
htmltag
.
P
(
"######Detecte probablement heterozygote, profondeur moyenne :"
+
str
(
depth_ave
)))
if
elm
==
"<DUP>"
:
ListeM
=
[]
body
=
body
.
append
(
htmltag
.
P
(
"Duplication detectee par xHMM sur le gene "
+
record
.
info
[
"GENE"
]))
with
open
(
data
)
as
tsvfile
:
reader
=
csv
.
DictReader
(
tsvfile
,
dialect
=
'excel-tab'
)
for
row
in
reader
:
if
row
[
'gene'
]
!=
""
:
if
row
[
'drug'
]
==
record
.
info
[
"GENE"
]:
if
row
[
'drug'
]
not
in
ListeM
:
ListeG
.
append
(
row
[
'drug'
])
for
Mol
in
ListeM
:
body
=
body
.
append
(
htmltag
.
P
(
"######Gene ayant une relation avec :"
+
Mol
))
HLA
=
Report_HLA
.
readlines
()
ListeA
=
[]
ListeB
=
[]
ListeC
=
[]
ListeDQB1
=
[]
ListeDRB1
=
[]
for
lines
in
HLA
:
if
lines
.
startswith
(
"A"
):
for
ligne
in
lines
.
split
(
"
\t
"
):
if
ligne
.
startswith
(
"A*"
):
ListeA
.
append
(
ligne
)
if
lines
.
startswith
(
"B"
):
for
ligne
in
lines
.
split
(
"
\t
"
):
if
ligne
.
startswith
(
"B*"
):
ListeB
.
append
(
ligne
)
if
lines
.
startswith
(
"C"
):
for
ligne
in
lines
.
split
(
"
\t
"
):
if
ligne
.
startswith
(
"C*"
):
ListeC
.
append
(
ligne
)
if
lines
.
startswith
(
"DQB1"
):
for
ligne
in
lines
.
split
(
"
\t
"
):
if
ligne
.
startswith
(
"DQB1*"
):
ListeDQB1
.
append
(
ligne
)
if
lines
.
startswith
(
"DRB1"
):
for
ligne
in
lines
.
split
(
"
\t
"
):
if
ligne
.
startswith
(
"DRB1*"
):
ListeDRB1
.
append
(
ligne
)
body
=
body
.
append
(
htmltag
.
H2
(
'Resultat genotypage HLA'
))
body
=
body
.
append
(
htmltag
.
P
(
"HLA-A : "
+
ListeA
[
0
]
+
" / "
+
ListeA
[
1
]))
body
=
body
.
append
(
htmltag
.
P
(
"HLA-B : "
+
ListeB
[
0
]
+
" / "
+
ListeB
[
1
]))
body
=
body
.
append
(
htmltag
.
P
(
"HLA-C : "
+
ListeC
[
0
]
+
" / "
+
ListeC
[
1
]))
body
=
body
.
append
(
htmltag
.
P
(
"HLA-DQB1 : "
+
ListeDQB1
[
0
]
+
" / "
+
ListeDQB1
[
1
]))
body
=
body
.
append
(
htmltag
.
P
(
"HLA-DRB1 : "
+
ListeDRB1
[
0
]
+
" / "
+
ListeDRB1
[
1
]))
with
open
(
data
)
as
tsvfile
:
reader
=
csv
.
DictReader
(
tsvfile
,
dialect
=
'excel-tab'
)
for
row
in
reader
:
if
row
[
'gene'
]
!=
""
:
if
row
[
'variants'
]
.
startswith
(
"HLA-A"
):
Adigit
=
row
[
'variants'
]
.
split
(
"*"
)[
1
]
Adigit2
=
Adigit
.
split
(
":"
)[
0
]
+
Adigit
.
split
(
":"
)[
1
]
HL1
=
ListeA
[
0
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
0
]
+
ListeA
[
0
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
1
]
HL2
=
ListeA
[
1
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
0
]
+
ListeA
[
1
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
1
]
if
HL1
==
Adigit2
:
pheno
=
row
[
'effect'
]
publi
=
row
[
'article'
]
level
=
str
(
row
[
'LEVEL'
])
if
level
.
startswith
(
"1"
):
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
level
.
startswith
(
"2"
):
body
=
body
.
append
(
htmltag
.
P2
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
else
:
body
=
body
.
append
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
HL2
==
Adigit2
:
pheno
=
row
[
'effect'
]
publi
=
row
[
'article'
]
level
=
str
(
row
[
'LEVEL'
])
if
level
.
startswith
(
"1"
):
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
level
.
startswith
(
"2"
):
body
=
body
.
append
(
htmltag
.
P2
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
else
:
body
=
body
.
append
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
row
[
'variants'
]
.
startswith
(
"HLA-B"
):
Bdigit
=
row
[
'variants'
]
.
split
(
"*"
)[
1
]
Bdigit2
=
Bdigit
.
split
(
":"
)[
0
]
+
Bdigit
.
split
(
":"
)[
1
]
HL1
=
ListeB
[
0
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
0
]
+
ListeB
[
0
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
1
]
HL2
=
ListeB
[
1
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
0
]
+
ListeB
[
1
]
.
split
(
"*"
)[
1
]
.
split
(
":"
)[
1
]
if
HL1
==
Bdigit2
:
pheno
=
row
[
'effect'
]
publi
=
row
[
'article'
]
level
=
str
(
row
[
'LEVEL'
])
if
level
.
startswith
(
"1"
):
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
level
.
startswith
(
"2"
):
body
=
body
.
append
(
htmltag
.
P2
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P2
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
else
:
body
=
body
.
append
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
))
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
HL2
==
Bdigit2
:
pheno
=
row
[
'effect'
]
publi
=
row
[
'article'
]
level
=
str
(
row
[
'LEVEL'
])
if
level
.
startswith
(
"1"
):
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P1
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
if
level
.
startswith
(
"2"
):
body
=
body
.
append
(
htmltag
.
P2
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
)))
body
=
body
.
append
(
htmltag
.
P2
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
else
:
body
=
body
.
append
(
htmltag
.
P
(
pheno
)
+
htmltag
.
P
(
level
))
body
=
body
.
append
(
htmltag
.
P
(
htmltag
.
P
(
htmltag
.
A
(
href
=
publi
)
.
append
(
"publication: "
+
publi
))))
print
htmltag
.
HTML
(
head
+
body
)
This diff is collapsed.
Click to expand it.
merge_GATK.sh
View file @
3c5a5a82
...
@@ -81,10 +81,10 @@ fi
...
@@ -81,10 +81,10 @@ fi
JAVA
=
$(
grep
"javacmd"
$CONFIGFILE
|
awk
'{print $2}'
)
JAVA
=
$(
grep
"javacmd"
$CONFIGFILE
|
awk
'{print $2}'
)
PATHSCRIPT
=
$(
grep
"pipelinebase"
$CONFIGFILE
|
awk
'{print $2}'
)
PATHSCRIPT
=
$(
grep
"pipelinebase"
$CONFIGFILE
|
awk
'{print $2}'
)
GATK
=
$(
grep
"GATKbase"
$CONFIGFILE
|
awk
'{print $2}'
)
#
GATK=$(grep "GATKbase" $CONFIGFILE | awk '{print $2}')
echo
"command :
$JAVA
-jar
$GATK
-T CombineVariants -R
$REF
--variant
$INPUTFILE1
--variant
$INPUTFILE2
-o merge.vcf -genotypeMergeOptions UNIQUIFY"
echo
"command :
$JAVA
-jar
/work/gad/shared/bin/GATK_3.8/GenomeAnalysisTK.jar
-T CombineVariants -R
$REF
--variant
$INPUTFILE1
--variant
$INPUTFILE2
-o merge.vcf -genotypeMergeOptions UNIQUIFY"
$JAVA
-jar
$GATK
-T
CombineVariants
-R
$REF
--variant
$INPUTFILE2
--variant
$INPUTFILE2
-o
merge.vcf
-genotypeMergeOptions
UNIQUIFY
>
$OUTPUTFILE
$JAVA
-jar
/work/gad/shared/bin/GATK_3.8/GenomeAnalysisTK.jar
-T
CombineVariants
-R
$REF
--variant
$INPUTFILE2
--variant
$INPUTFILE2
-genotypeMergeOptions
UNIQUIFY
>
$OUTPUTFILE
genotypeMerge_exitcode
=
$?
genotypeMerge_exitcode
=
$?
echo
"GenotypeMerge exit code :
$genotypeMerge_exitcode
"
echo
"GenotypeMerge exit code :
$genotypeMerge_exitcode
"
...
...
This diff is collapsed.
Click to expand it.
pharmconfig.tsv
View file @
3c5a5a82
...
@@ -3,3 +3,4 @@ pharmagkbase /work/gad/sv347413/epilepsie/Pipeline/pharmgkb_v2.vcf
...
@@ -3,3 +3,4 @@ pharmagkbase /work/gad/sv347413/epilepsie/Pipeline/pharmgkb_v2.vcf
pharmagktab /work/gad/sv347413/epilepsie/Pipeline/tableau_pharmgkb.tsv
pharmagktab /work/gad/sv347413/epilepsie/Pipeline/tableau_pharmgkb.tsv
omim /work/gad/sv347413/epilepsie/Pipeline/tableau_OMIM.tsv
omim /work/gad/sv347413/epilepsie/Pipeline/tableau_OMIM.tsv
header /work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf
header /work/gad/sv347413/epilepsie/Pipeline/header_CNV.vcf
reference /work/gad/shared/pipeline/hg19/index/hg19_essential.fa
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment