Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
STR
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Anne-Sophie Denommé-Pichon
STR
Commits
e4c144bc
Commit
e4c144bc
authored
Jun 06, 2020
by
Anne-Sophie Denommé-Pichon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Change variable names
parent
63e81b42
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
28 additions
and
24 deletions
+28
-24
launch_ehdn.sh
launch_ehdn.sh
+1
-1
launch_triplets_outliers.py
launch_triplets_outliers.py
+1
-1
launch_triplets_plotly.sh
launch_triplets_plotly.sh
+1
-1
pipeline.sh
pipeline.sh
+2
-5
triplets_outliers.py
triplets_outliers.py
+22
-15
triplets_plotly.py
triplets_plotly.py
+1
-1
No files found.
launch_ehdn.sh
View file @
e4c144bc
...
...
@@ -14,7 +14,7 @@
## last revision date : 20191126
## Known bugs : None
INPUTFILE
=
/work/gad/shared/analyse/STR/
Data/dijen017/dijen017/dijen017
.bam
INPUTFILE
=
/work/gad/shared/analyse/STR/
pipeline/dijen073/dijen073
.bam
DATE
=
"
$(
date
+
"%F_%H-%M-%S"
)
"
OUTPUTDIR
=
"/work/gad/shared/analyse/STR/ExpansionHunterDeNovo/
$DATE
"
OUTPUTPREFIX
=
"
$OUTPUTDIR
/
$(
basename
"
$INPUTFILE
"
)
_
$DATE
"
...
...
launch_triplets_outliers.py
View file @
e4c144bc
...
...
@@ -2,7 +2,7 @@
SCRIPT
=
"$(dirname "
$
(
readlink
-
f
"$0"
)
")/triplets_outliers.py"
cd
'/work/gad/shared/analyse/STR/results
2020-01-09
'
||
exit
1
cd
'/work/gad/shared/analyse/STR/results'
||
exit
1
for
locus_tsv
in
$
(
ls
*.
tsv
|
grep
-
v
outliers
);
do
locus
=
"$(basename "
$
locus_tsv
" "
.
tsv
")"
echo
"Processing $locus"
>&
2
...
...
launch_triplets_plotly.sh
View file @
e4c144bc
...
...
@@ -2,7 +2,7 @@
SCRIPT
=
"
$(
dirname
"
$(
readlink
-f
"
$0
"
)
"
)
/triplets_plotly.py"
cd
'/work/gad/shared/analyse/STR/results
2020-01-09
'
||
exit
1
cd
'/work/gad/shared/analyse/STR/results'
||
exit
1
for
locus_tsv
in
*
.tsv
;
do
locus
=
"
$(
basename
"
$locus_tsv
"
".tsv"
)
"
echo
"Processing
$locus
"
>
&2
...
...
pipeline.sh
View file @
e4c144bc
#! /bin/sh
### ASDP PIPELINE ###
## pipeline.sh
## Version : 0.0.1
## Licence : FIXME
## Description : script to launch the pipeline for STR detection
## Usage :
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20191208
## Known bugs : None
## last revision date : 20200606
# $1 : first argument in the command line : the input file
SAMPLE
=
"
$1
"
# Check if sample is specified
...
...
triplets_outliers.py
View file @
e4c144bc
...
...
@@ -11,16 +11,22 @@
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20200202
## last revision date : 202002
02
## last revision date : 202002
16
## Known bugs : None
import
collections
import
csv
import
math
import
os
import
scipy.stats
import
sys
path
=
'/work/gad/shared/analyse/STR/results2020-01-09'
path
=
'/work/gad/shared/analyse/STR/results'
zscore_threshold
=
4
zscore_label
=
f
'Z>={zscore_threshold}'
percentile_threshold
=
1.0
percentile_label
=
f
'{percentile_threshold}
%
'
def
load_limits
():
limits
=
{}
...
...
@@ -61,8 +67,8 @@ def display_outliers(locus, limits):
tools_values
.
setdefault
(
tool
,
[])
results
[
dijen
][
tool
]
=
collections
.
OrderedDict
()
results
[
dijen
][
tool
][
'Limit'
]
=
'.'
results
[
dijen
][
tool
][
'5
%
'
]
=
tool_value
results
[
dijen
][
tool
][
'Z score'
]
=
tool_value
results
[
dijen
][
tool
][
percentile_label
]
=
tool_value
results
[
dijen
][
tool
][
zscore_label
]
=
tool_value
results
[
dijen
][
tool
][
'< 3'
]
=
'.'
# > upper limit of normality or < 3
...
...
@@ -83,21 +89,22 @@ def display_outliers(locus, limits):
print
(
'Input file is empty'
,
file
=
sys
.
stderr
)
sys
.
exit
(
1
)
#
5 % limit
#
outlier threshold (example: 5%)
for
tool
,
tool_values
in
tools_values
.
items
():
# Test if there is at least one value given by the tool
if
tool_values
:
tool_
5p_limit
=
sorted
(
tool_values
)[
-
len
(
tool_values
)
//
20
:][
0
]
tool_
percentile_limit
=
sorted
(
tool_values
)[
-
math
.
ceil
(
len
(
tool_values
)
*
percentile_threshold
/
100
)
:][
0
]
for
dijen
,
dijen_outliers
in
results
.
items
():
tool_
5p_outliers
=
dijen_outliers
[
tool
][
'5
%
'
]
tool_
percentile_outliers
=
dijen_outliers
[
tool
][
percentile_label
]
actual_outlier
=
False
# count: number of repeats from the input file
for
count
in
tool_
5p
_outliers
.
split
(
','
):
for
count
in
tool_
percentile
_outliers
.
split
(
','
):
if
count
!=
'.'
:
if
int
(
count
)
>=
tool_
5p
_limit
:
if
int
(
count
)
>=
tool_
percentile
_limit
:
actual_outlier
=
True
break
if
not
actual_outlier
:
dijen_outliers
[
tool
][
'5
%
'
]
=
'.'
dijen_outliers
[
tool
][
percentile_label
]
=
'.'
# Z score
for
tool
,
tool_values
in
tools_values
.
items
():
...
...
@@ -110,23 +117,23 @@ def display_outliers(locus, limits):
actual_outlier
=
False
zscore_outliers
=
[]
# count: number of repeats from the input file
for
count
in
dijen_outliers
[
tool
][
'Z score'
]
.
split
(
','
):
for
count
in
dijen_outliers
[
tool
][
zscore_label
]
.
split
(
','
):
if
count
!=
'.'
:
zscore
=
next
(
zscores
)
if
zscore
==
'.'
:
zscore_outliers
.
append
(
'.'
)
else
:
zscore_outliers
.
append
(
f
'{zscore:.3f}'
)
if
zscore
>=
2.0
:
if
zscore
>=
zscore_threshold
:
actual_outlier
=
True
if
actual_outlier
:
dijen_outliers
[
tool
][
'Z score'
]
=
','
.
join
(
zscore_outliers
)
dijen_outliers
[
tool
][
zscore_label
]
=
','
.
join
(
zscore_outliers
)
else
:
dijen_outliers
[
tool
][
'Z score'
]
=
'.'
dijen_outliers
[
tool
][
zscore_label
]
=
'.'
# Output
print
(
'dijen
\t
EH
\t
EH
\t
EH
\t
EH
\t
Tred
\t
Tred
\t
Tred
\t
Tred
\t
GangSTR
\t
GangSTR
\t
GangSTR
\t
GangSTR'
)
print
(
'
\t
Limit
\t
5
%
\t
Z score
\t
< 3'
*
3
)
print
(
f
'
\t
Limit
\t
{percentile_label}
\t
{zscore_label}
\t
< 3'
*
3
)
for
dijen
,
dijen_outliers
in
results
.
items
():
all_outliers
=
[
dijen
]
dijen_has_outliers
=
False
...
...
triplets_plotly.py
View file @
e4c144bc
...
...
@@ -19,7 +19,7 @@ import csv
import
os
import
sys
path
=
'/work/gad/shared/analyse/STR/results
2020-01-09
'
path
=
'/work/gad/shared/analyse/STR/results'
def
display_console_graph
(
title
,
tools
,
data
):
print
(
title
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment