Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
STR
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Anne-Sophie Denommé-Pichon
STR
Commits
8ebd33d3
Commit
8ebd33d3
authored
Feb 09, 2020
by
Anne-Sophie Denommé-Pichon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Create script to identify outliers from STR results
parent
1e5b9d7d
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
183 additions
and
0 deletions
+183
-0
launch_triplets_outliers.py
plot/launch_triplets_outliers.py
+11
-0
patho.csv
plot/patho.csv
+31
-0
triplets_outliers.py
plot/triplets_outliers.py
+141
-0
No files found.
plot/launch_triplets_outliers.py
0 → 100755
View file @
8ebd33d3
#! /bin/sh
SCRIPT
=
"$(dirname "
$
(
readlink
-
f
"$0"
)
")/triplets_outliers.py"
cd
'/work/gad/shared/analyse/STR/results2020-01-09'
||
exit
1
for
locus_tsv
in
$
(
ls
*.
tsv
|
grep
-
v
outliers
);
do
locus
=
"$(basename "
$
locus_tsv
" "
.
tsv
")"
echo
"Processing $locus"
>&
2
"$SCRIPT"
"$locus"
>
"$locus.outliers.tsv"
done
plot/patho.csv
0 → 100644
View file @
8ebd33d3
Gene;High normal range
AFF2;39
AR;35
ATN1;34
ATXN1;38
ATXN10;20
ATXN2;24
ATXN3;36
ATXN7;35
ATXN8OS;34
C9ORF72;19
CACNA1A;7
CBL;NA
CNBP;26
CSTB;3
DIP2B;NA
DMPK;37
FMR1;45
FXN;32
HTT;34
JPH3;27
NOP56;8
PHOX2B;20
PPP2R2B;45
TBP;42
TCF4;NA
BEAN/TK2;NA
DAB1;NA
SAMD12;NA
ALEA;74
\ No newline at end of file
plot/triplets_outliers.py
0 → 100755
View file @
8ebd33d3
#! /usr/bin/env python3
### ASDP PIPELINE ###
## triplets_outliers.py
## Version : 0.0.1
## Licence : FIXME
## Description : script to get automatically outliers from expansion pipeline results from getResults.py
## Usage :
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20200202
## last revision date : 20200202
## Known bugs : None
import
collections
import
csv
import
os
import
scipy.stats
import
sys
path
=
'/work/gad/shared/analyse/STR/results2020-01-09'
def
load_limits
():
limits
=
{}
with
open
(
f
'{sys.argv[0].rsplit("/", 1)[0]}{os.sep}patho.csv'
)
as
limits_file
:
csvreader
=
csv
.
reader
(
limits_file
,
delimiter
=
';'
)
try
:
next
(
csvreader
)
for
row
in
csvreader
:
if
row
[
1
]
!=
'NA'
:
limits
[
row
[
0
]]
=
int
(
row
[
1
])
return
limits
except
StopIteration
:
print
(
'Limits file is empty'
,
file
=
sys
.
stderr
)
sys
.
exit
(
1
)
def
display_outliers
(
locus
,
limits
):
# results = {
# 'dijen': {
# 'tool': {
# 'Limit': 42,
# '5 %': 42,
# 'Z score': 42
# }
# }
# }
results
=
collections
.
OrderedDict
()
tools_values
=
{}
with
open
(
f
'{path}{os.sep}{locus}.tsv'
)
as
result_file
:
tsvreader
=
csv
.
reader
(
result_file
,
delimiter
=
'
\t
'
)
try
:
tools
=
next
(
tsvreader
)[
1
:]
for
row
in
tsvreader
:
dijen
=
row
[
0
]
results
[
dijen
]
=
collections
.
OrderedDict
()
for
tool_id
,
tool
in
enumerate
(
tools
):
tool_value
=
row
[
tool_id
+
1
]
.
replace
(
'nofile'
,
'.'
)
.
replace
(
'-1'
,
'.'
)
tools_values
.
setdefault
(
tool
,
[])
results
[
dijen
][
tool
]
=
collections
.
OrderedDict
()
results
[
dijen
][
tool
][
'Limit'
]
=
'.'
results
[
dijen
][
tool
][
'5
%
'
]
=
tool_value
results
[
dijen
][
tool
][
'Z score'
]
=
tool_value
if
tool_value
!=
'.'
:
# count: number of repeats from the input file
for
count
in
tool_value
.
split
(
','
):
if
count
!=
'.'
:
tools_values
[
tool
]
.
append
(
int
(
count
))
if
locus
in
limits
:
if
int
(
count
)
>
limits
[
locus
]:
results
[
dijen
][
tool
][
'Limit'
]
=
tool_value
else
:
results
[
dijen
][
tool
][
'Limit'
]
=
'NA'
except
StopIteration
:
print
(
'Input file is empty'
,
file
=
sys
.
stderr
)
sys
.
exit
(
1
)
# 5 % limit
for
tool
,
tool_values
in
tools_values
.
items
():
if
tool_values
:
tool_5p_limit
=
sorted
(
tool_values
)[
-
len
(
tool_values
)
//
20
:][
0
]
for
dijen
,
dijen_outliers
in
results
.
items
():
tool_5p_outliers
=
dijen_outliers
[
tool
][
'5
%
'
]
actual_outlier
=
False
# count: number of repeats from the input file
for
count
in
tool_5p_outliers
.
split
(
','
):
if
count
!=
'.'
:
if
int
(
count
)
>=
tool_5p_limit
:
actual_outlier
=
True
break
if
not
actual_outlier
:
dijen_outliers
[
tool
][
'5
%
'
]
=
'.'
# Z score
for
tool
,
tool_values
in
tools_values
.
items
():
if
tool_values
:
if
len
(
tool_values
)
>
1
:
zscores
=
iter
(
scipy
.
stats
.
zscore
(
tool_values
))
else
:
zscores
=
iter
([
'.'
])
for
dijen_outliers
in
results
.
values
():
actual_outlier
=
False
zscore_outliers
=
[]
# count: number of repeats from the input file
for
count
in
dijen_outliers
[
tool
][
'Z score'
]
.
split
(
','
):
if
count
!=
'.'
:
zscore
=
next
(
zscores
)
if
zscore
==
'.'
:
zscore_outliers
.
append
(
'.'
)
else
:
zscore_outliers
.
append
(
f
'{zscore:.3f}'
)
if
True
:
#zscore > 2:
actual_outlier
=
True
if
actual_outlier
:
dijen_outliers
[
tool
][
'Z score'
]
=
','
.
join
(
zscore_outliers
)
else
:
dijen_outliers
[
tool
][
'Z score'
]
=
'.'
# Output
print
(
'dijen
\t
EH
\t
EH
\t
EH
\t
Tred
\t
Tred
\t
Tred
\t
GangSTR
\t
GangSTR
\t
GangSTR'
)
print
(
'
\t
Limit
\t
5
%
\t
Z score'
*
3
)
for
dijen
,
dijen_outliers
in
results
.
items
():
all_outliers
=
[
dijen
]
dijen_has_outliers
=
False
for
tool
,
tool_outliers
in
dijen_outliers
.
items
():
if
tool_outliers
:
dijen_has_outliers
=
True
all_outliers
.
extend
(
tool_outliers
.
values
())
else
:
all_outliers
.
append
(
'.'
)
if
dijen_has_outliers
:
print
(
'
\t
'
.
join
(
all_outliers
))
if
__name__
==
'__main__'
:
if
len
(
sys
.
argv
)
!=
2
:
print
(
f
'Usage: {sys.argv[0].split(os.sep)[-1]} <LOCUS>'
,
file
=
sys
.
stderr
)
sys
.
exit
(
1
)
limits
=
load_limits
()
display_outliers
(
sys
.
argv
[
1
],
limits
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment