Change variable names

parent 63e81b42
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
## last revision date : 20191126 ## last revision date : 20191126
## Known bugs : None ## Known bugs : None
INPUTFILE=/work/gad/shared/analyse/STR/Data/dijen017/dijen017/dijen017.bam INPUTFILE=/work/gad/shared/analyse/STR/pipeline/dijen073/dijen073.bam
DATE="$(date +"%F_%H-%M-%S")" DATE="$(date +"%F_%H-%M-%S")"
OUTPUTDIR="/work/gad/shared/analyse/STR/ExpansionHunterDeNovo/$DATE" OUTPUTDIR="/work/gad/shared/analyse/STR/ExpansionHunterDeNovo/$DATE"
OUTPUTPREFIX="$OUTPUTDIR/$(basename "$INPUTFILE")_$DATE" OUTPUTPREFIX="$OUTPUTDIR/$(basename "$INPUTFILE")_$DATE"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
SCRIPT="$(dirname "$(readlink -f "$0")")/triplets_outliers.py" SCRIPT="$(dirname "$(readlink -f "$0")")/triplets_outliers.py"
cd '/work/gad/shared/analyse/STR/results2020-01-09' || exit 1 cd '/work/gad/shared/analyse/STR/results' || exit 1
for locus_tsv in $(ls *.tsv | grep -v outliers); do for locus_tsv in $(ls *.tsv | grep -v outliers); do
locus="$(basename "$locus_tsv" ".tsv")" locus="$(basename "$locus_tsv" ".tsv")"
echo "Processing $locus" >&2 echo "Processing $locus" >&2
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
SCRIPT="$(dirname "$(readlink -f "$0")")/triplets_plotly.py" SCRIPT="$(dirname "$(readlink -f "$0")")/triplets_plotly.py"
cd '/work/gad/shared/analyse/STR/results2020-01-09' || exit 1 cd '/work/gad/shared/analyse/STR/results' || exit 1
for locus_tsv in *.tsv; do for locus_tsv in *.tsv; do
locus="$(basename "$locus_tsv" ".tsv")" locus="$(basename "$locus_tsv" ".tsv")"
echo "Processing $locus" >&2 echo "Processing $locus" >&2
......
#! /bin/sh #! /bin/sh
### ASDP PIPELINE ### ### ASDP PIPELINE ###
## pipeline.sh
## Version : 0.0.1 ## Version : 0.0.1
## Licence : FIXME ## Licence : FIXME
## Description : script to launch the pipeline for STR detection ## Description : script to launch the pipeline for STR detection
## Usage : ## Usage :
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr ## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208 ## Creation Date : 20191208
## last revision date : 20191208 ## last revision date : 20200606
## Known bugs : None
# $1 : first argument in the command line : the input file
SAMPLE="$1" SAMPLE="$1"
# Check if sample is specified # Check if sample is specified
......
...@@ -11,16 +11,22 @@ ...@@ -11,16 +11,22 @@
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr ## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20200202 ## Creation Date : 20200202
## last revision date : 20200202 ## last revision date : 20200216
## Known bugs : None ## Known bugs : None
import collections import collections
import csv import csv
import math
import os import os
import scipy.stats import scipy.stats
import sys import sys
path = '/work/gad/shared/analyse/STR/results2020-01-09' path = '/work/gad/shared/analyse/STR/results'
zscore_threshold = 4
zscore_label = f'Z>={zscore_threshold}'
percentile_threshold = 1.0
percentile_label = f'{percentile_threshold}%'
def load_limits(): def load_limits():
limits = {} limits = {}
...@@ -61,8 +67,8 @@ def display_outliers(locus, limits): ...@@ -61,8 +67,8 @@ def display_outliers(locus, limits):
tools_values.setdefault(tool, []) tools_values.setdefault(tool, [])
results[dijen][tool] = collections.OrderedDict() results[dijen][tool] = collections.OrderedDict()
results[dijen][tool]['Limit'] = '.' results[dijen][tool]['Limit'] = '.'
results[dijen][tool]['5 %'] = tool_value results[dijen][tool][percentile_label] = tool_value
results[dijen][tool]['Z score'] = tool_value results[dijen][tool][zscore_label] = tool_value
results[dijen][tool]['< 3'] = '.' results[dijen][tool]['< 3'] = '.'
# > upper limit of normality or < 3 # > upper limit of normality or < 3
...@@ -83,21 +89,22 @@ def display_outliers(locus, limits): ...@@ -83,21 +89,22 @@ def display_outliers(locus, limits):
print('Input file is empty', file=sys.stderr) print('Input file is empty', file=sys.stderr)
sys.exit(1) sys.exit(1)
# 5 % limit # outlier threshold (example: 5%)
for tool, tool_values in tools_values.items(): for tool, tool_values in tools_values.items():
# Test if there is at least one value given by the tool
if tool_values: if tool_values:
tool_5p_limit = sorted(tool_values)[-len(tool_values)//20:][0] tool_percentile_limit = sorted(tool_values)[-math.ceil(len(tool_values) * percentile_threshold / 100):][0]
for dijen, dijen_outliers in results.items(): for dijen, dijen_outliers in results.items():
tool_5p_outliers = dijen_outliers[tool]['5 %'] tool_percentile_outliers = dijen_outliers[tool][percentile_label]
actual_outlier = False actual_outlier = False
# count: number of repeats from the input file # count: number of repeats from the input file
for count in tool_5p_outliers.split(','): for count in tool_percentile_outliers.split(','):
if count != '.': if count != '.':
if int(count) >= tool_5p_limit: if int(count) >= tool_percentile_limit:
actual_outlier = True actual_outlier = True
break break
if not actual_outlier: if not actual_outlier:
dijen_outliers[tool]['5 %'] = '.' dijen_outliers[tool][percentile_label] = '.'
# Z score # Z score
for tool, tool_values in tools_values.items(): for tool, tool_values in tools_values.items():
...@@ -110,23 +117,23 @@ def display_outliers(locus, limits): ...@@ -110,23 +117,23 @@ def display_outliers(locus, limits):
actual_outlier = False actual_outlier = False
zscore_outliers = [] zscore_outliers = []
# count: number of repeats from the input file # count: number of repeats from the input file
for count in dijen_outliers[tool]['Z score'].split(','): for count in dijen_outliers[tool][zscore_label].split(','):
if count != '.': if count != '.':
zscore = next(zscores) zscore = next(zscores)
if zscore == '.': if zscore == '.':
zscore_outliers.append('.') zscore_outliers.append('.')
else: else:
zscore_outliers.append(f'{zscore:.3f}') zscore_outliers.append(f'{zscore:.3f}')
if zscore >= 2.0: if zscore >= zscore_threshold:
actual_outlier = True actual_outlier = True
if actual_outlier: if actual_outlier:
dijen_outliers[tool]['Z score'] = ','.join(zscore_outliers) dijen_outliers[tool][zscore_label] = ','.join(zscore_outliers)
else: else:
dijen_outliers[tool]['Z score'] = '.' dijen_outliers[tool][zscore_label] = '.'
# Output # Output
print('dijen\tEH\tEH\tEH\tEH\tTred\tTred\tTred\tTred\tGangSTR\tGangSTR\tGangSTR\tGangSTR') print('dijen\tEH\tEH\tEH\tEH\tTred\tTred\tTred\tTred\tGangSTR\tGangSTR\tGangSTR\tGangSTR')
print('\tLimit\t5 %\tZ score\t< 3' * 3) print(f'\tLimit\t{percentile_label}\t{zscore_label}\t< 3' * 3)
for dijen, dijen_outliers in results.items(): for dijen, dijen_outliers in results.items():
all_outliers = [dijen] all_outliers = [dijen]
dijen_has_outliers = False dijen_has_outliers = False
......
...@@ -19,7 +19,7 @@ import csv ...@@ -19,7 +19,7 @@ import csv
import os import os
import sys import sys
path = '/work/gad/shared/analyse/STR/results2020-01-09' path = '/work/gad/shared/analyse/STR/results'
def display_console_graph(title, tools, data): def display_console_graph(title, tools, data):
print(title) print(title)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment