Add variables to make scripts more generic

parent 6cccde09
......@@ -3,13 +3,13 @@
- ASDP PIPELINE
- Author: anne-sophie.denomme-pichon@u-bourgogne.fr
- Version: 0.0.1
- Licence: FIXME
- Licence: AGPLv3
- Description: How to launch the scripts that genotype STRs from genomes at all the tested loci
1. Create `genomes.list`
2. Specify output file in `launch_pipeline.sh`. Warning, don't overwrite existing files
3. Launch `launch_pipeline.sh`: `nohup ./launch_pipeline.sh &`. Dependencies:
- `genomes.list`
1. Create `samples.list`
2. Fill in the configuration file `config.sh`. Warning: do not overwrite existing files.
3. Launch `launch_pipeline.sh`: `nohup ./launch_pipeline.sh samples.list &`. Dependencies:
- `samples.list`
- `pipeline.sh`
- `wrapper_delete.sh`
- `wrapper_ehdn.sh`
......@@ -23,4 +23,4 @@
7. Specify input directory in `triplets_outliers.py` and in `launch_triplets_outliers.sh`.
8. Change z-score threshold if necessary in `triplets_outliers.py`.
9. Launch `launch_triplets_outliers.sh`. Dependency: `patho.csv`.
10. Get files (i.e.: `scp 'an1770de@ssh-ccub.u-bourgogne.fr:/work/gad/shared/analyse/STR/results/*' .`)
\ No newline at end of file
10. Get files (e.g.: `scp 'an1770de@ssh-ccub.u-bourgogne.fr:/work/gad/shared/analyse/STR/results/*' .`)
#! /bin/sh
### ASDP PIPELINE ###
## launch_pipeline.sh
## Version : 0.0.1
## Licence : FIXME
## Description : script to launch the pipeline for STR detection
## Usage :
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20200215
## Known bugs : None
/work/gad/shared/bin/parallel/parallel-20150522-1.el7.cern/bin/parallel \
--jobs 16 \
## Version: 0.0.1
## Licence: AGPLV3
## Author: anne-sophie.denomme-pichon@u-bourgogne.fr
## Description: script to launch the pipeline for STR detection. Receives multiple samples, one sample per line
# $1: first argument on the command line: a file listing one sample per line, for example samples.list
SAMPLES="$1"
# Check that the list of samples is specified
if [ -z "$SAMPLES" ]
then
echo "List of samples is not specified"
echo "$(date +"%F_%H-%M-%S"): END"
exit 1
fi
# Source configuration file
. "$(dirname "$0")/config.sh"
# Parallel makes it possible to process multiple samples concurrently
"$PARALLEL" \
--jobs "$PARALLEL_JOB_COUNT" \
--line-buffer \
"$(dirname "$0")/pipeline.sh" \
< genomes.list
< "$SAMPLES"
......@@ -18,31 +18,34 @@ then
exit 1
fi
INPUTFILE="/archive/gad/shared/bam_new_genome_temp/$SAMPLE.bam"
# Source the configuration file
. "$(dirname "$0")/config.sh"
# Path of the sample's BAM file inside the input directory.
# INPUTDIR is expected to be provided by the sourced config.sh; it must be
# expanded as a variable ("$INPUTDIR"), not used as the literal text "INPUTDIR".
INPUTFILE="$INPUTDIR/$SAMPLE.bam"
DATE="$(date +"%F_%H-%M-%S")"
OUTPUTDIR="/work/gad/shared/analyse/STR/pipeline/$SAMPLE"
# Per-sample output directory, nested under the base OUTPUTDIR
# (expected to be provided by the sourced config.sh). The base must be
# expanded as a variable ("$OUTPUTDIR"), not used as the literal text "OUTPUTDIR".
OUTPUTDIR="$OUTPUTDIR/$SAMPLE"
# Transfer bam and bai from archive to work
mkdir -p "$OUTPUTDIR"
qsub -pe smp 1 -q transfer -N "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTDIR="$OUTPUTDIR",LOGFILE="$OUTPUTDIR/transfer_$DATE.log" wrapper_transfer.sh
qsub -pe smp 1 -q "$TRANSFERQUEUE" -N "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTDIR="$OUTPUTDIR",LOGFILE="$OUTPUTDIR/transfer_$DATE.log" wrapper_transfer.sh
INPUTFILE="$OUTPUTDIR/$SAMPLE.bam"
# Launch ExpansionHunter
mkdir -p "$OUTPUTDIR/eh"
qsub -pe smp 4 -q batch -N "eh_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTDIR/eh/$SAMPLE",LOGFILE="$OUTPUTDIR/eh/$DATE.log" wrapper_expansionhunter.sh
qsub -pe smp 4 -q "$COMPUTEQUEUE" -N "eh_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTDIR/eh/$SAMPLE",LOGFILE="$OUTPUTDIR/eh/$DATE.log" wrapper_expansionhunter.sh
# Launch Tredparse
mkdir -p "$OUTPUTDIR/tredparse"
qsub -pe smp 4 -q batch -N "tredparse_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTDIR="$OUTPUTDIR/tredparse",LOGFILE="$OUTPUTDIR/tredparse/$DATE.log" wrapper_tredparse.sh
qsub -pe smp 4 -q "$COMPUTEQUEUE" -N "tredparse_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTDIR="$OUTPUTDIR/tredparse",LOGFILE="$OUTPUTDIR/tredparse/$DATE.log" wrapper_tredparse.sh
# Launch GangSTR
mkdir -p "$OUTPUTDIR/gangstr"
qsub -pe smp 4 -q batch -N "gangstr_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTDIR/gangstr/$SAMPLE",LOGFILE="$OUTPUTDIR/gangstr/$DATE.log" wrapper_gangstr.sh
qsub -pe smp 4 -q "$COMPUTEQUEUE" -N "gangstr_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTDIR/gangstr/$SAMPLE",LOGFILE="$OUTPUTDIR/gangstr/$DATE.log" wrapper_gangstr.sh
# Launch ehdn profile
mkdir -p "$OUTPUTDIR/ehdn"
qsub -pe smp 4 -q batch -N "ehdn_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTDIR/ehdn/$SAMPLE",LOGFILE="$OUTPUTDIR/ehdn/$DATE.log" wrapper_ehdn.sh
qsub -pe smp 4 -q "$COMPUTEQUEUE" -N "ehdn_$SAMPLE" -hold_jid "transfer_$SAMPLE" -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTDIR/ehdn/$SAMPLE",LOGFILE="$OUTPUTDIR/ehdn/$DATE.log" wrapper_ehdn.sh
# Delete transferred bam and bai
qsub -pe smp 1 -q batch -N "delete_$SAMPLE" -hold_jid "eh_$SAMPLE,tredparse_$SAMPLE,gangstr_$SAMPLE,ehdn_$SAMPLE" -sync y -v SAMPLE="$SAMPLE",LOGFILE="$OUTPUTDIR/delete_$DATE.log" wrapper_delete.sh
qsub -pe smp 1 -q "$COMPUTEQUEUE" -N "delete_$SAMPLE" -hold_jid "eh_$SAMPLE,tredparse_$SAMPLE,gangstr_$SAMPLE,ehdn_$SAMPLE" -sync y -v SAMPLE="$SAMPLE",LOGFILE="$OUTPUTDIR/delete_$DATE.log" wrapper_delete.sh
......@@ -7,6 +7,8 @@
## Description: a wrapper for qsubing bam deletion for STR pipeline
## Usage: qsub -pe smp 1 -v SAMPLE=<sample>,[LOGFILE=<path to the log file>] wrapper_delete.sh
# Source the configuration file
. "$(dirname "$0")/config.sh"
# Log file path option
if [ -z "$LOGFILE" ]
......@@ -30,8 +32,8 @@ fi
echo "command : rm \
$SAMPLE"
rm \
"/work/gad/shared/analyse/STR/pipeline/$SAMPLE/$SAMPLE.bam" \
"/work/gad/shared/analyse/STR/pipeline/$SAMPLE/$SAMPLE.bai"
"$OUTPUTDIR/$SAMPLE/$SAMPLE.bam" \
"$OUTPUTDIR/$SAMPLE/$SAMPLE.bai"
delete_exitcode=$?
......
......@@ -7,6 +7,8 @@
## Description: a wrapper for qsubing ExpansionHunter denovo script for STR detection
## Usage: qsub -pe smp 1 -v INPUTFILE=<path to the bam file>,OUTPUTPREFIX=<output prefix>,[LOGFILE=<path to the log file>] wrapper_ehdn.sh
# Source the configuration file
. "$(dirname "$0")/config.sh"
# Log file path option
if [ -z "$LOGFILE" ]
......@@ -35,21 +37,27 @@ then
exit 1
fi
# Create .bam and .bai symbolic links
# Create .bam and .bai symbolic links (EHDN expects .bam.bai)
TMPDIR="$(mktemp -d)"
ln -s "$INPUTFILE" "$TMPDIR/$(basename "$INPUTFILE")"
ln -s "$(echo "$INPUTFILE" | sed 's/\.bam$/.bai/')" "$TMPDIR/$(basename "$INPUTFILE").bai"
if [ -f "$INPUTFILE.bai" ]
then
ln -s "$INPUTFILE.bai" "$TMPDIR/$(basename "$INPUTFILE").bai"
else
ln -s "$(echo "$INPUTFILE" | sed 's/\.bam$/.bai/')" "$TMPDIR/$(basename "$INPUTFILE").bai"
fi
# Launch script command and check exit code
echo "command : /work/gad/shared/bin/expansionhunterdenovo/ExpansionHunterDenovo-v0.8.0-linux_x86_64/bin/ExpansionHunterDenovo-v0.8.0 profile \
--reads "$TMPDIR/$(basename "$INPUTFILE")" \
--reference /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
echo "command : $EHDN profile \
--reads $TMPDIR/$(basename "$INPUTFILE") \
--reference $REF \
--output-prefix $OUTPUTPREFIX \
--min-anchor-mapq 50 \
--max-irr-mapq 40"
/work/gad/shared/bin/expansionhunterdenovo/ExpansionHunterDenovo-v0.8.0-linux_x86_64/bin/ExpansionHunterDenovo-v0.8.0 profile \
"$EHDN" profile \
--reads "$TMPDIR/$(basename "$INPUTFILE")" \
--reference /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--reference "$REF" \
--output-prefix "$OUTPUTPREFIX" \
--min-anchor-mapq 50 \
--max-irr-mapq 40
......
......@@ -6,11 +6,8 @@
## Author: anne-sophie.denomme-pichon@u-bourgogne.fr
## Description: script to generate automatically a manifest and multisampleprofile in a tsv format, then do outlier analyses for a single patient
EHDN="/work/gad/shared/bin/expansionhunterdenovo/ExpansionHunterDenovo-v0.8.0-linux_x86_64/bin/ExpansionHunterDenovo-v0.8.0"
EHDN_OUTLIER="/work/gad/shared/bin/expansionhunterdenovo/ExpansionHunterDenovo-v0.8.0-linux_x86_64/scripts/outlier.py"
REFERENCE="/work/gad/shared/pipeline/hg19/index/hg19_essential.fa"
WORKDIR="/work/gad/shared/analyse/STR/pipeline"
# Source the configuration file
. "$(dirname "$0")/config.sh"
# Log file path option
if [ -z "$LOGFILE" ]
......@@ -31,20 +28,20 @@ then
fi
# Generate manifest for one patient with all samples (to write lines in the file)
cd "$WORKDIR"
cd "$OUTPUTDIR"
for dijen in dijen*
do
# Check if str_profile.json exists
if [ -f "$WORKDIR/$dijen/ehdn/$dijen.str_profile.json" ]
if [ -f "$OUTPUTDIR/$dijen/ehdn/$dijen.str_profile.json" ]
then
if [ "x$dijen" = "x$CASE" ]
then
echo -e "$dijen\tcase\t$WORKDIR/$dijen/ehdn/$dijen.str_profile.json"
echo -e "$dijen\tcase\t$OUTPUTDIR/$dijen/ehdn/$dijen.str_profile.json"
else
echo -e "$dijen\tcontrol\t$WORKDIR/$dijen/ehdn/$dijen.str_profile.json"
echo -e "$dijen\tcontrol\t$OUTPUTDIR/$dijen/ehdn/$dijen.str_profile.json"
fi
fi
done > "$WORKDIR/$CASE/ehdn/$CASE.manifest.tsv"
done > "$OUTPUTDIR/$CASE/ehdn/$CASE.manifest.tsv"
ehdn_outlier_exitcode=$?
......@@ -58,7 +55,7 @@ fi
# Generate multisampleprofile for one patient with all samples
# NOTE(review): REFERENCE is no longer assigned in this script (its definition
# was replaced by sourcing config.sh), while the other wrappers pass "$REF" as
# the reference genome. Confirm that config.sh defines REFERENCE; otherwise
# --reference receives an empty value here.
# The output prefix is a relative path, so it depends on the earlier `cd`.
"$EHDN" merge \
--reference "$REFERENCE" \
--manifest "$WORKDIR/$CASE/ehdn/$CASE.manifest.tsv" \
--manifest "$OUTPUTDIR/$CASE/ehdn/$CASE.manifest.tsv" \
--output-prefix "$CASE/ehdn/$CASE"
ehdn_outlier_exitcode=$?
......@@ -72,9 +69,9 @@ fi
# Run locus-based comparison analysis
"$EHDN_OUTLIER" locus \
--manifest "$WORKDIR/$CASE/ehdn/$CASE.manifest.tsv" \
--multisample-profile "$WORKDIR/$CASE/ehdn/$CASE.multisample_profile.json" \
--output "$WORKDIR/$CASE/ehdn/$CASE.outlier_locus.tsv"
--manifest "$OUTPUTDIR/$CASE/ehdn/$CASE.manifest.tsv" \
--multisample-profile "$OUTPUTDIR/$CASE/ehdn/$CASE.multisample_profile.json" \
--output "$OUTPUTDIR/$CASE/ehdn/$CASE.outlier_locus.tsv"
ehdn_outlier_exitcode=$?
......@@ -88,9 +85,9 @@ fi
# Run motif_based comparison analysis
"$EHDN_OUTLIER" motif \
--manifest "$WORKDIR/$CASE/ehdn/$CASE.manifest.tsv" \
--multisample-profile "$WORKDIR/$CASE/ehdn/$CASE.multisample_profile.json" \
--output "$WORKDIR/$CASE/ehdn/$CASE.outlier_motif.tsv"
--manifest "$OUTPUTDIR/$CASE/ehdn/$CASE.manifest.tsv" \
--multisample-profile "$OUTPUTDIR/$CASE/ehdn/$CASE.multisample_profile.json" \
--output "$OUTPUTDIR/$CASE/ehdn/$CASE.outlier_motif.tsv"
ehdn_outlier_exitcode=$?
......
......@@ -7,6 +7,8 @@
## Description: a wrapper for qsubing ExpansionHunter script for STR detection
## Usage: qsub -pe smp 1 -v INPUTFILE=<path to the bam file>,OUTPUTPREFIX=<output prefix>,[LOGFILE=<path to the log file>] wrapper_expansionhunter.sh
# Source the configuration file
. "$(dirname "$0")/config.sh"
# Log file path option
if [ -z "$LOGFILE" ]
......@@ -35,21 +37,27 @@ then
exit 1
fi
# Create .bam and .bai symbolic links
# Create .bam and .bai symbolic links (EH expects .bam.bai)
TMPDIR="$(mktemp -d)"
ln -s "$INPUTFILE" "$TMPDIR/$(basename "$INPUTFILE")"
ln -s "$(echo "$INPUTFILE" | sed 's/\.bam$/.bai/')" "$TMPDIR/$(basename "$INPUTFILE").bai"
if [ -f "$INPUTFILE.bai" ]
then
ln -s "$INPUTFILE.bai" "$TMPDIR/$(basename "$INPUTFILE").bai"
else
ln -s "$(echo "$INPUTFILE" | sed 's/\.bam$/.bai/')" "$TMPDIR/$(basename "$INPUTFILE").bai"
fi
# Launch script command and check exit code
echo "command : /work/gad/shared/bin/expansionhunter/ExpansionHunter-v3.1.2-linux_x86_64/bin/ExpansionHunter \
--reads "$TMPDIR/$(basename "$INPUTFILE")" \
--reference /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--variant-catalog /work/gad/shared/bin/expansionhunter/ExpansionHunter-v3.1.2-linux_x86_64/variant_catalog/hg19/variant_catalog.json \
echo "command : $EH \
--reads $TMPDIR/$(basename "$INPUTFILE") \
--reference $REF \
--variant-catalog $EH_VARIANT_CATALOG \
--output-prefix $OUTPUTPREFIX"
/work/gad/shared/bin/expansionhunter/ExpansionHunter-v3.1.2-linux_x86_64/bin/ExpansionHunter \
"$EH" \
--reads "$TMPDIR/$(basename "$INPUTFILE")" \
--reference /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--variant-catalog /work/gad/shared/bin/expansionhunter/ExpansionHunter-v3.1.2-linux_x86_64/variant_catalog/hg19/variant_catalog.json \
--reference "$REF" \
--variant-catalog "$EH_VARIANT_CATALOG" \
--output-prefix "$OUTPUTPREFIX"
expansionhunter_exitcode=$?
......
......@@ -7,6 +7,9 @@
## Description: a wrapper for qsubing GangSTR script for STR detection
## Usage: qsub -pe smp 1 -v INPUTFILE=<path to the bam file>,OUTPUTPREFIX=<output prefix>,[LOGFILE=<path to the log file>] wrapper_gangstr.sh
# Source the configuration file
. "$(dirname "$0")/config.sh"
# Log file path option
if [ -z "$LOGFILE" ]
then
......@@ -35,20 +38,20 @@ then
fi
# Launch script command and check exit code
echo "command : /work/gad/shared/bin/gangstr/GangSTR-2.4/bin/GangSTR \
--bam "$INPUTFILE" \
--ref /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--regions /work/gad/shared/bin/gangstr/STRregions/hg19_ver13_1.bed \
--out "$OUTPUTPREFIX" \
--verbose
echo "command : $GANGSTR \
--bam $INPUTFILE \
--ref $REF \
--regions $GANGSTR_REGIONS \
--out $OUTPUTPREFIX \
--verbose"
# --insertmean "$INSERTMEAN" \
# --insertsdev "$INSERTDEV""
/work/gad/shared/bin/gangstr/GangSTR-2.4/bin/GangSTR \
"$GANGSTR" \
--bam "$INPUTFILE" \
--ref /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--regions /work/gad/shared/bin/gangstr/STRregions/hg19_ver13_1.bed \
--ref "$REF" \
--regions "$GANGSTR_REGIONS" \
--out "$OUTPUTPREFIX" \
--verbose
......
......@@ -7,6 +7,8 @@
## Description: a wrapper for qsubing Tredparse script for STR detection
## Usage: qsub -pe smp 1 -v INPUTFILE=<path to the bam file>,OUTPUTDIR=<output directory>,[LOGFILE=<path to the log file>] wrapper_tredparse.sh
# Source the configuration file
# NOTE(review): this wrapper receives OUTPUTDIR through `qsub -v` (see Usage),
# and wrapper_delete.sh reads an OUTPUTDIR that is not passed to it via -v, so
# it presumably comes from config.sh. If config.sh assigns OUTPUTDIR
# unconditionally, sourcing it here overwrites the per-sample output directory
# given by the caller. Confirm against config.sh.
. "$(dirname "$0")/config.sh"
# Log file path option
if [ -z "$LOGFILE" ]
......@@ -35,13 +37,15 @@ then
exit 1
fi
# Enable the virtualenv
TREDPARSE="/work/gad/shared/bin/tredparse/Tredparse-20190901"
. "$TREDPARSE/bin/activate"
# Enable the virtualenv if any
if [ -n "$TREDPARSE_VENV" ]
then
. "$TREDPARSE_VENV"
fi
# Launch script command and check exit code
echo "command : "$TREDPARSE/bin/tred.py" "$INPUTFILE" --workdir "$OUTPUTDIR" --ref hg19"
"$TREDPARSE/bin/tred.py" "$INPUTFILE" --workdir "$OUTPUTDIR" --ref hg19
echo "command : $TREDPARSE $INPUTFILE --workdir $OUTPUTDIR --ref hg19"
"$TREDPARSE" "$INPUTFILE" --workdir "$OUTPUTDIR" --ref hg19
tredparse_exitcode=$?
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment