Create pipeline to launch STR detection tools on GAD genomes cohort

parent 85802bc1
#! /bin/sh
### ASDP PIPELINE ###
## launch_wrapper_ehdn.sh
## Version : 0.0.1
## Licence : FIXME
## Description : script to launch the wrapper for qsubing ExpansionHunter denovo script for STR detection
## Usage : launch_wrapper_ehdn.sh [--local]
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191102
## last revision date : 20191126
## Known bugs : None
# Input BAM, timestamped output directory, and the output prefix / log file
# derived from them. All three are handed to the wrapper as environment variables.
INPUTFILE=/work/gad/shared/analyse/STR/Data/dijen017/dijen017/dijen017.bam
DATE="$(date +"%F_%H-%M-%S")"
OUTPUTDIR="/work/gad/shared/analyse/STR/ExpansionHunterDeNovo/$DATE"
OUTPUTPREFIX="$OUTPUTDIR/$(basename "$INPUTFILE")_$DATE"
LOGFILE="$OUTPUTDIR/$DATE.log"
# Resolve the wrapper next to this script so that both launch modes work
# regardless of the directory the launcher is started from
WRAPPER="$(dirname "$0")/wrapper_ehdn.sh"

# The log file lives in the output directory, so it must exist in both modes
if ! mkdir -p "$OUTPUTDIR"
then
    echo "Cannot create output directory '$OUTPUTDIR'" >&2
    exit 1
fi

# Launch the script on local host with --local option and on SGE with qsub without the --local option
if [ $# -eq 1 ] && [ "x$1" = x--local ]
then
    INPUTFILE="$INPUTFILE" OUTPUTPREFIX="$OUTPUTPREFIX" LOGFILE="$LOGFILE" "$WRAPPER"
else
    qsub -pe smp 4 -q batch -v INPUTFILE="$INPUTFILE",OUTPUTPREFIX="$OUTPUTPREFIX",LOGFILE="$LOGFILE" "$WRAPPER"
fi
#! /bin/sh
### ASDP PIPELINE ###
## launch_pipeline.sh
## Version : 0.0.1
## Licence : FIXME
## Description : script to launch the pipeline for STR detection
## Usage : launch_pipeline.sh (reads the sample names from genomes.list in the current directory)
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20191208
## Known bugs : None
# GNU parallel binary and the per-sample pipeline script located next to this launcher
PARALLEL=/work/gad/shared/bin/parallel/parallel-20150522-1.el7.cern/bin/parallel
PIPELINE="$(dirname "$0")/pipeline.sh"

# Feed genomes.list (looked up in the current directory) to parallel on stdin;
# pipeline.sh is the command it runs, with --jobs 10 and line-buffered output
"$PARALLEL" --jobs 10 --line-buffer "$PIPELINE" < genomes.list
#! /bin/sh
### ASDP PIPELINE ###
## launch_wrapper_transfer.sh
## Version : 0.0.1
## Licence : FIXME
## Description : script to launch the wrapper for qsubing bam transfer for STR pipeline
## Usage : launch_wrapper_transfer.sh [--local]
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191102
## last revision date : 20191126
## Known bugs : None
# Archived BAM to copy, destination directory and timestamped log file.
# All three are handed to the wrapper as environment variables.
INPUTFILE="/archive/gad/shared/bam_new_genome_temp/dijen016.bam"
DATE="$(date +"%F_%H-%M-%S")"
OUTPUTDIR="/work/gad/shared/analyse/STR/Data"
LOGFILE="$OUTPUTDIR/$DATE.log"
# Resolve the wrapper next to this script so that both launch modes work
# regardless of the directory the launcher is started from
WRAPPER="$(dirname "$0")/wrapper_transfer.sh"

# The log file lives in the output directory, so it must exist in both modes
if ! mkdir -p "$OUTPUTDIR"
then
    echo "Cannot create output directory '$OUTPUTDIR'" >&2
    exit 1
fi

# Launch the script on local host with --local option and on SGE with qsub without the --local option
if [ $# -eq 1 ] && [ "x$1" = x--local ]
then
    INPUTFILE="$INPUTFILE" OUTPUTDIR="$OUTPUTDIR" LOGFILE="$LOGFILE" "$WRAPPER"
else
    qsub -pe smp 1 -q transfer -v INPUTFILE="$INPUTFILE",OUTPUTDIR="$OUTPUTDIR",LOGFILE="$LOGFILE" "$WRAPPER"
fi
#! /bin/sh
### ASDP PIPELINE ###
## pipeline.sh
## Version : 0.0.1
## Licence : FIXME
## Description : script to launch the pipeline for STR detection
## Usage : pipeline.sh <sample>
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20191208
## Known bugs : None
# Sample name (first argument); it selects the archived BAM and names every job
SAMPLE="$1"
# Check if sample is specified
if [ -z "$SAMPLE" ]
then
    echo "Sample is not specified"
    echo "$(date +"%F_%H-%M-%S"): END"
    exit 1
fi

# The wrappers are resolved next to this script: launch_pipeline.sh starts it
# through its own directory, so the current directory cannot be relied upon
SCRIPTDIR="$(dirname "$0")"
ARCHIVEBAM="/archive/gad/shared/bam_new_genome_temp/$SAMPLE.bam"
DATE="$(date +"%F_%H-%M-%S")"
OUTPUTDIR="/work/gad/shared/analyse/STR/pipeline/$SAMPLE"
# Location of the BAM once the transfer job has copied it to the work area
WORKBAM="$OUTPUTDIR/$SAMPLE.bam"

# Create the sample directory and one sub-directory per tool before submitting
# anything: each job writes its log file there
if ! mkdir -p "$OUTPUTDIR" "$OUTPUTDIR/eh" "$OUTPUTDIR/tredparse" "$OUTPUTDIR/gangstr" "$OUTPUTDIR/ehdn"
then
    echo "Cannot create output directories under '$OUTPUTDIR'"
    echo "$(date +"%F_%H-%M-%S"): END"
    exit 1
fi

# Transfer bam and bai from archive to work.
# Every later job holds on this job's name, so stop here if it was not submitted.
if ! qsub -pe smp 1 -q transfer -N "transfer_$SAMPLE" \
    -v INPUTFILE="$ARCHIVEBAM",OUTPUTDIR="$OUTPUTDIR",LOGFILE="$OUTPUTDIR/transfer_$DATE.log" \
    "$SCRIPTDIR/wrapper_transfer.sh"
then
    echo "Submission of the transfer job failed for sample '$SAMPLE'"
    echo "$(date +"%F_%H-%M-%S"): END"
    exit 1
fi

# Launch ExpansionHunter
qsub -pe smp 4 -q batch -N "eh_$SAMPLE" -hold_jid "transfer_$SAMPLE" \
    -v INPUTFILE="$WORKBAM",OUTPUTPREFIX="$OUTPUTDIR/eh/$SAMPLE",LOGFILE="$OUTPUTDIR/eh/$DATE.log" \
    "$SCRIPTDIR/wrapper_expansionhunter.sh"

# Launch Tredparse
qsub -pe smp 4 -q batch -N "tredparse_$SAMPLE" -hold_jid "transfer_$SAMPLE" \
    -v INPUTFILE="$WORKBAM",OUTPUTDIR="$OUTPUTDIR/tredparse",LOGFILE="$OUTPUTDIR/tredparse/$DATE.log" \
    "$SCRIPTDIR/wrapper_tredparse.sh"

# Launch GangSTR
qsub -pe smp 4 -q batch -N "gangstr_$SAMPLE" -hold_jid "transfer_$SAMPLE" \
    -v INPUTFILE="$WORKBAM",OUTPUTPREFIX="$OUTPUTDIR/gangstr/$SAMPLE",LOGFILE="$OUTPUTDIR/gangstr/$DATE.log" \
    "$SCRIPTDIR/wrapper_gangstr.sh"

# Launch ehdn profile
qsub -pe smp 4 -q batch -N "ehdn_$SAMPLE" -hold_jid "transfer_$SAMPLE" \
    -v INPUTFILE="$WORKBAM",OUTPUTPREFIX="$OUTPUTDIR/ehdn/$SAMPLE",LOGFILE="$OUTPUTDIR/ehdn/$DATE.log" \
    "$SCRIPTDIR/wrapper_ehdn.sh"

# Delete transferred bam and bai once the four tool jobs are done.
# -sync y makes qsub wait for this job, so the script returns only after the deletion job has finished.
qsub -pe smp 1 -q batch -hold_jid "eh_$SAMPLE,tredparse_$SAMPLE,gangstr_$SAMPLE,ehdn_$SAMPLE" -sync y \
    -v SAMPLE="$SAMPLE",LOGFILE="$OUTPUTDIR/delete_$DATE.log" \
    "$SCRIPTDIR/wrapper_delete.sh"
#! /bin/sh
### ASDP PIPELINE ###
## wrapper_delete.sh
## Version : 0.0.1
## Licence : FIXME
## Description : a wrapper for qsubing bam deletion for STR pipeline
## Usage : qsub -pe smp 1 -v SAMPLE=<sample>,[LOGFILE=<path to the log file>] wrapper_delete.sh
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20191208
## Known bugs : None
# Log file path option
# Log file path option: fall back to a timestamped file in the current directory
if [ -z "$LOGFILE" ]
then
    LOGFILE=delete.$(date +"%F_%H-%M-%S").log
fi
# Logging: from here on stdout and stderr are appended to the log file
exec 1>> "$LOGFILE" 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if sample is specified
if [ -z "$SAMPLE" ]
then
    echo "Sample is not specified"
    echo "$(date +"%F_%H-%M-%S"): END"
    # Flag file left in the current directory to signal the failure
    touch delete.failed
    exit 1
fi
# Work copies of the bam and its index inside the sample's pipeline directory
BAMFILE="/work/gad/shared/analyse/STR/pipeline/$SAMPLE/$SAMPLE.bam"
BAIFILE="/work/gad/shared/analyse/STR/pipeline/$SAMPLE/$SAMPLE.bai"
# Delete and check exit code.
# The logged command lists the files actually removed, not just the sample name.
echo "command : rm $BAMFILE $BAIFILE"
rm "$BAMFILE" "$BAIFILE"
delete_exitcode=$?
echo "delete exit code : $delete_exitcode"
if [ "$delete_exitcode" -ne 0 ]
then
    echo "$(date +"%F_%H-%M-%S"): END"
    touch delete.failed
    exit 1
fi
echo "$(date +"%F_%H-%M-%S"): END"
#! /bin/sh
### ASDP PIPELINE ###
## wrapper_ehdn.sh
## Version : 0.0.1
## Licence : FIXME
## Description : a wrapper for qsubing ExpansionHunter denovo script for STR detection
## Usage : qsub -pe smp 1 -v INPUTFILE=<path to the bam file>,OUTPUTPREFIX=<output prefix>,[LOGFILE=<path to the log file>] wrapper_ehdn.sh
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20191208
## Known bugs : None
# Log file path option
# Log file path option: fall back to a timestamped file in the current directory
if [ -z "$LOGFILE" ]
then
    LOGFILE=ehdn.$(date +"%F_%H-%M-%S").log
fi
# Logging: from here on stdout and stderr are appended to the log file
exec 1>> "$LOGFILE" 2>&1
echo "$(date +"%F_%H-%M-%S"): START"
# Check if input file exists
if [ ! -f "$INPUTFILE" ]
then
    echo "Input file '$INPUTFILE' does not exist"
    echo "$(date +"%F_%H-%M-%S"): END"
    # Flag file left in the current directory to signal the failure
    touch ehdn.failed
    exit 1
fi
# Check if output prefix is specified
if [ -z "$OUTPUTPREFIX" ]
then
    echo "Output prefix is not specified"
    echo "$(date +"%F_%H-%M-%S"): END"
    touch ehdn.failed
    exit 1
fi
# Create .bam and .bai symbolic links in a private directory.
# A dedicated variable is used instead of TMPDIR so that the standard
# temporary-directory variable seen by mktemp and child processes is not clobbered.
LINKDIR="$(mktemp -d)"
if [ -z "$LINKDIR" ] || [ ! -d "$LINKDIR" ]
then
    # Without this check an empty value would place the links directly under /
    echo "Cannot create temporary directory"
    echo "$(date +"%F_%H-%M-%S"): END"
    touch ehdn.failed
    exit 1
fi
BAMLINK="$LINKDIR/$(basename "$INPUTFILE")"
ln -s "$INPUTFILE" "$BAMLINK"
# The index sits next to the bam as <name>.bai but is linked as <name>.bam.bai
# (presumably the naming the tool looks for -- TODO confirm)
ln -s "$(echo "$INPUTFILE" | sed 's/\.bam$/.bai/')" "$BAMLINK.bai"
# Launch script command and check exit code
EHDN=/work/gad/shared/bin/expansionhunterdenovo/ExpansionHunterDenovo-v0.8.0-linux_x86_64/bin/ExpansionHunterDenovo-v0.8.0
REFERENCE=/work/gad/shared/pipeline/hg19/index/hg19_essential.fa
echo "command : $EHDN profile \
--reads $BAMLINK \
--reference $REFERENCE \
--output-prefix $OUTPUTPREFIX \
--min-anchor-mapq 50 \
--max-irr-mapq 40"
"$EHDN" profile \
    --reads "$BAMLINK" \
    --reference "$REFERENCE" \
    --output-prefix "$OUTPUTPREFIX" \
    --min-anchor-mapq 50 \
    --max-irr-mapq 40
ehdn_exitcode=$?
# Remove .bam and .bai symbolic links (only the links, never the real files)
rm "$BAMLINK"
rm "$BAMLINK.bai"
rmdir "$LINKDIR"
echo "ehdn exit code : $ehdn_exitcode"
if [ "$ehdn_exitcode" -ne 0 ]
then
    echo "$(date +"%F_%H-%M-%S"): END"
    touch ehdn.failed
    exit 1
fi
echo "$(date +"%F_%H-%M-%S"): END"
...@@ -50,18 +50,20 @@ echo "command : /work/gad/shared/bin/gangstr/GangSTR-2.4/bin/GangSTR \ ...@@ -50,18 +50,20 @@ echo "command : /work/gad/shared/bin/gangstr/GangSTR-2.4/bin/GangSTR \
--ref /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \ --ref /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--regions /work/gad/shared/bin/gangstr/STRregions/hg19_ver13_1.bed \ --regions /work/gad/shared/bin/gangstr/STRregions/hg19_ver13_1.bed \
--out "$OUTPUTPREFIX" \ --out "$OUTPUTPREFIX" \
--verbose \ --verbose
--insertmean "$INSERTMEAN" \
--insertsdev "$INSERTDEV"" # --insertmean "$INSERTMEAN" \
# --insertsdev "$INSERTDEV""
/work/gad/shared/bin/gangstr/GangSTR-2.4/bin/GangSTR \ /work/gad/shared/bin/gangstr/GangSTR-2.4/bin/GangSTR \
--bam "$INPUTFILE" \ --bam "$INPUTFILE" \
--ref /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \ --ref /work/gad/shared/pipeline/hg19/index/hg19_essential.fa \
--regions /work/gad/shared/bin/gangstr/STRregions/hg19_ver13_1.bed \ --regions /work/gad/shared/bin/gangstr/STRregions/hg19_ver13_1.bed \
--out "$OUTPUTPREFIX" \ --out "$OUTPUTPREFIX" \
--verbose \ --verbose
--insertmean "$INSERTMEAN" \
--insertsdev "$INSERTDEV" # --insertmean "$INSERTMEAN" \
# --insertsdev "$INSERTDEV"
gangstr_exitcode=$? gangstr_exitcode=$?
......
#! /bin/sh
### ASDP PIPELINE ###
## wrapper_transfer.sh
## Version : 0.0.1
## Licence : FIXME
## Description : a wrapper for qsubing bam transfer for STR pipeline
## Usage : qsub -pe smp 1 -v INPUTFILE=<path to the bam file>,OUTPUTDIR=<output directory>,[LOGFILE=<path to the log file>] wrapper_transfer.sh
## Output : FIXME
## Requirements : FIXME
## Author : anne-sophie.denomme-pichon@u-bourgogne.fr
## Creation Date : 20191208
## last revision date : 20191208
## Known bugs : None
# Log file path option
# Timestamp in the format used on every START/END log line
now() {
    date +"%F_%H-%M-%S"
}

# Common failure path: log END, leave the transfer.failed flag file in the
# current directory and stop with status 1
abort() {
    echo "$(now): END"
    touch transfer.failed
    exit 1
}

# Log file path option: default to a timestamped file when LOGFILE is unset or empty
LOGFILE="${LOGFILE:-transfer.$(now).log}"

# Logging: stdout and stderr are appended to the log file from here on
exec 1>> "$LOGFILE" 2>&1
echo "$(now): START"

# The bam to transfer must be an existing regular file
if [ ! -f "$INPUTFILE" ]
then
    echo "Input file '$INPUTFILE' does not exist"
    abort
fi

# A destination directory is mandatory
if [ -z "$OUTPUTDIR" ]
then
    echo "Output directory is not specified"
    abort
fi

# The index is expected next to the bam, with .bam replaced by .bai
BAIFILE="$(echo "$INPUTFILE" | sed 's/\.bam$/.bai/')"

# Transfer bam and index together, then check the exit code
echo "command : rsync -aAX $INPUTFILE $BAIFILE $OUTPUTDIR"
rsync -aAX "$INPUTFILE" "$BAIFILE" "$OUTPUTDIR"
transfer_exitcode=$?
echo "transfer exit code : $transfer_exitcode"
if [ "$transfer_exitcode" -ne 0 ]
then
    abort
fi
echo "$(now): END"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment