Automation using SGE

parent c6d7d2ee
#! /bin/sh
### ASDP PIPELINE ###
## Licence: AGPLv3
## Author: Anne-Sophie Denommé-Pichon
## Description: script to launch the wrapper to downsample data
##
## For every input BAM file, every downsampling rate and every experiment
## number, this script creates a dedicated output directory under
## ROOTOUTPUTDIR and submits one wrapper_downsampling.sh job with qsub.
## Exit status: 0 if every job was submitted, 1 if at least one output
## directory could not be created or one submission failed.

#ROOTOUTPUTDIR: downsampled BAM output directory
ROOTOUTPUTDIR="/work/gad/shared/analyse/STR/Data/downsampling"
#EXPERIMENT_COUNT: number of randomized tests the user wants for each sample and for each downsampling rate
EXPERIMENT_COUNT=20
#COMPUTE_QUEUE: queue name passed to qsub -q
COMPUTE_QUEUE="batch"

# The wrapper is looked up next to this launcher script.
wrapper="$(dirname "$0")/wrapper_downsampling.sh"

# Set to 1 as soon as one directory creation or job submission fails.
failed=0

for inputfile in \
    "/work/gad/shared/analyse/STR/Data/dijen017/dijen017/dijen017.bam" \
    "/work/gad/shared/analyse/STR/Data/dijen402/dijen402.bam"
do
    # POSIX form of basename (path, then suffix); "-s" is a GNU extension.
    sample="$(basename "$inputfile" .bam)"

    for rate in 5 10 15 20 25 30 35 40 # Downsampling rate: 0 - 100
    do
        # Count from 1 to EXPERIMENT_COUNT without relying on the
        # non-POSIX "seq" utility. The counter is incremented first so
        # that "continue" below cannot loop forever.
        experiment_number=0
        while [ "$experiment_number" -lt "$EXPERIMENT_COUNT" ]
        do
            experiment_number=$((experiment_number + 1))

            outputdir="${ROOTOUTPUTDIR}/${sample}_downsampling_${rate}_${experiment_number}"
            # The timestamp makes each submission write to its own log file.
            logfile="$outputdir/${sample}_downsampling_wrapper_${rate}_${experiment_number}.$(date +"%F_%H-%M-%S").log"

            # qsub is told to write the job's stdout/stderr into outputdir,
            # so do not submit when the directory could not be created.
            if ! mkdir -p "$outputdir"
            then
                echo "Cannot create output directory '$outputdir', job not submitted" >&2
                failed=1
                continue
            fi

            if ! qsub -pe smp 1 -o "$outputdir" -e "$outputdir" -q "$COMPUTE_QUEUE" \
                -v INPUTFILE="$inputfile",OUTPUTDIR="$outputdir",SAMPLE="$sample",DOWNSAMPLING_RATE="$rate",EXPERIMENT_NUMBER="$experiment_number",LOGFILE="$logfile" \
                "$wrapper"
            then
                echo "qsub failed for sample '$sample', rate $rate, experiment $experiment_number" >&2
                failed=1
            fi
        done
    done
done

exit "$failed"
#! /bin/sh
### ASDP PIPELINE ###
## Licence: AGPLv3
## Author: Anne-Sophie Denommé-Pichon
## Description: a wrapper for BAM downsampling
## Usage: qsub -pe smp 1 -v INPUTFILE=<path to the BAM file>,OUTPUTDIR=<output directory>,SAMPLE=<dijen or dijex>,DOWNSAMPLING_RATE=<percentage of reads to keep in the BAM file>,EXPERIMENT_NUMBER=<experiment number>,LOGFILE=<path to the log file> wrapper_downsampling.sh
##
## All parameters are read from the environment and all of them are
## required. The script exits with status 1 when a parameter is missing,
## when the input file does not exist, or when downsampling or indexing
## fails. Files produced, all sharing the prefix
## <OUTPUTDIR>/<SAMPLE>_downsampling_<DOWNSAMPLING_RATE>_<EXPERIMENT_NUMBER>:
##   <prefix>.bam  passed to bam_downsampling.py as -o, then indexed
##   <prefix>.log  passed to bam_downsampling.py as -l

# Trace every command; once stdout/stderr are redirected below, the trace
# ends up in LOGFILE as well.
set -x

# Print the optional message given as $1, then the END marker, and exit
# with status 1. Everything goes to stdout, i.e. to LOGFILE once the
# redirection below is in place.
abort() {
    if [ "$#" -gt 0 ]
    then
        echo "$1"
    fi
    echo "$(date +"%F_%H-%M-%S"): END"
    exit 1
}

# Log file path option
if [ -z "$LOGFILE" ]
then
    abort "Logfile is not specified"
fi

# Logging: from here on, stdout and stderr are appended to LOGFILE
exec 1>> "$LOGFILE" 2>&1
echo "$(date +"%F_%H-%M-%S"): START"

# Check if input file exists
if [ ! -f "$INPUTFILE" ]
then
    abort "Input file '$INPUTFILE' does not exist"
fi

# Check if output directory is specified
if [ -z "$OUTPUTDIR" ]
then
    abort "Output directory is not specified"
fi

# Check if the sample is specified
if [ -z "$SAMPLE" ]
then
    abort "Sample is not specified"
fi

# Check if downsampling rate is specified
if [ -z "$DOWNSAMPLING_RATE" ]
then
    abort "Downsampling rate is not specified"
fi

# Check if experiment number is specified
if [ -z "$EXPERIMENT_NUMBER" ]
then
    abort "Experiment number is not specified"
fi

# Make sure the output directory exists when the wrapper is submitted
# without going through the launcher.
if ! mkdir -p "$OUTPUTDIR"
then
    abort "Cannot create output directory '$OUTPUTDIR'"
fi

# Common prefix of every file produced for this sample/rate/experiment
output_prefix="${OUTPUTDIR}/${SAMPLE}_downsampling_${DOWNSAMPLING_RATE}_${EXPERIMENT_NUMBER}"

# Command
# NOTE(review): bam_downsampling.py is looked up next to "$0". Under SGE
# the job script may be executed from a spooled copy, in which case "$0"
# no longer points at the original directory -- confirm on the cluster.
"$(dirname "$0")/bam_downsampling.py" \
    -i "${INPUTFILE}" \
    -o "${output_prefix}.bam" \
    -d "${DOWNSAMPLING_RATE}" \
    -l "${output_prefix}.log"
# Must be captured immediately: any command run in between would
# overwrite $?.
downsampling_exitcode=$?

# Check exit code
echo "downsampling exit code : $downsampling_exitcode"
if [ "$downsampling_exitcode" -ne 0 ]
then
    abort
fi

# Index the downsampled BAM only once downsampling is known to have succeeded
if ! samtools index "${output_prefix}.bam"
then
    abort "samtools index failed for '${output_prefix}.bam'"
fi

echo "$(date +"%F_%H-%M-%S"): END"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment