recal_bam.sh 3.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#!/bin/ksh

### GAD PIPELINE ###
## recal_bam.sh
## Version : 2.4.0
## Description : This script allows user to launch GATK BaseRecalibrator and GATK PrintReads to recalibrate base quality scores
## Usage : qsub -pe smp 1 -v INPUTFILE=</path/to/the/input/file>,OUTPUTFILE=</path/to/the/output/file>,[TABLE=/path/to/the/output/recalibration/table],[TARGET=/path/to/the/target/file],LOGFILE=/path/to/the/log/file],[CONFIGFILE=/path/to/the/config/file] recal_bam.sh
## Output : base recalibration table and recalibrated BAM
## Requirements : jdk 1.7.0, GATK 3.3

## Author : Emilie.Tisserant@u-bourgogne.fr
## Creation Date : 20150707
## last revision date : 20160112
## Known bugs : None

#$ -q batch
#$ -V
#$ -l mem_total=16g


sample=`basename ${INPUTFILE%%.*}`

# Log file path option
if [ -z ${LOGFILE} ]
then
    LOGFILE=recal_bam.$(date +"%F_%H-%M-%S").log
fi

# Config file path option
if [ -z ${CONFIGFILE} ]
then
    CONFIGFILE=analysis_config.tsv
fi

# Recalibration table path option
if [ -z ${TABLE} ]
then
    TABLE=$sample.recaldata.table
fi

JAVA=$(grep "javacmd" $CONFIGFILE | awk '{print $2" "$3}')
GATK=$(grep "GATKbase" $CONFIGFILE | awk '{print $2}')
TEMPORARY_DIR=$(grep "temporary_dir" $CONFIGFILE | awk '{print $2}')
REF=$(grep "reference" $CONFIGFILE | awk '{print $2}')
DBSNP=$(grep "dbsnp" $CONFIGFILE | awk '{print $2}')


# Logging
exec 1>> $LOGFILE 2>&1
echo "$(date +"%F_%H-%M-%S"): START"

# Check if config file exist
if [ -z $CONFIGFILE ] || [ ! -f $CONFIGFILE ]
then
    echo "Config file does not exist"
    echo "$(date +"%F_%H-%M-%S"): END"
    touch recal_bam.failed
    exit 1
fi
# Check if input file exist
if [ -z $INPUTFILE ] || [ ! -f $INPUTFILE ]
then
    echo "Input file does not exist"
    echo "$(date +"%F_%H-%M-%S"): END"
    touch recal_bam.failed
    exit 1
fi

# Check if output file is specified
if [ -z $OUTPUTFILE ]
then
    echo "Output file is not specified"
    echo "$(date +"%F_%H-%M-%S"): END"
    touch recal_bam.failed
    exit 1
fi

# Target file option
if [ -z ${TARGET} ]
then
    TARGET=""
else
    TARGET="-L $TARGET"
fi

# Launch GATK BaseRecalibrator and check exit code
echo "$JAVA -jar -Djava.io.tmpdir=$TEMPORARY_DIR -Xmx8g $GATK/GenomeAnalysisTK.jar -T BaseRecalibrator -I $INPUTFILE -R $REF $TARGET -knownSites $DBSNP -o $TABLE -nct 1"
$JAVA -jar -Djava.io.tmpdir=$TEMPORARY_DIR -Xmx8g $GATK/GenomeAnalysisTK.jar -T BaseRecalibrator -I $INPUTFILE -R $REF $TARGET -knownSites $DBSNP -o $TABLE -nct 1
baserecalibrator_exitcode=$?
echo "BaseRecalibrator exit code : $baserecalibrator_exitcode"
if [ $baserecalibrator_exitcode != 0 ]
then
    echo "$(date +"%F_%H-%M-%S"): END"
    touch recal_bam.failed
    exit 1
fi

# Launch GATK PrintReads and check exit code
echo "GATK command: $JAVA -Djava.io.tmpdir=$TEMPORARY_DIR -Xmx8g -jar $GATK/GenomeAnalysisTK.jar -T PrintReads -I $INPUTFILE -R $REF -BQSR $TABLE -o $OUTPUT -nct 1"
$JAVA -Djava.io.tmpdir=$TEMPORARY_DIR -Xmx8g -jar $GATK/GenomeAnalysisTK.jar -T PrintReads -I $INPUTFILE -R $REF -BQSR $TABLE -o $OUTPUTFILE -nct 1
printreads_exitcode=$?
echo "PrintReads exit code : $printreads_exitcode"
if [ $printreads_exitcode != 0 ]
then
    echo "$(date +"%F_%H-%M-%S"): END"
    touch recal_bam.failed
    exit 1
fi

echo "$(date +"%F_%H-%M-%S"): END"