Skip to content
Snippets Groups Projects
star_align.sh 3.23 KiB
Newer Older
#!/usr/bin/env bash


#https://www.biostars.org/p/91020/

usage='
Use star to align fastq files against a genome
Holger Brandl's avatar
Holger Brandl committed
Usage: star_align.sh <igenome> <fastq_files>

Options:
Holger Brandl's avatar
Holger Brandl committed
-c        Cache results
Holger Brandl's avatar
Holger Brandl committed
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@")
#echo "$usage" | ~/bin/docopts/docopts -h -  : "$@"
#echo "$usage" | ~/bin/docopts/docopts -h - : "hallo dfds"
#eval $(echo  $usage | ~/bin/docopts/docopts -h - : "$@")
#eval(exit 64)
eval "$(echo  "$usage" | ~/bin/docopts/docopts -h - : "$@")"

# v0.7 style
#eval $(echo  "$usage" | /home/brandl/bin/docopts_v0.7/docopts "hallo")

#eval "$(echo "$usage" | ~/bin/docopts/docopts -h -  : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz)"
Holger Brandl's avatar
Holger Brandl committed
#for fastqFile in ${fastq_files[@]} ; do
#    echo processing $fastqFile
#done
#
#echo $igenome
Holger Brandl's avatar
Holger Brandl committed
fastqFiles=${fastq_files[@]}
#echo $fastqFiles
Holger Brandl's avatar
Holger Brandl committed
export star_index="${igenome}/Sequence/StarIndex"
Holger Brandl's avatar
Holger Brandl committed
export gtfFile="$igenome/Annotation/Genes/genes.gtf"
Holger Brandl's avatar
Holger Brandl committed
#head $gtfFile
Holger Brandl's avatar
Holger Brandl committed
if [ ! -f $gtfFile ]; then
    >&2 echo "gtf '$gtfFile' does not exis"; exit 1;
fi
Holger Brandl's avatar
Holger Brandl committed
## basic usage tutorial
Holger Brandl's avatar
Holger Brandl committed
#http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/
Holger Brandl's avatar
Holger Brandl committed
## build index if not present
if ! -d "${star_index}" ]; then
Holger Brandl's avatar
Holger Brandl committed
mailme "${project}: creating STAR index for $igenome"
Holger Brandl's avatar
Holger Brandl committed
mkdir ${star_index}
Holger Brandl's avatar
Holger Brandl committed
cmd="STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
Holger Brandl's avatar
Holger Brandl committed
#eval $cmd
Holger Brandl's avatar
Holger Brandl committed
#STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
Holger Brandl's avatar
Holger Brandl committed
mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium  | blockScript

## prevent modification
Holger Brandl's avatar
Holger Brandl committed
chmod -R -w ${star_index}
Holger Brandl's avatar
Holger Brandl committed
fi



if [ ! -f $gtfFile ]; then
Holger Brandl's avatar
Holger Brandl committed
    >&2 echo "gtf '$gtfFile' does not exis"; exit 1;
Holger Brandl's avatar
Holger Brandl committed
if [ -z "$(which STAR)" ]; then
    >&2 echo "no tomcat binary in PATH"; exit 1;
Holger Brandl's avatar
Holger Brandl committed
echo "running STAR using igenome '${igenome}' for the following files"
ll $fastqFiles

#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)

for fastqFile in $fastqFiles ; do
    echo "submitting tophat job for $fastqFile"

Holger Brandl's avatar
Holger Brandl committed
    # DEBUG fastqFile=/projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz
    fastqBaseName=$(basename ${fastqFile%%.fastq.gz})
    outputdir=$fastqBaseName

    ## uniquely mapping reads only:   http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
    ###     tophat -p6  -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile

Holger Brandl's avatar
Holger Brandl committed
    ## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
    mysub "${project}__star__${fastqBaseName}" "
#    tophat -p6  -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
Holger Brandl's avatar
Holger Brandl committed
    STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 --readFilesCommand zcat --outFileNamePrefix $fastqBaseName --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile
    " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
done

wait4jobs .tophatjobs



dge_bam_correlate .

## create tophat mapping report
Holger Brandl's avatar
Holger Brandl committed
## todo adjust report to STAR
#spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .
Holger Brandl's avatar
Holger Brandl committed
mailme "$project: STAR done in $(pwd)"