Skip to content
Snippets Groups Projects
star_align.sh 2.39 KiB
Newer Older
#!/usr/bin/env bash


#https://www.biostars.org/p/91020/

usage='
Use star to align fastq files against a genome
Usage: spin.R <igenome> <fastq_files>...

Options:
'
-c        Cache results

#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : asdf asdf.fastq)
eval $(echo "$usage" | ~/bin/docopts/docopts -h -  : asdf asdf.fastq another.fastq)
for fastqFile in ; do
    echo processing $fastqFile
done

echo $igenome


gtfFile=$1
bamDir=$2
labels=$3

echo "${!dopts[@]}"
echo "${dopts[-c]}"
echo "${dopts[igenome]}"


#http://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/
#args=$(echo "$usage" | ~/bin/docopts/docopts -h -  : "test.R")

## this will define one environment variable per parameter (c,e,m,v)
eval $(echo "$usage" | ~/bin/docopts/docopts -h -  : -e test.R)

fastqFiles=${fastq_files[@]}

# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38

if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ];
     then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return;
fi



export bowtie_gindex="$IGENOME/Sequence/Bowtie2Index/genome"
export gtfFile="$IGENOME/Annotation/Genes/genes.gtf"
#head $gtfFile

if [ ! -f $gtfFile ]; then
    >&2 echo "gtf '$gtfFile' does not exis"; return;
fi

if [ -z "$(which tophat)" ]; then
    >&2 echo "no tomcat binary in PATH"; return;
fi


echo "running tophat using igenome '$IGENOME' for the following files"
ll $fastqFiles

#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)

for fastqFile in $fastqFiles ; do
    echo "submitting tophat job for $fastqFile"

    # DEBUG fastqFile=/projects/bioinfo/holger/projects/helin/mouse/trimmed/mouse_big_cyst_rep4_ca.fastq.gz
    fastqBaseName=$(basename ${fastqFile%%.fastq.gz})
    outputdir=$fastqBaseName

    ## uniquely mapping reads only:   http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
    ###     tophat -p6  -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
    mysub "${project}__tophat__${fastqBaseName}" "
    tophat -p6  -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile

    mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
    samtools index $outputdir/$(basename $outputdir).bam
    " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
done

wait4jobs .tophatjobs



dge_bam_correlate .

## create tophat mapping report
spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .

mailme "$project: tophat done in $(pwd)"