#!/usr/bin/env bash #https://www.biostars.org/p/91020/ usage=' Use star to align fastq files against a genome Usage: star_align.sh <igenome> <fastq_files>... Options: -c Cache results ' #eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@") #echo "$usage" | ~/bin/docopts/docopts -h - : "$@" #echo "$usage" | ~/bin/docopts/docopts -h - : "hallo dfds" #eval $(echo $usage | ~/bin/docopts/docopts -h - : "$@") #eval(exit 64) eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : "$@")" # v0.7 style #eval $(echo "$usage" | /home/brandl/bin/docopts_v0.7/docopts "hallo") #eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz fastq2)" ## extract all configuration parameters fastqFiles=${fastq_files[@]} export star_index="${igenome}/Sequence/StarIndex" export gtfFile="$igenome/Annotation/Genes/genes.gtf" #echo $fastqFiles #head $gtfFile if [ ! -f $gtfFile ]; then >&2 echo "gtf '$gtfFile' does not exis"; exit 1; fi if [ -z "$(which STAR)" ]; then >&2 echo "no STAR binary in PATH"; exit 1; fi ## basic usage tutorial #http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/ ## build index if not present if [ ! -f "${star_index}/SA" ]; then echo "Missing STAR for ${star_index}/SA; use 'dge_create_star_index ${igenome}' to create it"; exit 1; # dge_create_star_index ${igenome}' fi echo "running STAR using igenome '${igenome}' for the following files" #fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz) for fastqFile in $fastqFiles ; do echo "submitting STAR job for $fastqFile" # DEBUG fastqFile=/projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz fastqBaseName=$(basename ${fastqFile%%.fastq.gz}) outputdir=$fastqBaseName ## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464 ### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile ## params: # --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag) # --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout to get rid of artifical junctions # --quantMode GeneCounts see https://groups.google.com/forum/#!searchin/rna-star/GeneCounts/rna-star/gZRJx3ElRNo/p5FjBYKuY00J mysub "${project}__star__${fastqBaseName}" " STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 --readFilesCommand zcat --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts mv ${fastqBaseName}.Aligned.sortedByCoord.out.bam ${fastqBaseName}.bam samtools index ${fastqBaseName}.bam " -n 5 -R span[hosts=1] -q medium | joblist .starjobs done wait4jobs .starjobs rm -rf *STARgenome *.Log.progress.out _STARtmp *.SJ.out.tab *Log.out #ziprm star_outlogs *Log.out dge_bam_correlate . & ## estimate expresssion with http://bioinf.wehi.edu.au/featureCounts/ ##Summarize multiple datasets at the same time: #featureCounts -t exon -g gene_id -a annotation.gtf -o counts.txt library1.bam library2.bam library3.bam #bamFile=control_3.bam #featureCounts -t exon -g gene_id -a ${gtfFile} -o feature_counts.txt ${bamFile} -T 5 mysub "${project}__feature_counts" "featureCounts -t exon -g gene_id -a ${gtfFile} -o gene_counts.txt $(ls *bam) -T 5" -q medium | blockScript ## create tophat mapping report spin.R ${NGS_TOOLS}/dge_workflow/star_qc.R . mailme "${project}: STAR done in $(pwd)"