diff --git a/dge_workflow/dge_master_template.sh b/dge_workflow/dge_master_template.sh index f98fb7a535e5131cb7f6d0e9842df43342c477ac..9cb17767486cdd011a52858e0c30f47bb0bcd561 100755 --- a/dge_workflow/dge_master_template.sh +++ b/dge_workflow/dge_master_template.sh @@ -124,7 +124,7 @@ mailme "$project: mapping done" ######################################################################################################################### -#### Basic Alginment QC and technical replicate grouping +#### Technical replicate grouping # #mcdir $baseDir/trep_pooled # diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 3b2483e2c09a066c4ef8541d41468860ee1d56f0..069fb0e1e34b5d6e4f60837e5838c58f2cfbacd8 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -12,6 +12,7 @@ source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.10/R/s ## todo tweak this to work on bioinfo as well export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH +export PATH=/home/brandl/bin/STAR/STAR-STAR_2.4.1d/source:$PATH export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH export PATH=/sw/apps/python/current/bin:$PATH export PATH=/home/brandl/bin/deepTools/bin:$PATH @@ -157,7 +158,7 @@ for fastqFile in $fastqFiles ; do mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam samtools index $outputdir/$(basename $outputdir).bam - " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs + " -n 5 -R span[hosts=1] -q medium | joblist .tophatjobs done wait4jobs .tophatjobs diff --git a/dge_workflow/star_align.sh b/dge_workflow/star_align.sh index 446de4fdf5c5136a198d5c5404ea4444c20bfda8..636d3b8a6399473f8b340e867731d68c300f8d62 100755 --- a/dge_workflow/star_align.sh +++ b/dge_workflow/star_align.sh @@ -5,60 +5,64 @@ usage=' Use star to align fastq files against a genome -Usage: spin.R <igenome> <fastq_files>... +Usage: star_align.sh <igenome> <fastq_files>... Options: ' --c Cache results +#-c Cache results -#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : asdf asdf.fastq) -eval $(echo "$usage" | ~/bin/docopts/docopts -h - : asdf asdf.fastq another.fastq) -for fastqFile in ; do - echo processing $fastqFile -done +eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@") +#eval $(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz) -echo $igenome +#for fastqFile in ${fastq_files[@]} ; do +# echo processing $fastqFile +#done +# +#echo $igenome -gtfFile=$1 -bamDir=$2 -labels=$3 +## build index if not present -echo "${!dopts[@]}" -echo "${dopts[-c]}" -echo "${dopts[igenome]}" -#http://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/ -#args=$(echo "$usage" | ~/bin/docopts/docopts -h - : "test.R") +export star_index="$igenome/Sequence/StarIndex/genome" +export gtfFile="$igenome/Annotation/Genes/genes.gtf" +head $gtfFile -## this will define one environment variable per parameter (c,e,m,v) -eval $(echo "$usage" | ~/bin/docopts/docopts -h - : -e test.R) +if [ ! -f $gtfFile ]; then + >&2 echo "gtf '$gtfFile' does not exis"; exit 1; +fi -fastqFiles=${fastq_files[@]} +## basic +#http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/ -# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 +if ! -d "${igenome}/Sequence/StarIndex" ]; then -if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ]; - then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return; -fi +mailme "${project}: creating STAR index for $igenome" +mkdir ${igenome}/Sequence/StarIndex +cmd="STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10" +#eval $cmd +#STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10 +mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium | blockScript + +## prevent modification +chmod -R -w ${igenome}/Sequence/StarIndex + +fi -export bowtie_gindex="$IGENOME/Sequence/Bowtie2Index/genome" -export gtfFile="$IGENOME/Annotation/Genes/genes.gtf" -#head $gtfFile if [ ! -f $gtfFile ]; then - >&2 echo "gtf '$gtfFile' does not exis"; return; + >&2 echo "gtf '$gtfFile' does not exis"; exit 1; fi -if [ -z "$(which tophat)" ]; then - >&2 echo "no tomcat binary in PATH"; return; +if [ -z "$(which STAR)" ]; then + >&2 echo "no tomcat binary in PATH"; exit 1; fi -echo "running tophat using igenome '$IGENOME' for the following files" +echo "running STAR using igenome '${igenome}' for the following files" ll $fastqFiles #fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz) @@ -72,11 +76,11 @@ for fastqFile in $fastqFiles ; do ## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464 ### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile - mysub "${project}__tophat__${fastqBaseName}" " - tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile - mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam - samtools index $outputdir/$(basename $outputdir).bam + ## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag) + mysub "${project}__star__${fastqBaseName}" " +# tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile + STAR --genomeDir $star_index --readFilesIn /path/to/read1 $fastqFile --runThreadN 6 --outFileNamePrefix $fastqBaseName --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs done @@ -87,6 +91,7 @@ wait4jobs .tophatjobs dge_bam_correlate . ## create tophat mapping report -spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R . +## todo adjust report to STAR +#spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R . -mailme "$project: tophat done in $(pwd)" \ No newline at end of file +mailme "$project: STAR done in $(pwd)" \ No newline at end of file