From b2618dd9a78efda75000614a3c9eaac2e6354f29 Mon Sep 17 00:00:00 2001 From: Holger Brandl <brandl@mpi-cbg.de> Date: Tue, 16 Jun 2015 11:02:54 +0200 Subject: [PATCH] continued star wrapper --- dge_workflow/dge_utils.sh | 2 +- dge_workflow/star_align.sh | 44 ++++++++++++++++++++++---------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 069fb0e..9cc7225 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -165,7 +165,7 @@ wait4jobs .tophatjobs -dge_bam_correlate . +dge_bam_correlate . & ## create tophat mapping report spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R . diff --git a/dge_workflow/star_align.sh b/dge_workflow/star_align.sh index 636d3b8..1ad3346 100755 --- a/dge_workflow/star_align.sh +++ b/dge_workflow/star_align.sh @@ -5,14 +5,23 @@ usage=' Use star to align fastq files against a genome -Usage: star_align.sh <igenome> <fastq_files>... +Usage: star_align.sh <igenome> <fastq_files> Options: +-c Cache results ' -#-c Cache results -eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@") -#eval $(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz) +#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@") +#echo "$usage" | ~/bin/docopts/docopts -h - : "$@" +#echo "$usage" | ~/bin/docopts/docopts -h - : "hallo dfds" +#eval $(echo $usage | ~/bin/docopts/docopts -h - : "$@") +#eval(exit 64) +eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : "$@")" + +# v0.7 style +#eval $(echo "$usage" | /home/brandl/bin/docopts_v0.7/docopts "hallo") + +#eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz)" #for fastqFile in ${fastq_files[@]} ; do # echo processing $fastqFile @@ -21,34 +30,33 @@ eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@") #echo $igenome -## build index if not present - +fastqFiles=${fastq_files[@]} +#echo $fastqFiles - -export star_index="$igenome/Sequence/StarIndex/genome" +export star_index="${igenome}/Sequence/StarIndex" export gtfFile="$igenome/Annotation/Genes/genes.gtf" -head $gtfFile +#head $gtfFile if [ ! -f $gtfFile ]; then >&2 echo "gtf '$gtfFile' does not exis"; exit 1; fi -## basic +## basic usage tutorial #http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/ -if ! -d "${igenome}/Sequence/StarIndex" ]; then +## build index if not present +if ! -d "${star_index}" ]; then mailme "${project}: creating STAR index for $igenome" -mkdir ${igenome}/Sequence/StarIndex +mkdir ${star_index} -cmd="STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10" +cmd="STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10" #eval $cmd -#STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10 +#STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10 mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium | blockScript ## prevent modification -chmod -R -w ${igenome}/Sequence/StarIndex - +chmod -R -w ${star_index} fi @@ -70,7 +78,7 @@ ll $fastqFiles for fastqFile in $fastqFiles ; do echo "submitting tophat job for $fastqFile" - # DEBUG fastqFile=/projects/bioinfo/holger/projects/helin/mouse/trimmed/mouse_big_cyst_rep4_ca.fastq.gz + # DEBUG fastqFile=/projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz fastqBaseName=$(basename ${fastqFile%%.fastq.gz}) outputdir=$fastqBaseName @@ -80,7 +88,7 @@ for fastqFile in $fastqFiles ; do ## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag) mysub "${project}__star__${fastqBaseName}" " # tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile - STAR --genomeDir $star_index --readFilesIn /path/to/read1 $fastqFile --runThreadN 6 --outFileNamePrefix $fastqBaseName --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile + STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 --readFilesCommand zcat --outFileNamePrefix $fastqBaseName --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs done -- GitLab