From 2c2c4606666a8d3ce0f494997a93ae82f25985d3 Mon Sep 17 00:00:00 2001 From: Holger Brandl <brandl@mpi-cbg.de> Date: Fri, 28 Nov 2014 09:49:48 +0100 Subject: [PATCH] workflow refinements --- dge_workflow/dge_master.sh | 2 +- dge_workflow/{lsf_rna_seq.sh => dge_utils.sh} | 52 +++++++++++++++++-- ...correlation.sh => naha_bam_correlation.sh} | 0 3 files changed, 50 insertions(+), 4 deletions(-) rename dge_workflow/{lsf_rna_seq.sh => dge_utils.sh} (76%) rename misc/{bam_correlation.sh => naha_bam_correlation.sh} (100%) diff --git a/dge_workflow/dge_master.sh b/dge_workflow/dge_master.sh index dedc6a5..344dc09 100755 --- a/dge_workflow/dge_master.sh +++ b/dge_workflow/dge_master.sh @@ -12,7 +12,7 @@ DGE_HOME=/projects/bioinfo/holger/bioinfo_templates/dge_workflow source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/lsf_utils.sh 2>&1 2>/dev/null) source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/R/utils/spinr.sh 2>&1 2>/dev/null) -source $DGE_HOME/lsf_rna_seq.sh +source $DGE_HOME/dge_utils.sh export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH diff --git a/dge_workflow/lsf_rna_seq.sh b/dge_workflow/dge_utils.sh similarity index 76% rename from dge_workflow/lsf_rna_seq.sh rename to dge_workflow/dge_utils.sh index 0ca28af..64272ee 100755 --- a/dge_workflow/lsf_rna_seq.sh +++ b/dge_workflow/dge_utils.sh @@ -1,6 +1,19 @@ ## docs ## http://blog.joncairns.com/2013/08/what-you-need-to-know-about-bash-functions/ +source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/lsf_utils.sh 2>&1 2>/dev/null) +source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/R/utils/spinr.sh 2>&1 2>/dev/null) + + +export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH +export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH +export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH +export PATH=/sw/apps/python/current/bin:$PATH +export PATH=/home/brandl/bin/deepTools/bin:$PATH + + 
+export R_LIBS=/tmp/r_index ## export to make sure that packages are loaded from local repository, otherwise sqlite won't work + ## create fastq report for all fastq and fastq.gz files in the current directory dge_fastqc(){ @@ -44,10 +57,12 @@ done wait4jobs .fastqc_jobs -ziprm fastqc_logs fastqc__* +ziprm fastqc_logs fastqc__*.log spin.R $DGE_HOME/fastqc_summary.R $outputDir +mailme "$project: fastqc done in $(pwd)" + } export -f dge_fastqc @@ -66,7 +81,7 @@ for fastqFile in $* ; do done wait4jobs .cajobs -ziprm cutadapt_logs ${project}__ca__* +ziprm cutadapt_logs ${project}__ca__*.log ## todo do a small report here about what has been trimmed away and why @@ -135,13 +150,42 @@ done wait4jobs .tophatjobs +ziprm tophat_logs ${project}__tophat__*.log + + +dge_bam_correlate . + ## create tophat mapping report spin.R $DGE_HOME/bam_qc.R . +mailme "$project: tophat done in $(pwd)" + } export -f dge_tophat_se +dge_bam_correlate(){ +## correlate all mapped bam files found below <bam_directory>: labels are the file names without directory and (_ca).bam suffix; bamCorrelate is submitted via mysub with bc.pdf and bc.txt as output names +if [ $# -ne 1 ]; then + echo "Usage: dge_bam_correlate <bam_directory>" >&2 ; return; +fi + +local bamDir=$1 + +bamFiles=$(find "$bamDir" | grep '\.bam$' | grep -v "unmapped" | sort) +bamLabels=$(echo "$bamFiles" |sed 's!.*/!!' 
| sed 's/_ca\.bam$//' | sed 's/\.bam$//' | xargs echo); echo $bamLabels + +## see how well bam files correlate using untrimmed data +bcCmd="bamCorrelate bins --bamfiles $(echo $bamFiles | xargs echo) --labels $bamLabels --plotFile='bc.pdf' --outFileCorMatrix='bc.txt' --numberOfProcessors=6 --corMethod spearman --zMin 0.5 --zMax 1" +mysub "${project}__bamcorrelate" "$bcCmd" -q long -n 4 -R span[hosts=1] | blockScript +## NOTE(review): --numberOfProcessors=6 is larger than the 4 slots requested with -n 4 -- confirm which value is intended +mailme "$project: bamcorrelate done in $(pwd)" + +} +export -f dge_bam_correlate + + + dge_cuffdiff(){ local gtfFile=$1 @@ -149,7 +193,7 @@ local bamDir=$2 local labels=$3 if [ $# -ne 3 ]; then - echo "Usage: dge_fastqc <gtf_file> <bam directory> <labels>" >&2 ; return; + echo "Usage: dge_cuffdiff <gtf_file> <bam_directory> <labels>" >&2 ; return; fi if [ -z "$(which cuffdiff)" ]; then @@ -209,5 +253,7 @@ fi cp $tmpDbDir/cuffData.db . rm -rf $tmpDbDir ## because it's no longer needed +mailme "$project: cuffdiff done in $(pwd)" + } export -f dge_cuffdiff diff --git a/misc/bam_correlation.sh b/misc/naha_bam_correlation.sh similarity index 100% rename from misc/bam_correlation.sh rename to misc/naha_bam_correlation.sh -- GitLab