From 2c2c4606666a8d3ce0f494997a93ae82f25985d3 Mon Sep 17 00:00:00 2001
From: Holger Brandl <brandl@mpi-cbg.de>
Date: Fri, 28 Nov 2014 09:49:48 +0100
Subject: [PATCH] workflow refinements

---
 dge_workflow/dge_master.sh                    |  2 +-
 dge_workflow/{lsf_rna_seq.sh => dge_utils.sh} | 52 +++++++++++++++++--
 ...correlation.sh => naha_bam_correlation.sh} |  0
 3 files changed, 50 insertions(+), 4 deletions(-)
 rename dge_workflow/{lsf_rna_seq.sh => dge_utils.sh} (76%)
 rename misc/{bam_correlation.sh => naha_bam_correlation.sh} (100%)

diff --git a/dge_workflow/dge_master.sh b/dge_workflow/dge_master.sh
index dedc6a5..344dc09 100755
--- a/dge_workflow/dge_master.sh
+++ b/dge_workflow/dge_master.sh
@@ -12,7 +12,7 @@ DGE_HOME=/projects/bioinfo/holger/bioinfo_templates/dge_workflow
 
 source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/lsf_utils.sh 2>&1 2>/dev/null)
 source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/R/utils/spinr.sh 2>&1 2>/dev/null)
-source $DGE_HOME/lsf_rna_seq.sh
+source $DGE_HOME/dge_utils.sh
 
 
 export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH
diff --git a/dge_workflow/lsf_rna_seq.sh b/dge_workflow/dge_utils.sh
similarity index 76%
rename from dge_workflow/lsf_rna_seq.sh
rename to dge_workflow/dge_utils.sh
index 0ca28af..64272ee 100755
--- a/dge_workflow/lsf_rna_seq.sh
+++ b/dge_workflow/dge_utils.sh
@@ -1,6 +1,19 @@
 ## docs
 ## http://blog.joncairns.com/2013/08/what-you-need-to-know-about-bash-functions/
 
+## fetch lsf_utils.sh and spinr.sh with curl and source them into this shell (curl's stderr is discarded)
+source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/lsf_utils.sh 2>&1 2>/dev/null)
+source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/R/utils/spinr.sh 2>&1 2>/dev/null)
+
+## prepend the tool directories to PATH; a directory exported later is searched before the earlier ones
+export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH
+export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH
+export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH
+export PATH=/sw/apps/python/current/bin:$PATH
+export PATH=/home/brandl/bin/deepTools/bin:$PATH
+
+export R_LIBS=/tmp/r_index ## export to make sure that packages are loaded from the local repository, otherwise sqlite won't work
+
 
 ## create fastq report for all fastq and fastq.gz files in the current directory
 dge_fastqc(){
@@ -44,10 +57,12 @@ done
 
 wait4jobs .fastqc_jobs
 
-ziprm fastqc_logs fastqc__*
+ziprm fastqc_logs fastqc__*.log
 
 spin.R $DGE_HOME/fastqc_summary.R $outputDir
 
+mailme "$project: fastqc done in $(pwd)"
+
 }
 export -f dge_fastqc
 
@@ -66,7 +81,7 @@ for fastqFile in $* ; do
 done
 
 wait4jobs .cajobs
-ziprm cutadapt_logs ${project}__ca__*
+ziprm cutadapt_logs ${project}__ca__*.log
 
 ## todo do a small report here about what has been trimmed away and why
 
@@ -135,13 +150,42 @@ done
 
 wait4jobs .tophatjobs
 
+ziprm tophat_logs ${project}__tophat__*.log
+
+
+dge_bam_correlate .
+
 ## create tophat mapping report
 spin.R $DGE_HOME/bam_qc.R .
 
+mailme "$project: tophat done in $(pwd)"
+
 }
 export -f dge_tophat_se
 
 
+## Usage: dge_bam_correlate <bam_directory> -- submits a bamCorrelate job (bc.pdf, bc.txt) over the bam files found below it
+dge_bam_correlate(){
+if [ $# -ne 1 ]; then
+    echo "Usage: dge_bam_correlate <bam_directory>" >&2 ; return 1;
+fi
+local bamDir=$1
+local bamFiles bamLabels bcCmd ## keep the helper variables out of the calling shell
+
+## match the .bam extension literally (the regex '.bam$' also accepts names like 'foobam'); drop paths containing 'unmapped'
+bamFiles=$(find "$bamDir" -name '*.bam' | grep -v "unmapped" | sort)
+[ -n "$bamFiles" ] || { echo "dge_bam_correlate: no bam files found in $bamDir" >&2 ; return 1; }
+bamLabels=$(echo "$bamFiles" | sed 's!.*/!!' | sed 's/_ca\.bam$//' | sed 's/\.bam$//' | xargs echo); echo $bamLabels
+
+## see how well bam files correlate using untrimmed data; NOTE(review): 6 processors requested below but only -n 4 passed to mysub -- confirm
+bcCmd="bamCorrelate bins --bamfiles $(echo $bamFiles | xargs echo) --labels $bamLabels --plotFile='bc.pdf' --outFileCorMatrix='bc.txt' --numberOfProcessors=6 --corMethod spearman --zMin 0.5 --zMax 1"
+mysub "${project}__bamcorrelate" "$bcCmd"  -q long -n 4 -R span[hosts=1] | blockScript
+mailme "$project: bamcorrelate done in $(pwd)"
+}
+export -f dge_bam_correlate
+
+
+
 dge_cuffdiff(){
 
 local gtfFile=$1
@@ -149,7 +193,7 @@ local bamDir=$2
 local labels=$3
 
 if [ $# -ne 3 ]; then
- echo "Usage: dge_fastqc <gtf_file>  <bam directory> <labels>" >&2 ; return;
+    echo "Usage: dge_cuffdiff <gtf_file>  <bam_directory> <labels>" >&2 ; return 1; ## name the right function; non-zero status lets callers detect the usage error
 fi
 
 if [ -z "$(which cuffdiff)" ]; then
@@ -209,5 +253,7 @@ fi
 cp $tmpDbDir/cuffData.db .
 rm -rf $tmpDbDir ## because it's no longer needed
 
+mailme "$project: cuffdiff done in $(pwd)"
+
 }
 export -f dge_cuffdiff
diff --git a/misc/bam_correlation.sh b/misc/naha_bam_correlation.sh
similarity index 100%
rename from misc/bam_correlation.sh
rename to misc/naha_bam_correlation.sh
-- 
GitLab