From d3140b19e0617dd5e0bb2fb988b8fb041c4e4c20 Mon Sep 17 00:00:00 2001
From: Holger Brandl <brandl@mpi-cbg.de>
Date: Wed, 18 Mar 2015 15:20:41 +0100
Subject: [PATCH] synced cs workflows

---
 chipseq_workflow/cs_region_dba.R |  4 +++-
 chipseq_workflow/cs_snippets.sh  | 12 +++++++++++-
 chipseq_workflow/peak_report.R   |  5 +++++
 dge_workflow/dge_utils.sh        |  3 ++-
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/chipseq_workflow/cs_region_dba.R b/chipseq_workflow/cs_region_dba.R
index 7f1a02d..4c4f25b 100755
--- a/chipseq_workflow/cs_region_dba.R
+++ b/chipseq_workflow/cs_region_dba.R
@@ -3,7 +3,9 @@
 devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R")
 devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R")
 #devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R")
-devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R")
+#devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R")
+devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/master/R/bio/diffex_commons.R")
+
 
 require(tidyr)
 require(knitr)
diff --git a/chipseq_workflow/cs_snippets.sh b/chipseq_workflow/cs_snippets.sh
index f781a93..b6f55d8 100755
--- a/chipseq_workflow/cs_snippets.sh
+++ b/chipseq_workflow/cs_snippets.sh
@@ -1,10 +1,20 @@
 ########################################################################################################################
-### Picard ResortSam
+### Chromosome Sorting
+
+## By default the sorting of samtools faidx is not good, but we can resort it, and samtools sort will respect this sorting order
+## To fix existing alginments either use ReorderSam or reheader | resort with samtools
+
+## check consist sorting order
+#samtools view /projects/bioinfo/holger/projects/krause_chipseq/alignments_mmfilt/H3HA_Sphere_mmf.bam  | cut -f 3 | uniq
+#sort -k1,1g -k2,2n $regionModel.bed | cut -f1 | uniq
+### --> use reorder sam http://sourceforge.net/p/samtools/mailman/samtools-help/thread/4DB6F0CD.6050403@umdnj.edu/
 
 sort -k1V $bowtieIndex.fa.dict_tmp > $bowtieIndex.dict
 
 samtools view -h $bamFile | removeMultiMappers > ${bamBaseName}_mmf.tmp.bam
 samtools index ${bamBaseName}_mmf.tmp.bam # because ReorderSam runs substantially faster if the input is an indexed BAM file.
+
+# Use Picard ResortSam for resorting --> this will sort according to chromosome order in reference fasta only
 picard ReorderSam I=${bamBaseName}_mmf.tmp.bam O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa
 ##    samtools view -h $bamFile | removeMultiMappers | picard ReorderSam I=/dev/stdin O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa
 samtools index ${bamBaseName}_mmf.bam
diff --git a/chipseq_workflow/peak_report.R b/chipseq_workflow/peak_report.R
index cb1c528..005a277 100755
--- a/chipseq_workflow/peak_report.R
+++ b/chipseq_workflow/peak_report.R
@@ -35,8 +35,13 @@ b_peaks <- list.files(".", "*.broadPeak.*", full.names=TRUE, recursive=T) %>%
 
 peaks <- rbind_list(n_peaks, b_peaks)
 
+## todo remove this later
+peaks <- mutate(peaks, file=file %>%str_replace( "_narrow", ".narrow") %>% str_replace("_broad", ".broad"))
+
+with(peaks, as.data.frame(table(file)))
 peaks %<>% separate(file, c("sample", "peak_type", "ext"), sep="[.]") %>% select(-ext)
 
+
 splitProtTime <- . %>% separate(sample, c("protein", "timpoint"), sep="_", remove=F)
 #peaks %>% splitProtTime %>% head
 
diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index 7ef500a..0407aac 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -19,7 +19,8 @@ export PATH=/home/brandl/bin/spinr:$PATH
 # which tophat;  which bowtie2; which cuffdiff
 
 
-export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work
+## no longer needed because packages are no kept in home
+#export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work
 
 
 ## create fastq report for all fastq and fastq.gz files in the current directory
-- 
GitLab