From d3140b19e0617dd5e0bb2fb988b8fb041c4e4c20 Mon Sep 17 00:00:00 2001 From: Holger Brandl <brandl@mpi-cbg.de> Date: Wed, 18 Mar 2015 15:20:41 +0100 Subject: [PATCH] synced cs workflows --- chipseq_workflow/cs_region_dba.R | 4 +++- chipseq_workflow/cs_snippets.sh | 12 +++++++++++- chipseq_workflow/peak_report.R | 5 +++++ dge_workflow/dge_utils.sh | 3 ++- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/chipseq_workflow/cs_region_dba.R b/chipseq_workflow/cs_region_dba.R index 7f1a02d..4c4f25b 100755 --- a/chipseq_workflow/cs_region_dba.R +++ b/chipseq_workflow/cs_region_dba.R @@ -3,7 +3,9 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R") #devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R") -devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R") +#devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R") +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/master/R/bio/diffex_commons.R") + require(tidyr) require(knitr) diff --git a/chipseq_workflow/cs_snippets.sh b/chipseq_workflow/cs_snippets.sh index f781a93..b6f55d8 100755 --- a/chipseq_workflow/cs_snippets.sh +++ b/chipseq_workflow/cs_snippets.sh @@ -1,10 +1,20 @@ ######################################################################################################################## -### Picard ResortSam +### Chromosome Sorting + +## By default the sorting of samtools faidx is not good, but we can resort it, and samtools sort will respect this sorting order +## To fix existing alginments either use ReorderSam or reheader | resort with samtools + +## check consist sorting order +#samtools view /projects/bioinfo/holger/projects/krause_chipseq/alignments_mmfilt/H3HA_Sphere_mmf.bam | cut -f 3 | uniq +#sort -k1,1g -k2,2n $regionModel.bed | cut -f1 | uniq +### --> use reorder sam http://sourceforge.net/p/samtools/mailman/samtools-help/thread/4DB6F0CD.6050403@umdnj.edu/ sort -k1V $bowtieIndex.fa.dict_tmp > $bowtieIndex.dict samtools view -h $bamFile | removeMultiMappers > ${bamBaseName}_mmf.tmp.bam samtools index ${bamBaseName}_mmf.tmp.bam # because ReorderSam runs substantially faster if the input is an indexed BAM file. + +# Use Picard ResortSam for resorting --> this will sort according to chromosome order in reference fasta only picard ReorderSam I=${bamBaseName}_mmf.tmp.bam O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa ## samtools view -h $bamFile | removeMultiMappers | picard ReorderSam I=/dev/stdin O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa samtools index ${bamBaseName}_mmf.bam diff --git a/chipseq_workflow/peak_report.R b/chipseq_workflow/peak_report.R index cb1c528..005a277 100755 --- a/chipseq_workflow/peak_report.R +++ b/chipseq_workflow/peak_report.R @@ -35,8 +35,13 @@ b_peaks <- list.files(".", "*.broadPeak.*", full.names=TRUE, recursive=T) %>% peaks <- rbind_list(n_peaks, b_peaks) +## todo remove this later +peaks <- mutate(peaks, file=file %>%str_replace( "_narrow", ".narrow") %>% str_replace("_broad", ".broad")) + +with(peaks, as.data.frame(table(file))) peaks %<>% separate(file, c("sample", "peak_type", "ext"), sep="[.]") %>% select(-ext) + splitProtTime <- . %>% separate(sample, c("protein", "timpoint"), sep="_", remove=F) #peaks %>% splitProtTime %>% head diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 7ef500a..0407aac 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -19,7 +19,8 @@ export PATH=/home/brandl/bin/spinr:$PATH # which tophat; which bowtie2; which cuffdiff -export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work +## no longer needed because packages are no kept in home +#export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work ## create fastq report for all fastq and fastq.gz files in the current directory -- GitLab