From c183f739b93dab0204cad4f88dbec26a68afa42c Mon Sep 17 00:00:00 2001 From: Holger Brandl <brandl@mpi-cbg.de> Date: Mon, 9 Mar 2015 13:57:00 +0100 Subject: [PATCH] cont. chipseq qc --- chipseq_workflow/chipseq_utils.sh | 19 +++++++++++++------ dge_workflow/dge_utils.sh | 3 +-- dge_workflow/fastqc_summary.R | 8 ++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/chipseq_workflow/chipseq_utils.sh b/chipseq_workflow/chipseq_utils.sh index cbd6bc3..75ceae0 100755 --- a/chipseq_workflow/chipseq_utils.sh +++ b/chipseq_workflow/chipseq_utils.sh @@ -49,24 +49,31 @@ scale_fill_discrete <- function (...){ scale_color_brewer(..., type = "seq", pal #+ fig.height=nrow(algnSummary) ggplot(algnSummary, aes(condition, mapping_efficiency)) + geom_bar(stat="identity") + - coord_flip() + ylim(0,100) + + coord_flip() + ggtitle("mapping efficiency") ggplot(algnSummary, aes(condition, num_reads)) + geom_bar(stat="identity") + coord_flip() + - ggtitle("read counts") +scale_y_continuous(labels=comma) + ggtitle("read counts") + + scale_y_continuous(labels=comma) ggplot(algnSummary, aes(condition, unique_mapper_prop)) + - geom_bar(stat="identity") + coord_flip() + + geom_bar(stat="identity") + + coord_flip() + ggtitle("unique-mapper proportions") + - scale_y_continuous(labels=comma) + scale_y_continuous(labels=percent) + + ylim(0,100) + ggplot(algnSummary, aes(condition, multi_mappers_prop)) + - geom_bar(stat="identity") + coord_flip() + + geom_bar(stat="identity") + + coord_flip() + ggtitle("multi-mapper proportions") + - scale_y_continuous(labels=comma) + scale_y_continuous(labels=percent) + + ylim(0,100) + #> ## Bam Correlation diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index d1b18b7..2e6b9c6 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -5,8 +5,7 @@ source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/lsf_utils.sh 2>&1 2>/dev/null) ## enable snippet spinning -source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/R/utils/spinr.sh 2>&1 2>/dev/null) - +source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/master/R/spinr/spin_utils.sh 2>&1 2>/dev/null) ## define common binaries export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH diff --git a/dge_workflow/fastqc_summary.R b/dge_workflow/fastqc_summary.R index a402dc3..2061feb 100755 --- a/dge_workflow/fastqc_summary.R +++ b/dge_workflow/fastqc_summary.R @@ -52,7 +52,7 @@ readCount <- function(statsFile){ readCounts <- fastqDataFiles %>% ldply(readCount) # %>% print_head() require.auto(scales) -#+ fig.width=12, fig.height=round(nrow(readCounts)/3) +#+ fig.width=12, fig.height=round(nrow(readCounts)/2) ggplot(readCounts, aes(run, num_reads)) + geom_bar(stat="identity") + coord_flip() + scale_y_continuous(labels=comma) + ggtitle("read counts") # #+ results='asis' @@ -75,7 +75,7 @@ readSummary <- function(statsFile){ qcSummary <- fastqDataFiles %>% ldply(readSummary) #' # Base Quality Distribution Summary -#+ fig.height=2+round(nrow(readCounts)/3), fig.width=12 +#+ fig.height=2+round(nrow(readCounts)/2), fig.width=12 qcSummary %>% ggplot(aes(score, run, fill=tolower(flag))) + geom_tile() + rotXlab() + @@ -94,7 +94,7 @@ dupLevels <- fastqDataFiles %>% ldply(function(statsFile){ }) require.auto(scales) -#+ fig.width=12, fig.height=round(nrow(dupLevels)/3) +#+ fig.width=12, fig.height=round(nrow(dupLevels)/2) ggplot(dupLevels, aes(run, dedup_proportion)) + geom_bar(stat="identity") + coord_flip() + scale_y_continuous(labels=percent) + ggtitle("unique_reads/total_reads") + ylim(0,1) @@ -133,7 +133,7 @@ baseQualities %>% ggplot(aes(reorder(Base, base_order), Mean, group=run, color=r runs <- with(baseQualities, as.data.frame(table(run))) -#+ warning=FALSE, fig.width=15, fig.height=3*ceiling(nrow(runs)/3) +#+ warning=FALSE, fig.width=15, fig.height=3*ceiling(nrow(runs)/2) ## http://stackoverflow.com/questions/12518387/can-i-create-an-empty-ggplot2-plot-in-r baseQualities %>% -- GitLab