diff --git a/common/david_enrichment.R b/common/david_enrichment.R index b27e97db833116f00025bacbacaf8027e7cb4874..349aac184ae1ecab0974dd36d5b97fda29fba46c 100755 --- a/common/david_enrichment.R +++ b/common/david_enrichment.R @@ -1,5 +1,5 @@ #!/usr/bin/env Rscript -#+ cache=FALSE +#+ echo=FALSE suppressMessages(require(docopt)) @@ -14,6 +14,7 @@ Options: opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining #opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt geneClusters.RData" ) +#opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt grpdGoiClusters.RData" ) devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/ggplot_commons.R") @@ -24,7 +25,8 @@ require.auto(knitr) ## to fix child support issue with knitr, see also ## http://stackoverflow.com/questions/20030523/knitr-nested-child-documents ## https://github.com/yihui/knitr/issues/38 -if(exists('project_dir')) setwd(project_dir) +# todo disabled because root.dir in parent document seems the only working solution +#if(exists('project_dir')) setwd(project_dir) #print(getwd()) ## load the data @@ -55,16 +57,6 @@ geneLists %>% inner_join(listLabels) %>% ylab("") -#geneLists <- degs %>% -## transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_")) -# transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2)) -# -#geneLists <- if(split_hit_list){ -# degs %>% group_by(sample_1, sample_2, sample_1_overex) -#}else{ -# degs %>% group_by(sample_1, sample_2) -#} - #enrResults <- geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id)) enrResults <- quote(geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>% cache_it(paste0("enrdata_", digest(geneLists))) @@ -76,10 +68,14 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt")) sigEnrResults <- subset(enrResults, Bonferroni <0.01) +nrow(enrResults) +nrow(sigEnrResults) + write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt")) # sigEnrResults <- read.delim(paste0(resultsBaseName, "sigEnrResults.txt")) #' [Very Significant Terms](`r paste0(resultsBaseName, "sigEnrResults.txt")`) +#+ include=FALSE, eval=FALSE ## plot the enrichment results #sigEnrResults %>% group_by(Category, add=T) %>% do({ # logPlot <- . %>% ggplot(aes(Term, PValue)) + @@ -116,7 +112,9 @@ write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt")) #}) ##ggsave2() -#+ include=FALSE +## include=FALSE, error=TRUE + +#+ error=TRUE, echo=FALSE warning("dropping levels") sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette @@ -166,30 +164,24 @@ do({ #+ results="asis" l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))}) - - -#ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type)) - -#sigEnrResults <- arrange(sigEnrResults, site_type, set, -Bonferroni) -#sigEnrResults$Term <-lockFactorOrder(sigEnrResults$Term) - -#ggplot(sigEnrResults, aes(Term, -log10(Bonferroni))) +coord_flip() + geom_bar() + facet_wrap(~site_type + set) -#ggplot(sigEnrResults, aes(reorder(set, -Bonferroni), -log10(Bonferroni), label=Term)) + geom_text(alpha=0.6, size=3) + ggtitle("enriched terms by set") + scale_y_log10() -#ggsave2(width=14, height=12) - -#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t") - ######################################################################################################################## -#' ## Enriched KEGG Pathways -# eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0 +# ' ## Enriched KEGG Pathways +#+ eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0 #' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order: +## todo why is tidyr not processing an empty dataframe keggPathways <- sigEnrResults %>% filter(Category=="KEGG_PATHWAY") %>% separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>% with(kegg_pathway_id) %>% ac() %>% unique() +##+ results='asis' +#if(!exists("keggPathways") | nrow(keggPathways)==0){ +# cat("No enriched pathways found") +#} + +#+ echo=FALSE require(pathview) require(png) @@ -281,7 +273,7 @@ makeTooltip <- function(entrez_id){ -#+ results="asis" +#+ results="asis", echo=FALSE ## simple non-clickable plots ## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of diff --git a/common/david_enrichment.Rmd b/common/david_enrichment.Rmd index 36ac12fa3126ed6ed80cf0e30fb506fc5c64e98c..199767683f66b3640460a8ee2b627780e4ea84dc 100755 --- a/common/david_enrichment.Rmd +++ b/common/david_enrichment.Rmd @@ -1,7 +1,7 @@ ```{r } #!/usr/bin/env Rscript -```{r cache=FALSE} +```{r echo=FALSE} suppressMessages(require(docopt)) @@ -16,6 +16,7 @@ Options: opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining #opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt geneClusters.RData" ) +#opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt grpdGoiClusters.RData" ) devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/ggplot_commons.R") @@ -26,7 +27,8 @@ require.auto(knitr) ## to fix child support issue with knitr, see also ## http://stackoverflow.com/questions/20030523/knitr-nested-child-documents ## https://github.com/yihui/knitr/issues/38 -if(exists('project_dir')) setwd(project_dir) +# todo disabled because root.dir in parent document seems the only working solution +#if(exists('project_dir')) setwd(project_dir) #print(getwd()) ## load the data @@ -59,16 +61,6 @@ geneLists %>% inner_join(listLabels) %>% ylab("") -#geneLists <- degs %>% -## transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_")) -# transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2)) -# -#geneLists <- if(split_hit_list){ -# degs %>% group_by(sample_1, sample_2, sample_1_overex) -#}else{ -# degs %>% group_by(sample_1, sample_2) -#} - #enrResults <- geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id)) enrResults <- quote(geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>% cache_it(paste0("enrdata_", digest(geneLists))) @@ -83,13 +75,16 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt")) ```{r } sigEnrResults <- subset(enrResults, Bonferroni <0.01) +nrow(enrResults) +nrow(sigEnrResults) + write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt")) # sigEnrResults <- read.delim(paste0(resultsBaseName, "sigEnrResults.txt")) ``` [Very Significant Terms](`r paste0(resultsBaseName, "sigEnrResults.txt")`) -```{r } +```{r include=FALSE, eval=FALSE} ## plot the enrichment results #sigEnrResults %>% group_by(Category, add=T) %>% do({ # logPlot <- . %>% ggplot(aes(Term, PValue)) + @@ -126,7 +121,9 @@ write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt")) #}) ##ggsave2() -```{r include=FALSE} +## include=FALSE, error=TRUE + +```{r error=TRUE, echo=FALSE} warning("dropping levels") sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette @@ -176,36 +173,29 @@ do({ ```{r results="asis"} l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))}) - - -#ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type)) - -#sigEnrResults <- arrange(sigEnrResults, site_type, set, -Bonferroni) -#sigEnrResults$Term <-lockFactorOrder(sigEnrResults$Term) - -#ggplot(sigEnrResults, aes(Term, -log10(Bonferroni))) +coord_flip() + geom_bar() + facet_wrap(~site_type + set) -#ggplot(sigEnrResults, aes(reorder(set, -Bonferroni), -log10(Bonferroni), label=Term)) + geom_text(alpha=0.6, size=3) + ggtitle("enriched terms by set") + scale_y_log10() -#ggsave2(width=14, height=12) - -#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t") - ######################################################################################################################## ``` ## Enriched KEGG Pathways -```{r } -# eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0 +```{r eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0} ``` To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order: ```{r } +## todo why is tidyr not processing an empty dataframe keggPathways <- sigEnrResults %>% filter(Category=="KEGG_PATHWAY") %>% separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>% with(kegg_pathway_id) %>% ac() %>% unique() +```{r results='asis'} +#if(!exists("keggPathways") | nrow(keggPathways)==0){ +# cat("No enriched pathways found") +#} + +```{r echo=FALSE} require(pathview) require(png) @@ -297,7 +287,7 @@ makeTooltip <- function(entrez_id){ -```{r results="asis"} +```{r results="asis", echo=FALSE} ## simple non-clickable plots ## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of