Skip to content
Snippets Groups Projects
Commit d3352827 authored by Holger Brandl's avatar Holger Brandl
Browse files

continued clustering analysis

parent 40fddf5f
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env Rscript
#+ cache=FALSE
#+ echo=FALSE
suppressMessages(require(docopt))
......@@ -14,6 +14,7 @@ Options:
opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
#opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt geneClusters.RData" )
#opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt grpdGoiClusters.RData" )
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/ggplot_commons.R")
......@@ -24,7 +25,8 @@ require.auto(knitr)
## to fix child support issue with knitr, see also
## http://stackoverflow.com/questions/20030523/knitr-nested-child-documents
## https://github.com/yihui/knitr/issues/38
if(exists('project_dir')) setwd(project_dir)
# todo disabled because root.dir in parent document seems the only working solution
#if(exists('project_dir')) setwd(project_dir)
#print(getwd())
## load the data
......@@ -55,16 +57,6 @@ geneLists %>% inner_join(listLabels) %>%
ylab("")
#geneLists <- degs %>%
## transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_"))
# transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
#
#geneLists <- if(split_hit_list){
# degs %>% group_by(sample_1, sample_2, sample_1_overex)
#}else{
# degs %>% group_by(sample_1, sample_2)
#}
#enrResults <- geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id))
enrResults <- quote(geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>%
cache_it(paste0("enrdata_", digest(geneLists)))
......@@ -76,10 +68,14 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt"))
sigEnrResults <- subset(enrResults, Bonferroni <0.01)
nrow(enrResults)
nrow(sigEnrResults)
write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt"))
# sigEnrResults <- read.delim(paste0(resultsBaseName, "sigEnrResults.txt"))
#' [Very Significant Terms](`r paste0(resultsBaseName, "sigEnrResults.txt")`)
#+ include=FALSE, eval=FALSE
## plot the enrichment results
#sigEnrResults %>% group_by(Category, add=T) %>% do({
# logPlot <- . %>% ggplot(aes(Term, PValue)) +
......@@ -116,7 +112,9 @@ write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt"))
#})
##ggsave2()
#+ include=FALSE
## include=FALSE, error=TRUE
#+ error=TRUE, echo=FALSE
warning("dropping levels")
sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette
......@@ -166,30 +164,24 @@ do({
#+ results="asis"
l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
#ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type))
#sigEnrResults <- arrange(sigEnrResults, site_type, set, -Bonferroni)
#sigEnrResults$Term <-lockFactorOrder(sigEnrResults$Term)
#ggplot(sigEnrResults, aes(Term, -log10(Bonferroni))) +coord_flip() + geom_bar() + facet_wrap(~site_type + set)
#ggplot(sigEnrResults, aes(reorder(set, -Bonferroni), -log10(Bonferroni), label=Term)) + geom_text(alpha=0.6, size=3) + ggtitle("enriched terms by set") + scale_y_log10()
#ggsave2(width=14, height=12)
#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t")
########################################################################################################################
#' ## Enriched KEGG Pathways
# eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0
# ' ## Enriched KEGG Pathways
#+ eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0
#' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order:
## todo why is tidyr not processing an empty dataframe
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>%
with(kegg_pathway_id) %>% ac() %>% unique()
##+ results='asis'
#if(!exists("keggPathways") | nrow(keggPathways)==0){
# cat("No enriched pathways found")
#}
#+ echo=FALSE
require(pathview)
require(png)
......@@ -281,7 +273,7 @@ makeTooltip <- function(entrez_id){
#+ results="asis"
#+ results="asis", echo=FALSE
## simple non-clickable plots
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
......
```{r }
#!/usr/bin/env Rscript
```{r cache=FALSE}
```{r echo=FALSE}
suppressMessages(require(docopt))
......@@ -16,6 +16,7 @@ Options:
opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
#opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt geneClusters.RData" )
#opts <- docopt(doc, "--overlay_expr_data ../ctrl_fc_expr_filtered.txt grpdGoiClusters.RData" )
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.11/R/ggplot_commons.R")
......@@ -26,7 +27,8 @@ require.auto(knitr)
## to fix child support issue with knitr, see also
## http://stackoverflow.com/questions/20030523/knitr-nested-child-documents
## https://github.com/yihui/knitr/issues/38
if(exists('project_dir')) setwd(project_dir)
# todo disabled because root.dir in parent document seems the only working solution
#if(exists('project_dir')) setwd(project_dir)
#print(getwd())
## load the data
......@@ -59,16 +61,6 @@ geneLists %>% inner_join(listLabels) %>%
ylab("")
#geneLists <- degs %>%
## transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_"))
# transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
#
#geneLists <- if(split_hit_list){
# degs %>% group_by(sample_1, sample_2, sample_1_overex)
#}else{
# degs %>% group_by(sample_1, sample_2)
#}
#enrResults <- geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id))
enrResults <- quote(geneLists %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>%
cache_it(paste0("enrdata_", digest(geneLists)))
......@@ -83,13 +75,16 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt"))
```{r }
sigEnrResults <- subset(enrResults, Bonferroni <0.01)
nrow(enrResults)
nrow(sigEnrResults)
write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt"))
# sigEnrResults <- read.delim(paste0(resultsBaseName, "sigEnrResults.txt"))
```
[Very Significant Terms](`r paste0(resultsBaseName, "sigEnrResults.txt")`)
```{r }
```{r include=FALSE, eval=FALSE}
## plot the enrichment results
#sigEnrResults %>% group_by(Category, add=T) %>% do({
# logPlot <- . %>% ggplot(aes(Term, PValue)) +
......@@ -126,7 +121,9 @@ write.delim(sigEnrResults, file=paste0(resultsBaseName, "sigEnrResults.txt"))
#})
##ggsave2()
```{r include=FALSE}
## include=FALSE, error=TRUE
```{r error=TRUE, echo=FALSE}
warning("dropping levels")
sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette
......@@ -176,36 +173,29 @@ do({
```{r results="asis"}
l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
#ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type))
#sigEnrResults <- arrange(sigEnrResults, site_type, set, -Bonferroni)
#sigEnrResults$Term <-lockFactorOrder(sigEnrResults$Term)
#ggplot(sigEnrResults, aes(Term, -log10(Bonferroni))) +coord_flip() + geom_bar() + facet_wrap(~site_type + set)
#ggplot(sigEnrResults, aes(reorder(set, -Bonferroni), -log10(Bonferroni), label=Term)) + geom_text(alpha=0.6, size=3) + ggtitle("enriched terms by set") + scale_y_log10()
#ggsave2(width=14, height=12)
#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t")
########################################################################################################################
```
## Enriched KEGG Pathways
```{r }
# eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0
```{r eval=nrow(sigEnrResults %>% filter(Category=="KEGG_PATHWAY")) >0}
```
To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order:
```{r }
## todo why is tidyr not processing an empty dataframe
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>%
with(kegg_pathway_id) %>% ac() %>% unique()
```{r results='asis'}
#if(!exists("keggPathways") | nrow(keggPathways)==0){
# cat("No enriched pathways found")
#}
```{r echo=FALSE}
require(pathview)
require(png)
......@@ -297,7 +287,7 @@ makeTooltip <- function(entrez_id){
```{r results="asis"}
```{r results="asis", echo=FALSE}
## simple non-clickable plots
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment