Skip to content
Snippets Groups Projects
Commit 9726b369 authored by Holger Brandl's avatar Holger Brandl
Browse files

improved cp enrichment

parent 8b1e2d8e
No related branches found
No related tags found
No related merge requests found
......@@ -22,8 +22,8 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/bio/diffex_commons.R")
require.auto(knitr)
require.auto(DT)
require_auto(knitr)
require_auto(DT)
......@@ -199,6 +199,8 @@ unloadNamespace('dplyr'); require(dplyr) ## tbd seriously???
#enrResults %<>% rename(Category=Description)
#enrResults %<>% rename(ontology=Category)
## remove to clumsy gene_id columns
enrResults %<>% select(-geneID)
write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt"))
# enrResults <- read.delim(paste0(resultsBaseName, "enrResults.txt"))
......@@ -210,7 +212,7 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt"))
facetSpecs <- paste("~", group_col %>% ac %>% paste(collapse=" + "))
#http://stackoverflow.com/questions/11028353/passing-string-variable-facet-wrap-in-ggplot-using-r
enrResults %>% ggplot(aes(ontology)) + facet_wrap(as.formula(facetSpecs)) + geom_bar() + rot_x_lab() + ggtitle("enriched term counts by cluster")
enrResults %>% ggplot(aes(ontology)) + facet_wrap(as.formula(facetSpecs), ncol=3) + geom_bar() + rot_x_lab() + ggtitle("enriched term counts by cluster")
#' Keep at max 100 terms for visualzation per group
......@@ -260,7 +262,6 @@ enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dot
#})
##ggsave2()
## include=FALSE, error=TRUE
#+ error=TRUE, echo=FALSE
warning("dropping levels")
......@@ -310,6 +311,7 @@ do({
# ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf"))
# print(logPlot)
stopifnot(str_length(fileNameLabel)>0)
tmpPng <- paste0(figDir, "/enrterms__", fileNameLabel, ".png")
ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE)
data.frame(file=tmpPng)
......@@ -325,7 +327,7 @@ l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFi
#' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order:
## todo why is tidyr not processing an empty dataframe
keggPathways <- enrResults %>% filter(ontology=="kegg") %$% ac(ID) %>% unique()
keggPathways <- enrResults %>% filter(ontology=="kegg") %$% ac(ID) %>% unique() # %>% head(3) ## todo remove debug head
##+ results='asis'
#if(!exists("keggPathways") | nrow(keggPathways)==0){
......@@ -333,8 +335,8 @@ keggPathways <- enrResults %>% filter(ontology=="kegg") %$% ac(ID) %>% unique()
#}
#+ echo=FALSE
require.auto(pathview)
require.auto(png)
require_auto(pathview)
require_auto(png)
# keggPathways <- keggPathways[1]
......@@ -350,34 +352,39 @@ sliceData <- overlayData %>%
# dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>%
column2rownames("ensembl_gene_id")
sliceData <- sliceData[,1:5]
#sliceData %>% head %>% kable()
data.frame(set=names(sliceData)) %>% mutate(slice_index=row_number()) %>% kable()
pwPlotDir <-".pathways"
dir.create(pwPlotDir)
plot_pathway <- function(pathwayID, overlayData){
# pathwayID="mmu04015"
plot_pathway <- function(pathwayID, sliceData){
# pathwayID="mmu04015"; sliceData <- sliceData
# browser()
# echo("processing pathway", pathwayID)
pv.out <- pathview(
gene.data = overlayData,
gene.data = sliceData,
pathway.id = pathwayID,
species = keggOrCode,
# out.suffix = pathwayID$kegg.description,
# out.suffix = pathwayID,
multi.state = ncol(overlayData)>1,
multi.state = ncol(sliceData)>1,
# kegg.native=F,
# node.sum = "mean", # the method name to calculate node summary given that multiple genes or compounds are mapped to it
limit = list(gene = c(-4,4)),
gene.idtype="ensembl"
)
outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(overlayData)>1, ".multi", ""), ".png")
if(is.numeric(pv.out) && pv.out==0) return(pathwayID) ## happens if pathview fails to parse xml e.g. for mmu01100
outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(sliceData)>1, ".multi", ""), ".png")
## move pathway plots into figures sub-directory
figuresPlotFile <- file.path(".pathways", outfile)
figuresPlotFile <- file.path(pwPlotDir, outfile)
system(paste("mv", outfile, figuresPlotFile))
system(paste("rm", paste0(pathwayID, ".xml"), paste0(pathwayID, ".png")))
......@@ -405,12 +412,25 @@ pathwayPlots <- keggPathways %>% llply(function(pathwayID){
save(pathwayPlots, file=".pathwayPlots.RData")
# pathwayPlots <- local(get(load("pathwayPlots.RData")))
# pathwayPlots <- local(get(load(".pathwayPlots.RData")))
## remove and report failed ones
failedPPs <- pathwayPlots[lapply(pathwayPlots, is.character) > 0]
echo("the following pathways failed to render:", paste(failedPPs, collapse=", "))
pathwayPlots <- pathwayPlots[lapply(pathwayPlots, is.character) == 0]
## make sure that all files exist
stopifnot(llply(pathwayPlots, function(x) file.exists(x$plotfile)) %>% unlist %>% all)
## prepare tooltips with expression scores
ens2entrez <- quote({
mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
biomaRt::getBM(attributes=c('ensembl_gene_id', 'entrezgene', 'external_gene_name'), mart=mart) %>% filter(!is.na(entrezgene))
}) %>% cache_it("ens2entrez") %>% distinct(ensembl_gene_id)
#unlen(ens2entrez$ensembl_gene_id)
......@@ -445,12 +465,15 @@ makeTooltip <- function(entrez_id){
## http://stackoverflow.com/questions/5478940/how-to-create-a-html-tooltip-on-top-of-image-map
## extended version with clickable links
pathwayPlots %>% l_ply(function(plotData){
createImgMap <- function(plotData){
#browser()
warning("compiling overlay toolips...")
#pngFile="mmu04015.pathview.png"
#plotData <- pathwayPlots[[1]]
#plotData <- unlist(pathwayPlots)
# pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
pngFile <- plotData$plotfile
stopifnot(file.exists(pngFile))
pathway_id=plotData$pathway_id
## create link for image map
......@@ -481,7 +504,9 @@ pathwayPlots %>% l_ply(function(plotData){
})
cat("</map></p><br>")
})
}
pathwayPlots %>% l_ply(createImgMap)
## respin it for cild inclusion
# require(knitr); setwd("/Volumes/projects/bioinfo/scripts/ngs_tools/dev/common"); spin("cp_enrichment.R", knit=F)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment