From 9726b3698498bd04f91dd982f6de49c5d158e9c7 Mon Sep 17 00:00:00 2001 From: Holger Brandl <holgerbrandl@gmail.com> Date: Wed, 9 Dec 2015 15:12:30 +0100 Subject: [PATCH] improved cp enrichment --- common/cp_enrichment.R | 59 ++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/common/cp_enrichment.R b/common/cp_enrichment.R index e350856..d0bc70e 100755 --- a/common/cp_enrichment.R +++ b/common/cp_enrichment.R @@ -22,8 +22,8 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/bio/diffex_commons.R") -require.auto(knitr) -require.auto(DT) +require_auto(knitr) +require_auto(DT) @@ -199,6 +199,8 @@ unloadNamespace('dplyr'); require(dplyr) ## tbd seriously??? #enrResults %<>% rename(Category=Description) #enrResults %<>% rename(ontology=Category) +## remove to clumsy gene_id columns +enrResults %<>% select(-geneID) write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt")) # enrResults <- read.delim(paste0(resultsBaseName, "enrResults.txt")) @@ -210,7 +212,7 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt")) facetSpecs <- paste("~", group_col %>% ac %>% paste(collapse=" + ")) #http://stackoverflow.com/questions/11028353/passing-string-variable-facet-wrap-in-ggplot-using-r -enrResults %>% ggplot(aes(ontology)) + facet_wrap(as.formula(facetSpecs)) + geom_bar() + rot_x_lab() + ggtitle("enriched term counts by cluster") +enrResults %>% ggplot(aes(ontology)) + facet_wrap(as.formula(facetSpecs), ncol=3) + geom_bar() + rot_x_lab() + ggtitle("enriched term counts by cluster") #' Keep at max 100 terms for visualzation per group @@ -260,7 +262,6 @@ enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dot #}) ##ggsave2() -## include=FALSE, error=TRUE #+ error=TRUE, echo=FALSE warning("dropping levels") @@ -310,6 +311,7 @@ do({ # ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf")) # print(logPlot) + stopifnot(str_length(fileNameLabel)>0) tmpPng <- paste0(figDir, "/enrterms__", fileNameLabel, ".png") ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE) data.frame(file=tmpPng) @@ -325,7 +327,7 @@ l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFi #' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order: ## todo why is tidyr not processing an empty dataframe -keggPathways <- enrResults %>% filter(ontology=="kegg") %$% ac(ID) %>% unique() +keggPathways <- enrResults %>% filter(ontology=="kegg") %$% ac(ID) %>% unique() # %>% head(3) ## todo remove debug head ##+ results='asis' #if(!exists("keggPathways") | nrow(keggPathways)==0){ @@ -333,8 +335,8 @@ keggPathways <- enrResults %>% filter(ontology=="kegg") %$% ac(ID) %>% unique() #} #+ echo=FALSE -require.auto(pathview) -require.auto(png) +require_auto(pathview) +require_auto(png) # keggPathways <- keggPathways[1] @@ -350,34 +352,39 @@ sliceData <- overlayData %>% # dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>% column2rownames("ensembl_gene_id") +sliceData <- sliceData[,1:5] + #sliceData %>% head %>% kable() data.frame(set=names(sliceData)) %>% mutate(slice_index=row_number()) %>% kable() pwPlotDir <-".pathways" + dir.create(pwPlotDir) -plot_pathway <- function(pathwayID, overlayData){ -# pathwayID="mmu04015" +plot_pathway <- function(pathwayID, sliceData){ +# pathwayID="mmu04015"; sliceData <- sliceData # browser() # echo("processing pathway", pathwayID) pv.out <- pathview( - gene.data = overlayData, + gene.data = sliceData, pathway.id = pathwayID, species = keggOrCode, # out.suffix = pathwayID$kegg.description, # out.suffix = pathwayID, - multi.state = ncol(overlayData)>1, + multi.state = ncol(sliceData)>1, # kegg.native=F, # node.sum = "mean", # the method name to calculate node summary given that multiple genes or compounds are mapped to it limit = list(gene = c(-4,4)), gene.idtype="ensembl" ) - outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(overlayData)>1, ".multi", ""), ".png") + if(is.numeric(pv.out) && pv.out==0) return(pathwayID) ## happens if pathview fails to parse xml e.g. for mmu01100 + + outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(sliceData)>1, ".multi", ""), ".png") ## move pathway plots into figures sub-directory - figuresPlotFile <- file.path(".pathways", outfile) + figuresPlotFile <- file.path(pwPlotDir, outfile) system(paste("mv", outfile, figuresPlotFile)) system(paste("rm", paste0(pathwayID, ".xml"), paste0(pathwayID, ".png"))) @@ -405,12 +412,25 @@ pathwayPlots <- keggPathways %>% llply(function(pathwayID){ save(pathwayPlots, file=".pathwayPlots.RData") -# pathwayPlots <- local(get(load("pathwayPlots.RData"))) +# pathwayPlots <- local(get(load(".pathwayPlots.RData"))) + + +## remove and report failed ones +failedPPs <- pathwayPlots[lapply(pathwayPlots, is.character) > 0] +echo("the following pathways failed to render:", paste(failedPPs, collapse=", ")) +pathwayPlots <- pathwayPlots[lapply(pathwayPlots, is.character) == 0] + + +## make sure that all files exist +stopifnot(llply(pathwayPlots, function(x) file.exists(x$plotfile)) %>% unlist %>% all) ## prepare tooltips with expression scores ens2entrez <- quote({ - mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl")) +# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl")) + ## todo fix this https://support.bioconductor.org/p/74322/ + mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org")) + biomaRt::getBM(attributes=c('ensembl_gene_id', 'entrezgene', 'external_gene_name'), mart=mart) %>% filter(!is.na(entrezgene)) }) %>% cache_it("ens2entrez") %>% distinct(ensembl_gene_id) #unlen(ens2entrez$ensembl_gene_id) @@ -445,12 +465,15 @@ makeTooltip <- function(entrez_id){ ## http://stackoverflow.com/questions/5478940/how-to-create-a-html-tooltip-on-top-of-image-map ## extended version with clickable links -pathwayPlots %>% l_ply(function(plotData){ +createImgMap <- function(plotData){ +#browser() + warning("compiling overlay toolips...") #pngFile="mmu04015.pathview.png" #plotData <- pathwayPlots[[1]] #plotData <- unlist(pathwayPlots) # pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1] pngFile <- plotData$plotfile + stopifnot(file.exists(pngFile)) pathway_id=plotData$pathway_id ## create link for image map @@ -481,7 +504,9 @@ pathwayPlots %>% l_ply(function(plotData){ }) cat("</map></p><br>") -}) +} + +pathwayPlots %>% l_ply(createImgMap) ## respin it for cild inclusion # require(knitr); setwd("/Volumes/projects/bioinfo/scripts/ngs_tools/dev/common"); spin("cp_enrichment.R", knit=F) -- GitLab