Skip to content
Snippets Groups Projects
Commit 2499fcbf authored by Holger Brandl's avatar Holger Brandl
Browse files

cont pathway visualization

parent 4d6c50f3
No related branches found
No related tags found
No related merge requests found
http://amp.pharm.mssm.edu/Enrichr/
http://amp.pharm.mssm.edu/Harmonizome/
#!/usr/bin/env Rscript
#+ echo=FALSE, error=F
suppressMessages(require(docopt))
## todo use textual input here for ease of use
doc <- '
Perform an enrichment analysis for a set of genes
Usage: plot_kegg_pathways.R <overlay_data_tsv> <pathways>...
'
opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
# opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" )
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/bio/diffex_commons.R")
require_auto(knitr)
require_auto(DT)
require_auto(readr)
## todo why is tidyr not processing an empty dataframe
keggPathways <- opts$pathways
##+ results='asis'
#if(!exists("keggPathways") | nrow(keggPathways)==0){
# cat("No enriched pathways found")
#}
#+ echo=FALSE
require_auto(pathview)
require_auto(png)
# keggPathways <- keggPathways[1]
#if(nrow(keggPathways)==0){
# echo("no enriched kegg pathways were found in the dataset")
#}else{
#keggOrCode <- "mmu"
keggOrCode <- guess_pathview_species(geneLists$ensembl_gene_id)
## prepare p-value data
sliceData <- overlayData %>%
# dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>%
column2rownames("ensembl_gene_id")
#sliceData <- sliceData[,1:5]
#sliceData %>% head %>% kable()
data.frame(set=names(sliceData)) %>% mutate(slice_index=row_number()) %>% kable()
pwPlotDir <-".pathways"
dir.create(pwPlotDir)
plot_pathway <- function(pathwayID, sliceData){
# pathwayID="mmu04015"; sliceData <- sliceData
# browser()
# echo("processing pathway", pathwayID)
pv.out <- pathview(
gene.data = sliceData,
pathway.id = pathwayID,
species = keggOrCode,
# out.suffix = pathwayID$kegg.description,
# out.suffix = pathwayID,
multi.state = ncol(sliceData)>1,
# kegg.native=F,
# node.sum = "mean", # the method name to calculate node summary given that multiple genes or compounds are mapped to it
limit = list(gene = c(-4,4)),
gene.idtype="ensembl"
)
if(is.numeric(pv.out) && pv.out==0) return(pathwayID) ## happens if pathview fails to parse xml e.g. for mmu01100
outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(sliceData)>1, ".multi", ""), ".png")
## move pathway plots into figures sub-directory
figuresPlotFile <- file.path(pwPlotDir, outfile)
system(paste("mv", outfile, figuresPlotFile))
system(paste("rm", paste0(pathwayID, ".xml"), paste0(pathwayID, ".png")))
## interactive plotting
# ima <- readPNG(outfile)
# plot.new()
# lim <- par()
# rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
pv.out$plotfile=figuresPlotFile
pv.out$pathway_id=pathwayID
return(pv.out)
}
#keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015")
#keggPathways <- c("mmu00830")
pathwayPlots <- keggPathways %>% llply(function(pathwayID){
plot_pathway(pathwayID, sliceData)
})
save(pathwayPlots, file=".pathwayPlots.RData")
# pathwayPlots <- local(get(load(".pathwayPlots.RData")))
## remove and report failed ones
failedPPs <- pathwayPlots[lapply(pathwayPlots, is.character) > 0]
echo("the following pathways failed to render:", paste(failedPPs, collapse=", "))
pathwayPlots <- pathwayPlots[lapply(pathwayPlots, is.character) == 0]
## make sure that all files exist
stopifnot(llply(pathwayPlots, function(x) file.exists(x$plotfile)) %>% unlist %>% all)
## prepare tooltips with expression scores
ens2entrez <- quote({
# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
biomaRt::getBM(attributes=c('ensembl_gene_id', 'entrezgene', 'external_gene_name'), mart=mart) %>% filter(!is.na(entrezgene))
}) %>% cache_it("ens2entrez") %>% distinct(ensembl_gene_id)
#unlen(ens2entrez$ensembl_gene_id)
toolTipData <- overlayData %>% left_join(ens2entrez)
makeTooltip <- function(entrez_id){
toolTipData %>% filter(entrezgene==entrez_id) %>% dplyr::select(-entrezgene) %>% gather() %$% paste(key, value, sep=": ") %>% paste(collapse="\n") #%>% cat
}
#entrez_id=14679
#entrez_id=34234234234234
#makeTooltip("14679")
#+ results="asis", echo=FALSE
## simple non-clickable plots
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
#unlist(lapply(pathwayPlots, "[[", "plotfile"))
# unlist(lapply(pathwayPlots, "[[", "plotfile")) %>% l_ply(function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
#cat("
#<style type='text/css'>
# img {
# max-width: 100%;
# }
#</style>
#")
## todo add tooltips with additinal info
## http://stackoverflow.com/questions/5478940/how-to-create-a-html-tooltip-on-top-of-image-map
## extended version with clickable links
createImgMap <- function(plotData){
#browser()
warning("compiling overlay toolips...")
#pngFile="mmu04015.pathview.png"
#plotData <- pathwayPlots[[1]]
#plotData <- unlist(pathwayPlots)
# pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
pngFile <- plotData$plotfile
stopifnot(file.exists(pngFile))
pathway_id=plotData$pathway_id
## create link for image map
keggNodes <- plotData$plot.data.gene %>%
## remove box offset
mutate(x=x-22, y=y-8) %>% ## todo does not work for non-gene elements
mutate(link=paste0("http://www.kegg.jp/dbget-bin/www_bget?", keggOrCode, ":", kegg.names )) %>%
rowwise() %>% do({ curNode=.; mutate(as.df(curNode), tooltip=makeTooltip(as.integer(curNode$kegg.names)))})
## create tooltip by using mapping to
# "http://www.kegg.jp/dbget-bin/www_bget?mmu:16412
## first the image itself
# cat(paste0("<img usemap='#", pathway_id,"' src='", pngFile, "'><br>"))
# cat(paste0("<p><div style='width: 2000px'><img style='float: left' usemap='#", pathway_id,"' src='", pngFile, "'></div><br>"))
## it seems that formatting works better without the left alignment
cat(paste0("<p><div style='width: 2000px'><img usemap='#", pathway_id,"' src='", pngFile, "'></div><br>"))
cat(paste0("<map name='", pathway_id,"'>"))
#keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)}
#see http://www.html-world.de/180/image-map/
keggNodes %>% a_ply(1, function(curNode){
rectDef=with(curNode, paste(x, y, x+width, y+height, sep=","))
# paste0("<area href='", curNode$link, "' alt='Text' coords='", rectDef , "' shape='rect'>") %>% cat()
paste0("<area href='", curNode$link, "' title='", curNode$tooltip, "' alt='Text' coords='",rectDef , "' shape='rect'>") %>% cat()
})
cat("</map></p><br>")
}
pathwayPlots %>% l_ply(createImgMap)
......@@ -35,6 +35,15 @@ export PATH=$PATH:/projects/bioinfo/scripts/spinr
# which tophat; which bowtie2; which cuffdiff
mcdir(){
if [ ! -d "$1" ]; then
mkdir "$1";
fi;
cd "$1";
}
export -f mcdir
## no longer needed because packages are no kept in home
#export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment