diff --git a/common/enrichemt_notest.md b/common/enrichemt_notest.md new file mode 100755 index 0000000000000000000000000000000000000000..4d317856658b57d0c6515a7f858d22bc4df68274 --- /dev/null +++ b/common/enrichemt_notest.md @@ -0,0 +1,3 @@ + +http://amp.pharm.mssm.edu/Enrichr/ +http://amp.pharm.mssm.edu/Harmonizome/ diff --git a/common/plot_kegg_pathways.R b/common/plot_kegg_pathways.R new file mode 100755 index 0000000000000000000000000000000000000000..c7f812598759002f7e312baba374e783ea443f40 --- /dev/null +++ b/common/plot_kegg_pathways.R @@ -0,0 +1,206 @@ +#!/usr/bin/env Rscript +#+ echo=FALSE, error=F + + +suppressMessages(require(docopt)) + +## todo use textual input here for ease of use +doc <- ' +Perform an enrichment analysis for a set of genes +Usage: plot_kegg_pathways.R <overlay_data_tsv> <pathways>... +' + +opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining +# opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" ) + +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/core_commons.R") +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R") +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/bio/diffex_commons.R") + +require_auto(knitr) +require_auto(DT) +require_auto(readr) + + + +## todo why is tidyr not processing an empty dataframe +keggPathways <- opts$pathways + +##+ results='asis' +#if(!exists("keggPathways") | nrow(keggPathways)==0){ +# cat("No enriched pathways found") +#} + +#+ echo=FALSE +require_auto(pathview) +require_auto(png) + +# keggPathways <- keggPathways[1] + +#if(nrow(keggPathways)==0){ +# echo("no enriched kegg pathways were found in the dataset") +#}else{ + +#keggOrCode <- "mmu" +keggOrCode <- guess_pathview_species(geneLists$ensembl_gene_id) + +## prepare p-value data +sliceData <- overlayData %>% + # dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>% + column2rownames("ensembl_gene_id") + +#sliceData <- sliceData[,1:5] + +#sliceData %>% head %>% kable() + +data.frame(set=names(sliceData)) %>% mutate(slice_index=row_number()) %>% kable() + +pwPlotDir <-".pathways" + +dir.create(pwPlotDir) + +plot_pathway <- function(pathwayID, sliceData){ + # pathwayID="mmu04015"; sliceData <- sliceData + # browser() + # echo("processing pathway", pathwayID) + pv.out <- pathview( + gene.data = sliceData, + pathway.id = pathwayID, + species = keggOrCode, + # out.suffix = pathwayID$kegg.description, + # out.suffix = pathwayID, + multi.state = ncol(sliceData)>1, + # kegg.native=F, + # node.sum = "mean", # the method name to calculate node summary given that multiple genes or compounds are mapped to it + limit = list(gene = c(-4,4)), + gene.idtype="ensembl" + ) + + if(is.numeric(pv.out) && pv.out==0) return(pathwayID) ## happens if pathview fails to parse xml e.g. for mmu01100 + + outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(sliceData)>1, ".multi", ""), ".png") + + ## move pathway plots into figures sub-directory + figuresPlotFile <- file.path(pwPlotDir, outfile) + system(paste("mv", outfile, figuresPlotFile)) + + system(paste("rm", paste0(pathwayID, ".xml"), paste0(pathwayID, ".png"))) + + ## interactive plotting + # ima <- readPNG(outfile) + # plot.new() + # lim <- par() + # rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4]) + + pv.out$plotfile=figuresPlotFile + pv.out$pathway_id=pathwayID + + return(pv.out) +} + +#keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015") +#keggPathways <- c("mmu00830") + + + +pathwayPlots <- keggPathways %>% llply(function(pathwayID){ + plot_pathway(pathwayID, sliceData) +}) + + +save(pathwayPlots, file=".pathwayPlots.RData") +# pathwayPlots <- local(get(load(".pathwayPlots.RData"))) + + +## remove and report failed ones +failedPPs <- pathwayPlots[lapply(pathwayPlots, is.character) > 0] +echo("the following pathways failed to render:", paste(failedPPs, collapse=", ")) +pathwayPlots <- pathwayPlots[lapply(pathwayPlots, is.character) == 0] + + +## make sure that all files exist +stopifnot(llply(pathwayPlots, function(x) file.exists(x$plotfile)) %>% unlist %>% all) + + +## prepare tooltips with expression scores +ens2entrez <- quote({ + # mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl")) + ## todo fix this https://support.bioconductor.org/p/74322/ + mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org")) + + biomaRt::getBM(attributes=c('ensembl_gene_id', 'entrezgene', 'external_gene_name'), mart=mart) %>% filter(!is.na(entrezgene)) +}) %>% cache_it("ens2entrez") %>% distinct(ensembl_gene_id) +#unlen(ens2entrez$ensembl_gene_id) + +toolTipData <- overlayData %>% left_join(ens2entrez) + +makeTooltip <- function(entrez_id){ + toolTipData %>% filter(entrezgene==entrez_id) %>% dplyr::select(-entrezgene) %>% gather() %$% paste(key, value, sep=": ") %>% paste(collapse="\n") #%>% cat +} +#entrez_id=14679 +#entrez_id=34234234234234 +#makeTooltip("14679") + + + +#+ results="asis", echo=FALSE + +## simple non-clickable plots +## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of +#unlist(lapply(pathwayPlots, "[[", "plotfile")) +# unlist(lapply(pathwayPlots, "[[", "plotfile")) %>% l_ply(function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))}) + +#cat(" +#<style type='text/css'> +# img { +# max-width: 100%; +# } +#</style> +#") + +## todo add tooltips with additinal info +## http://stackoverflow.com/questions/5478940/how-to-create-a-html-tooltip-on-top-of-image-map + +## extended version with clickable links +createImgMap <- function(plotData){ + #browser() + warning("compiling overlay toolips...") + #pngFile="mmu04015.pathview.png" + #plotData <- pathwayPlots[[1]] + #plotData <- unlist(pathwayPlots) + # pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1] + pngFile <- plotData$plotfile + stopifnot(file.exists(pngFile)) + pathway_id=plotData$pathway_id + + ## create link for image map + keggNodes <- plotData$plot.data.gene %>% + ## remove box offset + mutate(x=x-22, y=y-8) %>% ## todo does not work for non-gene elements + mutate(link=paste0("http://www.kegg.jp/dbget-bin/www_bget?", keggOrCode, ":", kegg.names )) %>% + rowwise() %>% do({ curNode=.; mutate(as.df(curNode), tooltip=makeTooltip(as.integer(curNode$kegg.names)))}) + + ## create tooltip by using mapping to + + + # "http://www.kegg.jp/dbget-bin/www_bget?mmu:16412 + + ## first the image itself + # cat(paste0("<img usemap='#", pathway_id,"' src='", pngFile, "'><br>")) + # cat(paste0("<p><div style='width: 2000px'><img style='float: left' usemap='#", pathway_id,"' src='", pngFile, "'></div><br>")) + ## it seems that formatting works better without the left alignment + cat(paste0("<p><div style='width: 2000px'><img usemap='#", pathway_id,"' src='", pngFile, "'></div><br>")) + cat(paste0("<map name='", pathway_id,"'>")) + #keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)} + + #see http://www.html-world.de/180/image-map/ + keggNodes %>% a_ply(1, function(curNode){ + rectDef=with(curNode, paste(x, y, x+width, y+height, sep=",")) + # paste0("<area href='", curNode$link, "' alt='Text' coords='", rectDef , "' shape='rect'>") %>% cat() + paste0("<area href='", curNode$link, "' title='", curNode$tooltip, "' alt='Text' coords='",rectDef , "' shape='rect'>") %>% cat() + + }) + cat("</map></p><br>") +} + +pathwayPlots %>% l_ply(createImgMap) diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index be49fe1325ddba0671e635edd501582e9da62bb2..b8678ab659b4137f3f95d4a9e8cc86418bbcc7e1 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -35,6 +35,15 @@ export PATH=$PATH:/projects/bioinfo/scripts/spinr # which tophat; which bowtie2; which cuffdiff +mcdir(){ + if [ ! -d "$1" ]; then + mkdir "$1"; + fi; + + cd "$1"; +} +export -f mcdir + ## no longer needed because packages are no kept in home #export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work