cp_enrichment.R 22.6 KB
Newer Older
1
#!/usr/bin/env Rscript
Holger Brandl's avatar
Holger Brandl committed
2
#+ echo=FALSE, error=F
3 4 5 6


suppressMessages(require(docopt))

Holger Brandl's avatar
Holger Brandl committed
7
## todo use textual input here for ease of use
8 9
doc <- '
Perform an enrichment analysis for a set of genes
Holger Brandl's avatar
Holger Brandl committed
10
Usage: cp_enrichment.R [options] <gene_lists_tsv> <group_col>
11 12 13

Options:
--overlay_expr_data Tsv with overlay data for the kegg pathways
14
--project <project_prefix>   Name to prefix all generated result files [default: ]
15 16
--qcutoff <qcutoff>             Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01]

17 18 19
'

opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
20
# opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" )
21

22
coreCommons = "https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/core_commons.R"
Holger Brandl's avatar
Holger Brandl committed
23
devtools::source_url(coreCommons)
24

25 26 27 28 29 30 31 32
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/ggplot_commons.R")

devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/common/cp_utils.R")
# source(interp_from_env("${NGS_TOOLS}/common/cp_utils.R"))

## also load diffex helpers for guess_mart
# source(interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R"))
devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/dge_workflow/diffex_commons.R")
33 34


35 36
# should be no longer needed
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/bio/diffex_commons.R")
37 38
# devtools::source_url("https://www.dropbox.com/s/vkmm66n11qyunma/diffex_commons.R?dl=1")

39

40 41
load_pack(knitr)
load_pack(DT)
Holger Brandl's avatar
Holger Brandl committed
42

Holger Brandl's avatar
Holger Brandl committed
43
#devtools::session_info() # nice!
44 45 46 47 48 49 50 51 52

## to fix child support issue with knitr, see also
## http://stackoverflow.com/questions/20030523/knitr-nested-child-documents
## https://github.com/yihui/knitr/issues/38
# todo disabled because root.dir in parent document seems the only working solution
#if(exists('project_dir')) setwd(project_dir)
#print(getwd())

## load the data
53
geneLists <- read_tsv(opts$gene_lists_tsv)
54 55
group_col = opts$group_col
geneLists %<>% group_by_(.dots = group_col)
Holger Brandl's avatar
Holger Brandl committed
56

57 58 59

#geneLists %<>% filter(cluster %in% c("cluster_1", "cluster_2"))

60
if (! is.null(opts$overlay_expr_data)) {
61
    overlayData <- read_tsv(opts$overlay_expr_data)
62 63
}

Holger Brandl's avatar
Holger Brandl committed
64 65
## TODO expose options to run gesa instead (by assuming that gene lists are sorted)
## see http://www.bioconductor.org/packages/release/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.pdf for details
66

67 68
# resultsBaseName <- if (str_length(opts$project) > 0) paste0(opts$project, ".") else basename(opts$gene_lists_tsv) %>% trim_ext("txt") #%>% paste0(".")
resultsBaseName <- if (str_length(opts$project) > 0) paste0(opts$project, ".") else ""
69
#resultsBaseName=basename(opts$gene_lists_tsv) %>% trim_ext("txt") #%>% paste0(".")
70

71
q_cutoff <- as.numeric(opts$qcutoff)
72

73
########################################################################################################################
74
#' # Enrichment Analysis
75

76
#' Run configuration was
77 78 79
vec_as_df(unlist(opts)) %>%
    filter(! str_detect(name, "^[<-]")) %>%
    kable()
80

Holger Brandl's avatar
Holger Brandl committed
81
#' This analysis was performed using [clusterProfiler](http://bioconductor.org/packages/release/bioc/html/clusterProfiler.html). The following ontologies were tested: Kegg, Go, Reactome, Dose,
82

83 84 85 86
listLabels <- geneLists %>%
    select(- ensembl_gene_id) %>%
    distinct
listLabels %<>% transform(list_label = do.call(paste, c(listLabels, sep = "__")))
87

88 89
geneLists %>%
    inner_join(listLabels) %>%
90 91 92 93 94 95 96
    ggplot(aes(list_label)) +
    geom_bar() +
    coord_flip() +
    ggtitle("gene list sizes to be tested for term enrichment") +
    ylab("")


97
cp_species <- guess_cp_species(geneLists$ensembl_gene_id)
Holger Brandl's avatar
Holger Brandl committed
98
annoDb <- guess_anno_db(geneLists$ensembl_gene_id) # e.g. "org.Hs.eg.db"
99 100
# cp_species <- "sce"
# annoDb <- "org.Sc.sgd.db"
Holger Brandl's avatar
Holger Brandl committed
101 102 103 104

## supported ids
#idType("org.Hs.eg.db")

105
#' ## Convert to entrez gene ids
106

107
install_package("clusterProfiler")
Holger Brandl's avatar
Holger Brandl committed
108

109
glMappedRaw <- clusterProfiler::bitr(unique(geneLists$ensembl_gene_id), fromType = "ENSEMBL", toType = "ENTREZID", OrgDb = annoDb) %>%
Holger Brandl's avatar
Holger Brandl committed
110
    set_names("ensembl_gene_id", "entrez_gene_id") %>%
111 112
    right_join(geneLists) %>%
    tbl_df
Holger Brandl's avatar
Holger Brandl committed
113

114
unloadNamespace('clusterProfiler')
115
#load_pack(clusterProfiler)
116 117 118

reload_dplyr()

119 120 121 122 123
glMapped <- glMappedRaw %>%
    select(- ensembl_gene_id) %>%
## regroup the data
#    group_by_(cluster)
    group_by_(.dots = group_col %>% ac)
Holger Brandl's avatar
Holger Brandl committed
124

Holger Brandl's avatar
Holger Brandl committed
125

Holger Brandl's avatar
Holger Brandl committed
126 127
#' Check how many failed to map
#count(glMapped, !is.na(entrez_gene_id))
128
summarize(glMapped, non_mappable_prop = sum(is.na(entrez_gene_id)) / n()) %>% kable()
Holger Brandl's avatar
Holger Brandl committed
129 130 131


# .. and discard the failed ones
132
glMapped %<>% filter(! is.na(entrez_gene_id))
Holger Brandl's avatar
Holger Brandl committed
133 134


Holger Brandl's avatar
Holger Brandl committed
135 136 137 138
## retain just 1500 genes at max per group
#glMappedSub <- glMapped %>% do({shuffle(.) %>% head(1500)})
#count(glMappedSub, cluster)

139
## sync reactome pacakge to node
140 141 142
#if(Sys.getenv("LSF_SERVERDIR")!="" && dir.exists("/tmp/local_r_packages")){
#    system("if [ ! -d '/tmp/local_r_packages/reactome.db/' ]; then scp -r falcon:/tmp/local_r_packages /tmp ; fi")
#}
143

Holger Brandl's avatar
Holger Brandl committed
144
## prety slow
145
## user purrr::map here
146 147
enrResults <- quote(glMapped %>% do(cp_test(.$entrez_gene_id, annoDb, cp_species, q_cutoff = q_cutoff))) %>%
cache_it(paste0("enrdata_", digest(glMapped)))
Holger Brandl's avatar
Holger Brandl committed
148

149 150

# add information on GO levels:
151 152
load_pack(clusterProfiler)
load_pack(goProfiles)
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172

getlevels <- function(x){
    dbs <- x %>% filter(grepl("go_", ontology)) %>% mutate(GOs = str_replace(ontology, "go_", ""))
    GOs <- unique(dbs$GOs)
    GOs <- toupper(GOs)
    #print(GOs)

    listofGOs <- list()
    for (db in GOs) {
        ont = db
        for (level in (1:50)) {
            terms <- getGOLevel(ont, level) %>% as.vector()
            level.name <- paste(ont, "level", level, sep="_")
            listofGOs[[level.name]] <- terms
        }
    }
    return(listofGOs)
}

levdata <- getlevels(enrResults)
173 174 175 176
levdataDF =  str_match(names(levdata), "_([0-9]*)$")[,2] %>%
    { data_frame(level=as.integer(.), term=levdata) } %>% unnest() %>%
    group_by(term) %>% slice(which.min(level)) %>%
    rename(min_level=level, ID=term)
177

178 179 180 181
enrResults %<>% left_join(levdataDF, by = "ID")
# enrResults[is.na(enrResults)] <- "-"
# enrResults %<>% mutate_at(vars(min_level), replaceNA, "-")
# enrResults$min_level %<>% replaceNA("-")
182 183 184



Holger Brandl's avatar
Holger Brandl committed
185
## run the actual enrichment test for all clusters and all ontologies
Holger Brandl's avatar
Holger Brandl committed
186 187 188
#library(foreach); library(doMC); registerDoMC(cores=20)
## https://support.bioconductor.org/p/38541/
## library(GOstats) inside your %dopar% loop. And then start with a fresh session, so GOstats is not loaded outside the loop. It worked for me.
189
#enrResults <- dlply(glMapped, groups(glMapped) %>% ac, function(curGroup){
Holger Brandl's avatar
Holger Brandl committed
190
#  cp_test(curGroup$entrez_gene_id)
191 192
#}, .progress="text", .parallel=F) ##%>% cache_it(paste0("enrdata_", digest(glMapped)))
#rbind_all(enrResults)
Holger Brandl's avatar
Holger Brandl committed
193

194
#reload_dplyr()
Holger Brandl's avatar
Holger Brandl committed
195

Holger Brandl's avatar
Holger Brandl committed
196 197 198
#enrResults %<>% rename(Category=Description)
#enrResults %<>% rename(ontology=Category)

Holger Brandl's avatar
Holger Brandl committed
199
## remove to clumsy gene_id columns
200
# enrResults %<>% select(- geneID)
201

202
write_tsv(enrResults, path = paste0(resultsBaseName, "enrResults.txt"))
203
# enrResults <- read_tsv(paste0(resultsBaseName, "enrResults.txt"))
204 205
#'  [Enrichment Results](`r paste0(resultsBaseName, "enrResults.txt")`)

206
load_pack(DT)
207 208
datatable(enrResults)

Holger Brandl's avatar
Holger Brandl committed
209 210
#enrResults %>% ggplot(aes(pvalue)) + geom_histogram() + scale_x_log10()
#enrResults %>% ggplot(aes(ontology)) + facet_wrap(~cluster) + geom_bar() + rot_x_lab()
Holger Brandl's avatar
Holger Brandl committed
211
#facetSpecs <- paste("~", groups(geneLists) %>% ac %>% paste(collapse=" + "))
212
facetSpecs <- paste("~", group_col %>% ac %>% paste(collapse = " + "))
Holger Brandl's avatar
Holger Brandl committed
213

214 215
#' Visualize term-pvalues per list

Holger Brandl's avatar
Holger Brandl committed
216
#http://stackoverflow.com/questions/11028353/passing-string-variable-facet-wrap-in-ggplot-using-r
217 218 219 220 221
enrResults %>% ggplot(aes(ontology)) +
    facet_wrap(as.formula(facetSpecs), ncol = 3) +
    geom_bar() +
    rot_x_lab() +
    ggtitle("enriched term counts by cluster")
222

223
enrResults %<>% mutate(num_term_genes = str_split_fixed(BgRatio, fixed("/"), 2)[, 1] %>% as.numeric)
224

Holger Brandl's avatar
Holger Brandl committed
225
#' Keep at max 100 terms for visualzation per group
Holger Brandl's avatar
Holger Brandl committed
226
#if(table(enrResults$cluster))
Holger Brandl's avatar
Holger Brandl committed
227
## restablish the grouping and limit results per group to 100
Holger Brandl's avatar
Holger Brandl committed
228 229
#enrResults %<>% group_by_(.dots=groups(geneLists) %>% ac)
#enrResults %<>% group_by_(.dots=group_col)
230 231 232 233 234 235
#enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dots=group_col)

#tt <- enrResults %>% group_by_(.dots=c(group_col,"ontology")) %>% arrange(-qvalue) %>% dplyr::slice(1:10)
#enrResults %>% filter(cluster=="cluster_1")
#enrResults %>% group_by_(.dots=c(group_col)) %>% ggplot(aes(pvalue, qvalue, color=ontology))+ geom_point()
#enrResults %>% group_by_(.dots=c(group_col)) %>% ggplot(aes(pvalue, p.adjust, color=cluster))+ geom_point()
236 237 238
erPlotData <- enrResults %>%
    group_by_(.dots = c(group_col)) %>%
    arrange(qvalue) %>%
239
    slice(1 : 20) %>%
240 241
## regroup because otherwise dplyr complains about corrupt df (which looks like a bug)
    group_by_(.dots = c(group_col))
Holger Brandl's avatar
Holger Brandl committed
242

243
#count(erPlotData, cluster)
Holger Brandl's avatar
Holger Brandl committed
244

Holger Brandl's avatar
Holger Brandl committed
245

246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283

#+ include=FALSE, eval=FALSE
## plot the enrichment results
#sigEnrResults %>% group_by(Category, add=T) %>% do({
#    logPlot <- . %>% ggplot(aes(Term, PValue)) +
#	    geom_bar(stat="identity")+coord_flip() +
#	    xlab("Enriched Terms") +
#	    ggtitle(.$Category[1]) +
#	    scale_y_log10()
#	    print(logPlot)
#})

#sigEnrResults %>%
##    select(-Genes) %>%
#    do({
#    enrResultsGrp <- .
#    ## DEBUG enrResultsGrp <- sigEnrResults
##    geneLists %>% first(1) %>% select(-ensembl_gene_id) %>% paste0(., collapse="_")
##browser()
#    label <- geneLists %>% semi_join(enrResultsGrp) %>% first(1) %>% dplyr::select(-ensembl_gene_id) %>% paste0(., collapse="_")
#
#    echo("processing", label)
#
#    logPlot <- enrResultsGrp %>%
#        ## fix factor order
#        mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(Category))) %>%
#        ggplot(aes(Term, PValue, fill=Category)) +
#	    geom_bar(stat="identity")+
#	    coord_flip() +
#	    xlab("Enriched Terms") +
#	    ggtitle(label) +
#	    scale_y_log10()
#
#	    ggsave(paste0(resultsBaseName, label, ".enrichmed_terms.pdf"))
#	    print(logPlot)
#})
##ggsave2()

284
#' Gene Ratio:
285 286
#+ error=TRUE, echo=FALSE
warning("dropping levels")
287
erPlotData %<>% mutate(ontology = ac(ontology)) ## drop unsused level to get consistent color palette
288

289
erPlotData %<>% rename(Term = Description)
290 291


292
term_category_colors <- create_palette(unique(ac(erPlotData$ontology)))
293

Holger Brandl's avatar
Holger Brandl committed
294 295
figDir <- "enr_charts"
dir.create(figDir)
296

297

298 299
## chop and pad category names
erPlotData %<>% mutate(fixed_width_term = str_sub(Term, 1, 70) %>% str_pad(70))
Holger Brandl's avatar
Holger Brandl committed
300

301 302
## evaluate gene ratios strings into actual proportions
erPlotData %<>% mutate(gene_ratio = map_dbl(GeneRatio, ~ eval(parse(text = .x))))
303

304
term_barplot_files = erPlotData %>% do({
305
    # DEBUG enrResultsGrp <- erPlotData %>% first_group()
306
    enrResultsGrp <- .
307

308 309 310 311 312
    label = subset(enrResultsGrp, select = group_col)[1, 1] %>%
        as.matrix %>%
        ac

    ## old version
Holger Brandl's avatar
Holger Brandl committed
313
    # enrPlot <- enrResultsGrp %>%
314 315 316 317 318 319 320 321 322 323 324
    # ## fix factor order
    # #        mutate(Term=reorder(Term, -qvalue) %>% reorder(as.integer(as.factor(ontology)))) %>%
    #     mutate(fixed_width_term = reorder(fixed_width_term, - qvalue)) %>%
    #     ggplot(aes(fixed_width_term, num_term_genes, fill = - log10(qvalue), color = ontology)) +
    #     geom_bar(stat = "identity") +
    #     scale_color_manual(values = term_category_colors, drop = F, name = "Ontology") +
    #     coord_flip() +
    #     xlab("Enriched Terms") +
    #     ggtitle(label) +
    #     scale_y_log10()

Holger Brandl's avatar
Holger Brandl committed
325
    # print(enrPlot)
326 327

    ## new version using dotplot https://bioconductor.org/packages/devel/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html#dotplot
Holger Brandl's avatar
Holger Brandl committed
328
    enrPlot = enrResultsGrp %>% ungroup %>%
329 330 331 332 333 334 335 336 337 338 339
    ## fix factor order
        mutate(fixed_width_term = reorder(fixed_width_term, gene_ratio)) %>%
        ggplot(aes(fixed_width_term, gene_ratio, size=Count, fill = qvalue, color = ontology)) +
        # ggplot(aes(fixed_width_term, Count)) +
            geom_point(pch=21) +
            scale_color_manual(values = term_category_colors, drop = F, name = "Ontology") +
            scale_fill_gradient(low="red", high="white", name = "q-value") +
            coord_flip() +
            # xlab("Enriched Terms") +
            ggtitle(label)
            # scale_y_log10()
340 341

    ## todo  use builtin method to create filesystem-compatible name
342
    label <- gsub('[^a-zA-Z0-9.!=><&|]', '_', label)
343 344 345 346 347 348
    fileNameLabel <- label %>%
        str_replace_all("!=", "ne") %>%
        str_replace_all(">", "gt") %>%
        str_replace_all("<", "lt") %>%
        str_replace_all(fixed("&"), "AND") %>%
        str_replace_all(fixed("|"), "OR") %>%
349
        str_replace_all(fixed("/"), "_") %>%
350 351
        str_replace_all(" ", "_")

352
    #        ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf"))
Holger Brandl's avatar
Holger Brandl committed
353
    #        print(enrPlot)
354

355 356
    stopifnot(str_length(fileNameLabel) > 0)
    tmpPng <- paste0(figDir, "/enrterms__", fileNameLabel, ".png")
Holger Brandl's avatar
Holger Brandl committed
357
    ggsave(tmpPng, enrPlot, width = 10, height = 2 + round(nrow(enrResultsGrp) / 5), limitsize = FALSE)
358
    data.frame(file = tmpPng)
359 360 361
})

#+ results="asis"
362
walk(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
363

364 365 366 367
# install.packages("session")
session::save.session(".cp_enrichment.dat");
# session::restore.session(".cp_enrichment.dat");

368
########################################################################################################################
369
#' # Enriched KEGG Pathways
370 371

#' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order:
372 373
hasEnrPathways=(!is.null(opts$overlay_expr_data) & (nrow(enrResults %>% filter(ontology=="kegg")) >0))
#+ eval=hasEnrPathways
374 375

## todo why is tidyr not processing an empty dataframe
376
keggPathways = enrResults %>% filter(ontology == "kegg") %$% ac(ID) %>% unique() # %>% head(3) ## todo remove debug head
Holger Brandl's avatar
Holger Brandl committed
377

378

Holger Brandl's avatar
Holger Brandl committed
379 380
## remove mmu04723 if present (see bug report https://support.bioconductor.org/p/95354/)
# keggPathways = c("mmu04660", "mmu03060", "mmu04110", "mmu04260", "mmu04723", "mmu05322", "mmu04114")
381
keggPathways %<>% purrr::discard(~ . == "mmu04723") ## overloaded by scales packages
382

383 384 385 386 387 388
# ## #+ results='asis', echo=FALSE
# if (! exists("keggPathways") || length(keggPathways) == 0) {
#     cat("No enriched pathways found")
#     return()
#     quit()
# }
389 390


391

392
load_pack(naturalsort)
393

394 395 396 397 398 399

#reload_dplyr()

#unloadNamespace('pathview')


400 401 402 403 404 405 406 407 408 409
# keggPathways <- keggPathways[1]

#if(nrow(keggPathways)==0){
#    echo("no enriched kegg pathways were found in the dataset")
#}else{

#keggOrCode <- "mmu"
keggOrCode <- guess_pathview_species(geneLists$ensembl_gene_id)

## prepare p-value data
410 411
#sliceData <- overlayData %>% column2rownames("ensembl_gene_id")

412 413
## sort slices in natural order # as.name see https://github.com/tidyverse/dplyr/issues/1392
overlayData %<>% select_(., .dots = naturalsort(ac(names(.))) %>% map(as.name))
414
sliceData <- overlayData %>% column2rownames("ensembl_gene_id")
415

416

417
#sliceData <- sliceData[,1:5]
Holger Brandl's avatar
Holger Brandl committed
418

419 420
#sliceData %>% head %>% kable()

421 422 423
data.frame(set = names(sliceData)) %>%
    mutate(slice_index = row_number()) %>%
    knitr::kable()
424

425

426
pwPlotDir <- ".pathways"
Holger Brandl's avatar
Holger Brandl committed
427

Holger Brandl's avatar
Holger Brandl committed
428
dir.create(pwPlotDir)
429

430 431 432 433 434 435
#Toxoplasmose debugging of 11796
#c(11796,11797,11798) %>%
#    clusterProfiler::bitr(fromType="ENTREZID", toType="ENSEMBL", annoDb="org.Mm.eg.db") %>%
#    set_names("entrez_gene_id", "ensembl_gene_id" ) %>%
#    inner_join( sliceData %>% add_rownames("ensembl_gene_id") )

436
## from http://stackoverflow.com/questions/7505547/detach-all-packages-while-working-in-r
437
unload_packages()
438

Holger Brandl's avatar
Holger Brandl committed
439
devtools::source_url(coreCommons)
440 441
load_pack(pathview)
load_pack(png)
442 443


Holger Brandl's avatar
Holger Brandl committed
444 445
plot_pathway <- function(pathwayID, sliceData){
#    pathwayID="mmu04015"; sliceData <- sliceData
446
#    pathwayID="mmu05216"; sliceData <- sliceData
447
#    pathwayID="mmu01230"; sliceData <- sliceData
448 449
#    browser()
#    echo("processing pathway", pathwayID)
450 451
     pv.out <- pathview(
#            gene.data = sliceData %>% add_rownames("ensembl_gene_id") %>% tbl_df %>% filter(ensembl_gene_id %in% c("ENSMUSG00000073901", "ENSMUSG00000032000", "ENSMUSG00000021025")) %>% column2rownames("ensembl_gene_id"),
Holger Brandl's avatar
Holger Brandl committed
452
            gene.data = sliceData,
453 454 455 456
            pathway.id = pathwayID,
            species = keggOrCode,
#            out.suffix = pathwayID$kegg.description,
#            out.suffix = pathwayID,
Holger Brandl's avatar
Holger Brandl committed
457
            multi.state = ncol(sliceData)>1,
458
#            kegg.native=F,
459
            node.sum = "max.abs", # the method name to calculate node summary given that multiple genes or compounds are mapped to it
460 461 462 463
            limit = list(gene = c(-4,4)),
            gene.idtype="ensembl"
    )

464
    if (is.numeric(pv.out) && pv.out == 0) return(pathwayID) ## happens if pathview fails to parse xml e.g. for mmu01100
Holger Brandl's avatar
Holger Brandl committed
465

466
    outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(sliceData) > 1, ".multi", ""), ".png")
467
    if(!file.exists(outfile)) return(pathwayID) # seems to happen for generic non-pathways such as mmu01230,mmu01200,mmu01210
468 469

    ## move pathway plots into figures sub-directory
Holger Brandl's avatar
Holger Brandl committed
470
    figuresPlotFile <- file.path(pwPlotDir, outfile)
471 472 473 474 475
    system(paste("mv", outfile, figuresPlotFile))

    system(paste("rm", paste0(pathwayID, ".xml"), paste0(pathwayID, ".png")))

    ## interactive plotting
476
    #    ima <- readPNG(figuresPlotFile);     plot.new();     lim <- par();     rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4]);
477

478 479
    pv.out$plotfile = figuresPlotFile
    pv.out$pathway_id = pathwayID
480 481 482 483 484

    return(pv.out)
}

#keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015")
Holger Brandl's avatar
Holger Brandl committed
485
#keggPathways <- c("mmu00830")
486 487


488
#+ echo=FALSE, warning=FALSE, eval=hasEnrPathways
489
pathwayPlots <- keggPathways %>%
490 491 492 493 494
# head(3) %>%
map(function(pathwayID){
    # echo("processing pathway", pathwayID)
    plot_pathway(pathwayID, sliceData)
})
495 496


497
#save(pathwayPlots, file=".pathwayPlots.RData")
Holger Brandl's avatar
Holger Brandl committed
498 499
# pathwayPlots <- local(get(load(".pathwayPlots.RData")))

500 501 502
session::save.session(".cp_pathways.dat");
# session::restore.session(".cp_pathways.dat");

Holger Brandl's avatar
Holger Brandl committed
503 504 505

## remove and report failed ones
failedPPs <- pathwayPlots[lapply(pathwayPlots, is.character) > 0]
506
echo("the following pathways failed to render:", paste(failedPPs, collapse = ", "))
Holger Brandl's avatar
Holger Brandl committed
507 508 509
pathwayPlots <- pathwayPlots[lapply(pathwayPlots, is.character) == 0]


510 511 512
# print names of pathways which could not be rendered
# pathwayPlots[!map_lgl(pathwayPlots, ~ file.exists(.$plotfile))] %>% map("pathway_id")
# purriefied solution: discard(pathwayPlots, ~ file.exists(.$plotfile)) %>% map("pathway_id")
513

514
# make sure that all files exist
515
stopifnot(map_lgl(pathwayPlots, ~ file.exists(.$plotfile)) %>% all)
516 517


Holger Brandl's avatar
Holger Brandl committed
518
install_package("biomaRt")
519

520 521
## prepare tooltips with expression scores
ens2entrez <- quote({
522 523 524 525 526 527 528 529 530
    #    mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
    ## todo fix this https://support.bioconductor.org/p/74322/
    mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org"))
    mart <- biomaRt::useDataset("mmusculus_gene_ensembl", mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org"))

    biomaRt::getBM(attributes = c('ensembl_gene_id', 'entrezgene', 'external_gene_name'), mart = mart) %>% filter(! is.na(entrezgene))
}) %>%
    cache_it("ens2entrez") %>%
    distinct_all(ensembl_gene_id)
531 532 533 534 535
#unlen(ens2entrez$ensembl_gene_id)

toolTipData <- overlayData %>% left_join(ens2entrez)

makeTooltip <- function(entrez_id){
536 537
    toolTipData %>%
        filter(entrezgene == entrez_id) %>%
538
        dplyr::select(- entrezgene, - ensembl_gene_id) %>%
539
        gather() %$% paste(key, value, sep = ": ") %>% paste(collapse = "\n") #%>% cat
540 541 542 543
}



544
#+ results="asis", echo=FALSE, eval=hasEnrPathways
545 546 547 548 549 550 551 552 553 554 555 556 557 558

## simple non-clickable plots
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
#unlist(lapply(pathwayPlots, "[[", "plotfile"))
# unlist(lapply(pathwayPlots, "[[", "plotfile")) %>% l_ply(function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})

#cat("
#<style type='text/css'>
# img {
#   max-width: 100%;
# }
#</style>
#")

559 560 561
pathwayName <- function(pathwayID, enrResults) enrResults %>%
    filter(ontology == "kegg") %>%
    filter(ID == pathwayID) %$% Description
562
#
563 564 565 566
## todo add tooltips with additinal info
## http://stackoverflow.com/questions/5478940/how-to-create-a-html-tooltip-on-top-of-image-map

## extended version with clickable links
Holger Brandl's avatar
Holger Brandl committed
567 568
createImgMap <- function(plotData){
    warning("compiling overlay toolips...")
569

570 571 572
    #    cat(paste0("pathway:", plotData$pathway_id) # todo rather show the pathway name here and create anhor list on top of section
    #    pathwayName("mmu05145", enrResults)
    cat(paste0("<h3>", pathwayName(plotData$pathway_id, enrResults), "</h3>"))
573

574 575 576
    #pngFile="mmu04015.pathview.png"
    #plotData <- pathwayPlots[[1]]
    #plotData <- unlist(pathwayPlots)
577
    #    pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
578
    pngFile <- plotData$plotfile
Holger Brandl's avatar
Holger Brandl committed
579
    stopifnot(file.exists(pngFile))
580
    pathway_id = plotData$pathway_id
581 582 583

    ## create link for image map
    keggNodes <- plotData$plot.data.gene %>%
584 585 586 587
    ## remove box offset
        mutate(x = x - 22, y = y - 8) %>% ## todo does not work for non-gene elements
        mutate(link = paste0("http://www.kegg.jp/dbget-bin/www_bget?", keggOrCode, ":", kegg.names)) %>%
        rowwise() %>%
588
        do({ curNode = .; mutate(as_df(curNode), tooltip = makeTooltip(as.integer(curNode$kegg.names)))})
589 590 591 592

    ## create tooltip by using mapping to


593
    #    "http://www.kegg.jp/dbget-bin/www_bget?mmu:16412
594 595

    ## first the image itself
596 597
    #    cat(paste0("<img usemap='#", pathway_id,"' src='", pngFile, "'><br>"))
    #    cat(paste0("<p><div style='width: 2000px'><img style='float: left' usemap='#", pathway_id,"' src='", pngFile, "'></div><br>"))
Holger Brandl's avatar
Holger Brandl committed
598
    ## it seems that formatting works better without the left alignment
599 600
    cat(paste0("<p><div style='width: 2000px'><img usemap='#", pathway_id, "' src='", pngFile, "'></div><br>"))
    cat(paste0("<map name='", pathway_id, "'>"))
601 602 603
    #keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)}

    #see http://www.html-world.de/180/image-map/
604
    keggNodes %>% plyr::a_ply(1, function(curNode){
605 606 607
        rectDef = with(curNode, paste(x, y, x + width, y + height, sep = ","))
        #        paste0("<area href='", curNode$link, "' alt='Text' coords='", rectDef , "' shape='rect'>") %>% cat()
        paste0("<area href='", curNode$link, "' title='", curNode$tooltip, "' alt='Text' coords='", rectDef , "' shape='rect'>") %>% cat()
608 609
    })
    cat("</map></p><br>")
Holger Brandl's avatar
Holger Brandl committed
610 611
}

612
pathwayPlots %>% plyr::l_ply(createImgMap)
613 614

## respin it for cild inclusion
Holger Brandl's avatar
Holger Brandl committed
615
# require(knitr); setwd("/Volumes/projects/bioinfo/scripts/ngs_tools/dev/common"); spin("cp_enrichment.R", knit=F)
Holger Brandl's avatar
Holger Brandl committed
616

617
# }
Holger Brandl's avatar
Holger Brandl committed
618

619

Holger Brandl's avatar
Holger Brandl committed
620 621

#' System info:
622
#+
Holger Brandl's avatar
Holger Brandl committed
623 624

devtools::session_info()