Commit 67bf3037 authored by herseman
Browse files

Merge branch 'cp_enrichment_kegg_gene_symbols' into 'master'

replace entrez IDs by gene symbols for KEGG pathways

See merge request bioinfo/ngs_tools!2
parents 7907588b ecddafa0
......@@ -234,6 +234,54 @@ enrResults %<>% left_join(levdataDF, by = "ID")
## remove the too clumsy gene_id columns
# enrResults %<>% select(- geneID)
## replace entrez IDs by gene symbols for predicted KEGG pathways
install_package("biomaRt")
library(rapportools)

## Ensembl dataset to query: an explicitly configured database wins, otherwise
## the dataset is guessed from the ensembl gene ids of the gene lists.
## The condition is a single value, so plain if/else is used (not ifelse()).
biomart_db <- if (!is.empty(opts$biomart_database)) {
  opts$biomart_database
} else {
  guess_mart(geneLists$ensembl_gene_id)
}

## connect to the nov2020 Ensembl archive host
mart <- biomaRt::useMart(
  "ENSEMBL_MART_ENSEMBL",
  # dataset = guess_mart(geneLists$ensembl_gene_id),
  dataset = biomart_db,
  host = "nov2020.archive.ensembl.org",
  path = "/biomart/martservice",
  archive = FALSE
)

## ensembl gene id / entrez gene id / gene symbol for the whole dataset;
## records without an entrez id are dropped
ens2entrez <- biomaRt::getBM(
  attributes = c("ensembl_gene_id", "entrezgene_id", "external_gene_name"),
  mart = mart
) %>%
  filter(!is.na(entrezgene_id))

## lookup table entrez id -> gene symbol, restricted to the genes contained in
## geneLists; several symbols for the same entrez id are collapsed with ","
## NOTE(review): distinct_all() is called with column names here -- presumably
## the project helper and not dplyr::distinct_all(); confirm
entrez2symbol <- ens2entrez %>%
  filter(ensembl_gene_id %in% unique(geneLists$ensembl_gene_id)) %>%
  distinct_all(entrezgene_id, external_gene_name) %>%
  dplyr::group_by(entrezgene_id) %>%
  summarize(external_gene_name = paste0(external_gene_name, collapse = ",")) %>%
  ungroup()
## KEGG results list their genes as "/"-separated entrez ids in geneID.
## Rebuild that column with gene symbols; all other ontologies stay untouched.
if (any(enrResults$ontology == "kegg")) {
  enrResultsKEGG <- enrResults %>%
    filter(ontology == "kegg") %>%
    ## one row per (term, entrez id)
    mutate(entrezgene_id = str_split(geneID, "/")) %>%
    unnest(cols = c(entrezgene_id)) %>%
    ## the split ids are character, so the lookup key is cast to character as
    ## well; the join key is spelled out instead of relying on a natural join
    ## (assumes enrResults has no external_gene_name column of its own)
    left_join(
      transmute(
        entrez2symbol,
        entrezgene_id = as.character(entrezgene_id),
        external_gene_name
      ),
      by = "entrezgene_id"
    ) %>%
    ## if there is no gene symbol available the entrez gene ID will be listed
    mutate(external_gene_name = dplyr::coalesce(external_gene_name, entrezgene_id)) %>%
    select(-entrezgene_id, -geneID) %>%
    ## collapse back to one row per term: group by every remaining column
    ## except the symbol and paste the symbols together with "/"
    group_by_at(vars(-external_gene_name)) %>%
    summarize(geneID = paste0(external_gene_name, collapse = "/")) %>%
    ungroup() %>%
    ## NOTE from dplyr 1.0 onward we can use: relocate(geneID, .after = qvalue)
    push_right(c("geneID", "Count", "ontology", "min_level"))

  ## swap the original KEGG rows for the symbol-annotated ones
  enrResults %<>%
    filter(ontology != "kegg") %>%
    bind_rows(enrResultsKEGG)
}
## export the enrichment table; the target file is passed positionally because
## the argument is called `path` in older readr releases and `file` in newer ones
write_tsv(enrResults, paste0(resultsBaseName, "enrResults.txt"))
# enrResults <- read_tsv(paste0(resultsBaseName, "enrResults.txt"))
#' [Enrichment Results](`r paste0(resultsBaseName, "enrResults.txt")`)
......@@ -321,7 +369,7 @@ erPlotData <- enrResults %>%
warning("dropping levels")
erPlotData %<>% mutate(ontology = ac(ontology)) ## drop unused levels to get a consistent color palette
## NOTE(review): the next two lines look like the removed/added pair of this diff
## hunk (rename() replaced by the namespace-qualified dplyr::rename()); only one
## of them is meant to run, since Description no longer exists after the first -- confirm
erPlotData %<>% rename(Term = Description)
erPlotData %<>% dplyr::rename(Term = Description)
## create_palette() receives the distinct ontology values present in erPlotData
term_category_colors <- create_palette(unique(ac(erPlotData$ontology)))
......@@ -561,35 +609,17 @@ pathwayPlots <- pathwayPlots[lapply(pathwayPlots, is.character) == 0]
## every remaining pathway plot entry must point to an existing plot file
stopifnot(map_lgl(pathwayPlots, ~ file.exists(.$plotfile)) %>% all)
## NOTE(review): from here down to distinct_all(ensembl_gene_id) the removed and
## the added side of the diff hunk appear interleaved (e.g. a trailing %>% is
## directly followed by `ens2entrez %<>%`), so this span is presumably not
## meant to run as shown -- confirm against the actual file
install_package("biomaRt")
library(rapportools)
## prepare tooltips with expression scores
## dataset name: configured biomart database if set, otherwise guessed from the gene ids
biomart_db <- ifelse(!is.empty(opts$biomart_database), opts$biomart_database, guess_mart(geneLists$ensembl_gene_id))
## the biomart query is wrapped in quote() and handed to cache_it() under the key "ens2entrez"
ens2entrez <- quote({
# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org"))
## this variant queries the aug2017 archive host and the attribute 'entrezgene',
## whereas the KEGG symbol lookup uses nov2020 and 'entrezgene_id'
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
# dataset = guess_mart(geneLists$ensembl_gene_id),
dataset = biomart_db,
host = "aug2017.archive.ensembl.org",
path = "/biomart/martservice",
archive = FALSE
)
## keep only records that carry an entrez id
biomaRt::getBM(attributes = c('ensembl_gene_id', 'entrezgene', 'external_gene_name'), mart = mart) %>% filter(! is.na(entrezgene))
}) %>%
cache_it("ens2entrez") %>%
ens2entrez %<>%
distinct_all(ensembl_gene_id)
#unlen(ens2entrez$ensembl_gene_id)
## prepare tooltips with expression scores
## join without explicit `by`: overlayData and ens2entrez are matched on their shared column names
toolTipData <- overlayData %>% left_join(ens2entrez)
## Build the tooltip text for one gene.
##
## entrez_id: entrez gene id used to select rows of the global toolTipData
##
## The matching rows are stripped of their id columns, reshaped to key/value
## pairs with gather() and returned as a single string with one "key: value"
## entry per line.
##
## NOTE(review): the filter/select pair on `entrezgene` and the pair on
## `entrezgene_id` look like the removed and the added side of the diff hunk;
## presumably only the `entrezgene_id` pair belongs to the current file -- confirm
makeTooltip <- function(entrez_id){
toolTipData %>%
filter(entrezgene == entrez_id) %>%
dplyr::select(- entrezgene, - ensembl_gene_id) %>%
filter(entrezgene_id == entrez_id) %>%
dplyr::select(- entrezgene_id, - ensembl_gene_id) %>%
gather() %$% paste(key, value, sep = ": ") %>% paste(collapse = "\n") #%>% cat
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment