Commit dbcfe018 authored by Andre Gohr's avatar Andre Gohr
Browse files

improves color code in KEGG pathway plots

parent 67bf3037
......@@ -84,6 +84,9 @@ load_pack(DT)
## load the data
geneLists <- read_tsv(opts$gene_lists_tsv)
group_col = opts$group_col
## these are the contrasts for which we have DEGs
## plot_score_matrix might contain more contrasts and should be reduced to contrasts with DEGs
contrasts_with_degs=unique(geneLists[,group_col])
geneLists %<>% group_by_(.dots = group_col)
......@@ -91,8 +94,32 @@ geneLists %<>% group_by_(.dots = group_col)
if (! is.null(opts$overlay_expr_data)) {
overlayData <- read_tsv(opts$overlay_expr_data)
# contrasts in overlayData for which we don't have DEGs should be discarded
column_headers=unlist(c(colnames(overlayData)[1],contrasts_with_degs))
overlayData=overlayData[,column_headers]
cat("colnames(overlayData):\n")
cat(colnames(overlayData))
cat("\n")
# here we transform (-1)*log10(p values) to
# -1: if gene is deg in that contrast and FC<0
# 1: if gene is deg in that contrast and FC>0
# 0: all other genes; we keep NA values
# This transformation is necessary for the KEGG pathways with color overlays
overlayData2=overlayData
for(contrast in unique(geneLists$contrast)){
colid=which(colnames(overlayData)==contrast)
degs=as.matrix(geneLists[ geneLists$contrast==contrast , 1 ])[,1]
ids1=which(is.element(overlayData$ensembl_gene_id,degs) & overlayData[,colid]<0)
ids2=which(is.element(overlayData$ensembl_gene_id,degs) & overlayData[,colid]>0)
#ids1=which(overlayData[,colid]<= -1.3)
#ids2=which(overlayData[,colid]>= 1.3)
overlayData2[!is.na(overlayData[,colid]),colid]=0
overlayData2[ids1,colid]=-1
overlayData2[ids2,colid]=1
}
overlayData=overlayData2
}
## TODO expose options to run gesa instead (by assuming that gene lists are sorted)
## see http://www.bioconductor.org/packages/release/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.pdf for details
......@@ -125,7 +152,6 @@ geneLists %>%
ggtitle("gene list sizes to be tested for term enrichment") +
ylab("")
cp_species <- guess_cp_species(geneLists$ensembl_gene_id)
annoDb <- guess_anno_db(geneLists$ensembl_gene_id) # e.g. "org.Hs.eg.db"
# cp_species <- "sce"
......@@ -460,7 +486,7 @@ session::save.session(".cp_enrichment.dat");
########################################################################################################################
#' # Enriched KEGG Pathways
#' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order:
#' To understand spatio-temporal changes in gene expression better, we overlay in the enriched KEGG pathways proteins/complexes with a color code: i) yellow = gene is significantly down-regulated in condition 1 vs conditon 2 (or in case of complexes some genes corresponding to it are down-regulated and the remaing, if any, non-significant), ii) blue = gene is significantly up-regulated (or in case of complexes some genes corresponding to it are up-regulated and the remaing, if any, non-significant), iii) red = in case of complexes some corresponding genes are significantly down- others up-regulated, iv) white = no significant DEGs, v) gray = missing values. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. When hovering over the boxes you will see a context menu that lists all genes per box and value: 1=up-regulated, 0=non-differential, -1=down-regulated, NA=missing value. To relate the barcode to contrasts we define the following order or bars:
hasEnrPathways=(!is.null(opts$overlay_expr_data) & (nrow(enrResults %>% filter(ontology=="kegg")) >0))
#+ eval=hasEnrPathways
......@@ -532,26 +558,98 @@ devtools::source_url(coreCommons)
load_pack(pathview)
load_pack(png)
# Here we hack internal function of the pathview package to allow us to color-highlight
# up- & down-regulated elements, contradicting elements, elements with NA meta data
# An element is one box in the pathways. Many correspond to exactly one gene, some correspong to several genes.
# In the latter case, some of the genes might be up- others down-regulated which is a constradicting case and should be highlighted, too.
max.abs.tweak=function (x, na.rm = TRUE)
{
if (na.rm)
if(length(which(!is.na(x)))==0){return(NA)}
x = x[!is.na(x)]
if(max(x)==1 && min(x)==-1){
return(2) # several genes map to same compound and some are up- others are down-regulated
}
midx = which.max(abs(x))
x[midx]
}
node.color.tweak=function (plot.data = NULL, discrete = FALSE, limit = 1, bins = 10,
both.dirs = TRUE, low = "green", mid = "gray", high = "red",
na.col = "transparent", trans.fun = NULL)
{
if (is.null(plot.data))
return(NULL)
node.summary = plot.data[, -c(1:8)]
if (length(dim(node.summary)) == 2) {
node.summary = as.matrix(node.summary)
}
else names(node.summary) = rownames(plot.data)
if (!is.null(trans.fun))
node.summary = trans.fun(node.summary)
if (both.dirs & length(limit) == 1) {
limit = c(-abs(limit), abs(limit))
}
else if (length(limit) == 1) {
limit = c(0, limit)
}
disc.cond1 = all(as.integer(limit) == limit)
disc.cond2 = (limit[2] - limit[1])%%bins == 0
if (discrete & disc.cond1 & disc.cond2) {
node.summary[] = as.integer(node.summary)
limit[2] = limit[2] + 1
bins = bins + 1
}
else if (discrete) {
message("Note: ", "limit or bins not proper, data not treated as discrete!")
}
# node.summary[node.summary > limit[2]] = limit[2]
# node.summary[node.summary < limit[1]] = limit[1]
if (both.dirs) {
cols = colorpanel2(bins, low = low, mid = mid, high = high)
}
else cols = colorpanel2(bins, low = mid, high = high)
cols=c(cols,'#FFCCCB')
na.col = colorpanel2(1, low = na.col, high = na.col)
#data.cuts = seq(from = limit[1], to = limit[2], length = bins + 1)
data.cuts=c(-1.5,-0.5,0.5,1.5,2.5)
index.ts = cols.ts = node.summary
index.ts[] = cut(node.summary, data.cuts, include.lowest = TRUE,
right = F)
cols.ts[] = cols[index.ts]
cols.ts[is.na(cols.ts)] = na.col
return(cols.ts)
}
# here we assign the tweaks to the internal function of the package pathview
environment(max.abs.tweak) <- asNamespace('pathview')
assignInNamespace("max.abs", max.abs.tweak, ns = "pathview")
environment(node.color.tweak) <- asNamespace('pathview')
assignInNamespace("node.color", node.color.tweak, ns = "pathview")
plot_pathway <- function(pathwayID, sliceData){
# pathwayID="mmu04015"; sliceData <- sliceData
# pathwayID="mmu05216"; sliceData <- sliceData
# pathwayID="mmu01230"; sliceData <- sliceData
# browser()
# echo("processing pathway", pathwayID)
pv.out <- pathview(
# gene.data = sliceData %>% add_rownames("ensembl_gene_id") %>% tbl_df %>% filter(ensembl_gene_id %in% c("ENSMUSG00000073901", "ENSMUSG00000032000", "ENSMUSG00000021025")) %>% column2rownames("ensembl_gene_id"),
gene.data = sliceData,
pathway.id = pathwayID,
species = keggOrCode,
# out.suffix = pathwayID$kegg.description,
# out.suffix = pathwayID,
multi.state = ncol(sliceData)>1,
# kegg.native=F,
node.sum = "max.abs", # the method name to calculate node summary given that multiple genes or compounds are mapped to it
limit = list(gene = c(-4,4)),
gene.idtype="ensembl"
)
# to keep the tweaked verion working, don't change parameters here
pv.out <- pathview(
gene.data = sliceData,
pathway.id = pathwayID,
species = keggOrCode,
multi.state = ncol(sliceData)>1,
discrete=list(gene=FALSE,cpd=FALSE),
node.sum = "max.abs", # the method name to calculate node summary given that multiple genes or compounds are mapped to it
limit = list(gene = c(-1,1),cpd=c(-1,1)),
low=list(gene='yellow',cpd='yellow'),
mid=list(gene='white',cpd='white'),
high=list(gene='lightblue',cpd='lightblue'),
na.col='gray',
bins=list(gene = 3,cpd=3),
gene.idtype="ensembl"
)
if (is.numeric(pv.out) && pv.out == 0) return(pathwayID) ## happens if pathview fails to parse xml e.g. for mmu01100
......@@ -581,7 +679,7 @@ plot_pathway <- function(pathwayID, sliceData){
pathwayPlots <- keggPathways %>%
# head(3) %>%
map(function(pathwayID){
# echo("processing pathway", pathwayID)
echo("processing pathway", pathwayID)
plot_pathway(pathwayID, sliceData)
})
......@@ -615,15 +713,30 @@ ens2entrez %<>%
## prepare tooltips with expression scores
toolTipData <- overlayData %>% left_join(ens2entrez)
toolTipData.cols=c("external_gene_name",grep(" vs ",colnames(toolTipData),value=TRUE))
blanks=rep('',50)
for(i in 2:50){
blanks[i]=paste(blanks[i-1],' ',sep='')
}
makeTooltip <- function(entrez_id){
toolTipData %>%
filter(entrezgene_id == entrez_id) %>%
dplyr::select(- entrezgene_id, - ensembl_gene_id) %>%
gather() %$% paste(key, value, sep = ": ") %>% paste(collapse = "\n") #%>% cat
makeTooltip <- function(entrez_ids){
entrez_ids=as.integer(strsplit(entrez_ids,",")[[1]])
toolTipData.tmp=toolTipData[is.element(toolTipData$entrezgene_id,entrez_ids),toolTipData.cols]
colnames(toolTipData.tmp)[1]="Gene"
gene.nchar.max=max(unlist(lapply(toolTipData.tmp[,1],nchar)))
ret=c(paste(colnames(toolTipData.tmp),collapse=" | "),apply(toolTipData.tmp,1,function(x){x=as.character(x);x[1]=paste(x[1],blanks[gene.nchar.max-nchar(x[1])+1],sep='');x[x=="0"]=" 0";x[x=="1"]=" 1";paste(x,collapse=" | ")}))
paste(ret,collapse="\n")
}
#makeTooltip <- function(entrez_id){
# toolTipData %>%
# filter(entrezgene_id %in% entrez_id) %>%
# dplyr::select(- entrezgene_id, - ensembl_gene_id) %>%
# gather() %$% paste(key, value, sep = ": ") %>% paste(collapse = "\n")
#}
#+ results="asis", echo=FALSE, eval=hasEnrPathways
......@@ -672,7 +785,8 @@ createImgMap <- function(plotData){
mutate(x = x - 22, y = y - 8) %>% ## todo does not work for non-gene elements
mutate(link = paste0("http://www.kegg.jp/dbget-bin/www_bget?", keggOrCode, ":", kegg.names)) %>%
rowwise() %>%
do({ curNode = .; mutate(as_df(curNode), tooltip = makeTooltip(as.integer(curNode$kegg.names)))})
# do({ curNode = .; mutate(as_df(curNode), tooltip = makeTooltip(as.integer(curNode$kegg.names)))})
do({ curNode = .; mutate(as_df(curNode), tooltip = makeTooltip(as.character(curNode$all.mapped)))})
## create tooltip by using mapping to
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment