Something went wrong on our end
-
Holger Brandl authoredHolger Brandl authored
enr_simple.R 4.45 KiB
########################################################################################################################
#' ## Term enrichment
#+ echo=FALSE
#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')`
geneLists <- degs %>%
transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
if(nrow(contrasts)<4){
geneLists <- rbind_list(
geneLists,
degs %>% filter(s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, ">", sample_2)),
degs %>% filter(!s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, "<", sample_2))
)
}
## additional overlaps before doing the intersection analysis
geneLists %>% count(list_id) %>% kable()
intersectLists <- function(geneLists, listIdA, listIdB, intersectListId) {
commonGenes <- setdiff(filter(geneLists, list_id==listIdA)$ensembl_gene_id, filter(geneLists, list_id==listIdB)$ensembl_gene_id)
data.frame(list_id=intersectListId, ensembl_gene_id=commonGenes)
}
## make this project specific setting
#with(geneLists, as.data.frame(table(list_id))) %$% ac(unique(list_id))
#if(any(str_detect(geneLists$list_id, "cyst"))){
# geneLists <- intersectLists(geneLists, "liver != unpolarised", "cyst != unpolarised", "unp_liv & unp_cys") %>% rbind_list(geneLists)
#}
geneLists <- distinct(geneLists) ## just precaution in case of multiple evals of statements above
#+ eval=T
enrResults <- quote(geneLists %>% group_by(list_id) %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>%
cache_it(paste0("enrdata_", digest(geneLists)))
write.delim(enrResults, file="enrResults.txt")
# enrResults <- read.delim("enrResults.txt")
#' [Enrichment Results](enrResults.txt)
#ac(annoChart$Genes[1]) %>% str_split(", ") %>% unlist() %>% length()
#
#' Because David is not too strigent by default we extract just those terms for which Bonferroni<0.01
sigEnrResults <- filter(enrResults, Bonferroni <0.01)
#' results='asis', error=TRUE
if(nrow(sigEnrResults)>0){
sigEnrResults %>% select(-Genes) %>% kable("html", table.attr = "class='dtable'", escape=F)
}else{
echo("no highly significant terms found")
sigEnrResults = data.frame(Category=factor(c(), c()))
}
write.delim(sigEnrResults, file="sigEnrResults.txt")
# sigEnrResults <- read.delim("sigEnrResults.txt")
#' [Very Significant Terms](sigEnrResults.txt)
## plot the enrichment results
#sigEnrResults %>% group_by(Category, add=T) %>% do({
# logPlot <- . %>% ggplot(aes(Term, PValue)) +
# geom_bar(stat="identity")+coord_flip() +
# xlab("Enriched Terms") +
# ggtitle(.$Category[1]) +
# scale_y_log10()
# print(logPlot)
#})
#+ include=FALSE, eval=exists("sigEnrResults")
## include=FALSE, eval=nrow(sigEnrResults)>1
warning("dropping levels")
sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette
term_category_colors <- create_palette(unique(ac(sigEnrResults$Category)))
term_barplot_files <- sigEnrResults %>%
## chop and pad category names
mutate(Term=str_sub(Term, 1, 70) %>% str_pad(70)) %>%
do({
#browser()
enrResultsGrp <- .
## DEBUG enrResultsGrp <- sigEnrResults
label <- enrResultsGrp$list_id[1]
if(nrow(enrResultsGrp)==0) return(data.frame())
browser()
# warning(paste0("processing terms", paste(ac(unique(enrResultsGrp$Category)), collapse=",")))
logPlot <- enrResultsGrp %>%
## fix factor order
mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(as.factor(Category)))) %>%
ggplot(aes(Term, PValue, fill=Category)) +
geom_bar(stat="identity")+
scale_fill_manual(values = term_category_colors, drop=F, name="Ontology") +
coord_flip() +
xlab("Enriched Terms") +
ggtitle(label) +
scale_y_log10()
fileNameLabel <- label %>%
str_replace_all("!=", "ne") %>%
str_replace_all(">", "gt") %>%
str_replace_all("<", "lt") %>%
str_replace_all(fixed("&"), "AND") %>%
str_replace_all(fixed("|"), "OR") %>%
str_replace_all(" ", "_")
# ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf"))
# print(logPlot)
tmpPng <- paste0("enrterms__", fileNameLabel, ".png")
ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE)
data.frame(file=tmpPng)
})
#+ results="asis", error=TRUE
l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})