fixed diffbind analysis by timepoint

dac77bc0 · Holger Brandl · 8f5633ac · dac77bc0
Commit dac77bc0 authored 9 years ago by Holger Brandl
--- a/misc/enr_simple.R
+++ b/misc/enr_simple.R
+
+
+########################################################################################################################
+#' ## Term enrichment
+
+#+ echo=FALSE
+
+#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')`
+
+geneLists <- degs %>%
+    transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
+
+if(nrow(contrasts)<4){
+    geneLists <- rbind_list(
+        geneLists,
+        degs %>% filter(s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, ">", sample_2)),
+        degs %>% filter(!s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, "<", sample_2))
+    )
+}
+
+## additional overlaps before doing the intersection analysis
+geneLists %>% count(list_id) %>% kable()
+
+intersectLists <- function(geneLists, listIdA, listIdB, intersectListId) {
+    commonGenes <- setdiff(filter(geneLists, list_id==listIdA)$ensembl_gene_id, filter(geneLists, list_id==listIdB)$ensembl_gene_id)
+    data.frame(list_id=intersectListId, ensembl_gene_id=commonGenes)
+}
+
+## make this project specific setting
+#with(geneLists, as.data.frame(table(list_id))) %$% ac(unique(list_id))
+#if(any(str_detect(geneLists$list_id, "cyst"))){
+#    geneLists <- intersectLists(geneLists, "liver != unpolarised", "cyst != unpolarised", "unp_liv & unp_cys") %>% rbind_list(geneLists)
+#}
+
+geneLists <- distinct(geneLists) ## just precaution in case of multiple evals of statements above
+
+
+#+ eval=T
+enrResults <- quote(geneLists %>% group_by(list_id) %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>%
+    cache_it(paste0("enrdata_", digest(geneLists)))
+
+write.delim(enrResults, file="enrResults.txt")
+# enrResults <- read.delim("enrResults.txt")
+#'  [Enrichment Results](enrResults.txt)
+
+#ac(annoChart$Genes[1]) %>% str_split(", ") %>% unlist() %>% length()
+
+#
+#' Because David is not too strigent by default we extract just those terms for which Bonferroni<0.01
+sigEnrResults <- filter(enrResults, Bonferroni <0.01)
+
+
+
+#' results='asis', error=TRUE
+if(nrow(sigEnrResults)>0){
+sigEnrResults %>% select(-Genes) %>% kable("html", table.attr = "class='dtable'", escape=F)
+}else{
+echo("no highly significant terms found")
+sigEnrResults = data.frame(Category=factor(c(), c()))
+}
+
+write.delim(sigEnrResults, file="sigEnrResults.txt")
+# sigEnrResults <- read.delim("sigEnrResults.txt")
+#'  [Very Significant Terms](sigEnrResults.txt)
+
+
+## plot the enrichment results
+#sigEnrResults %>% group_by(Category, add=T) %>% do({
+#    logPlot <- . %>% ggplot(aes(Term, PValue)) +
+#	    geom_bar(stat="identity")+coord_flip() +
+#	    xlab("Enriched Terms") +
+#	    ggtitle(.$Category[1]) +
+#	    scale_y_log10()
+#	    print(logPlot)
+#})
+
+#+ include=FALSE, eval=exists("sigEnrResults")
+## include=FALSE, eval=nrow(sigEnrResults)>1
+warning("dropping levels")
+sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette
+
+term_category_colors <- create_palette(unique(ac(sigEnrResults$Category)))
+
+term_barplot_files <- sigEnrResults %>%
+## chop and pad category names
+mutate(Term=str_sub(Term, 1, 70) %>% str_pad(70)) %>%
+do({
+#browser()
+    enrResultsGrp <- .
+    ## DEBUG enrResultsGrp <- sigEnrResults
+    label <- enrResultsGrp$list_id[1]
+
+    if(nrow(enrResultsGrp)==0)  return(data.frame())
+    browser()
+
+#    warning(paste0("processing terms", paste(ac(unique(enrResultsGrp$Category)), collapse=",")))
+    logPlot <- enrResultsGrp %>%
+        ## fix factor order
+        mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(as.factor(Category)))) %>%
+        ggplot(aes(Term, PValue, fill=Category)) +
+        geom_bar(stat="identity")+
+        scale_fill_manual(values = term_category_colors, drop=F, name="Ontology") +
+        coord_flip() +
+        xlab("Enriched Terms") +
+        ggtitle(label) +
+        scale_y_log10()
+
+    fileNameLabel <- label %>%
+        str_replace_all("!=", "ne") %>%
+        str_replace_all(">", "gt") %>%
+        str_replace_all("<", "lt") %>%
+        str_replace_all(fixed("&"), "AND") %>%
+        str_replace_all(fixed("|"), "OR") %>%
+        str_replace_all(" ", "_")
+
+#        ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf"))
+#        print(logPlot)
+
+     tmpPng <- paste0("enrterms__", fileNameLabel, ".png")
+     ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE)
+     data.frame(file=tmpPng)
+})
+
+#+ results="asis", error=TRUE
+l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
+