######################################################################################################################## #' ## Term enrichment #+ echo=FALSE #' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')` geneLists <- degs %>% transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2)) if(nrow(contrasts)<4){ geneLists <- rbind_list( geneLists, degs %>% filter(s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, ">", sample_2)), degs %>% filter(!s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, "<", sample_2)) ) } ## additional overlaps before doing the intersection analysis geneLists %>% count(list_id) %>% kable() intersectLists <- function(geneLists, listIdA, listIdB, intersectListId) { commonGenes <- setdiff(filter(geneLists, list_id==listIdA)$ensembl_gene_id, filter(geneLists, list_id==listIdB)$ensembl_gene_id) data.frame(list_id=intersectListId, ensembl_gene_id=commonGenes) } ## make this project specific setting #with(geneLists, as.data.frame(table(list_id))) %$% ac(unique(list_id)) #if(any(str_detect(geneLists$list_id, "cyst"))){ # geneLists <- intersectLists(geneLists, "liver != unpolarised", "cyst != unpolarised", "unp_liv & unp_cys") %>% rbind_list(geneLists) #} geneLists <- distinct(geneLists) ## just precaution in case of multiple evals of statements above #+ eval=T enrResults <- quote(geneLists %>% group_by(list_id) %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>% cache_it(paste0("enrdata_", digest(geneLists))) write.delim(enrResults, file="enrResults.txt") # enrResults <- read.delim("enrResults.txt") #' [Enrichment Results](enrResults.txt) #ac(annoChart$Genes[1]) %>% str_split(", ") %>% unlist() %>% length() # #' Because David is not too strigent by default we extract just those terms for which Bonferroni<0.01 sigEnrResults <- filter(enrResults, Bonferroni <0.01) #' results='asis', error=TRUE if(nrow(sigEnrResults)>0){ sigEnrResults %>% select(-Genes) %>% kable("html", table.attr = "class='dtable'", escape=F) }else{ echo("no highly significant terms found") sigEnrResults = data.frame(Category=factor(c(), c())) } write.delim(sigEnrResults, file="sigEnrResults.txt") # sigEnrResults <- read.delim("sigEnrResults.txt") #' [Very Significant Terms](sigEnrResults.txt) ## plot the enrichment results #sigEnrResults %>% group_by(Category, add=T) %>% do({ # logPlot <- . %>% ggplot(aes(Term, PValue)) + # geom_bar(stat="identity")+coord_flip() + # xlab("Enriched Terms") + # ggtitle(.$Category[1]) + # scale_y_log10() # print(logPlot) #}) #+ include=FALSE, eval=exists("sigEnrResults") ## include=FALSE, eval=nrow(sigEnrResults)>1 warning("dropping levels") sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette term_category_colors <- create_palette(unique(ac(sigEnrResults$Category))) term_barplot_files <- sigEnrResults %>% ## chop and pad category names mutate(Term=str_sub(Term, 1, 70) %>% str_pad(70)) %>% do({ #browser() enrResultsGrp <- . ## DEBUG enrResultsGrp <- sigEnrResults label <- enrResultsGrp$list_id[1] if(nrow(enrResultsGrp)==0) return(data.frame()) browser() # warning(paste0("processing terms", paste(ac(unique(enrResultsGrp$Category)), collapse=","))) logPlot <- enrResultsGrp %>% ## fix factor order mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(as.factor(Category)))) %>% ggplot(aes(Term, PValue, fill=Category)) + geom_bar(stat="identity")+ scale_fill_manual(values = term_category_colors, drop=F, name="Ontology") + coord_flip() + xlab("Enriched Terms") + ggtitle(label) + scale_y_log10() fileNameLabel <- label %>% str_replace_all("!=", "ne") %>% str_replace_all(">", "gt") %>% str_replace_all("<", "lt") %>% str_replace_all(fixed("&"), "AND") %>% str_replace_all(fixed("|"), "OR") %>% str_replace_all(" ", "_") # ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf")) # print(logPlot) tmpPng <- paste0("enrterms__", fileNameLabel, ".png") ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE) data.frame(file=tmpPng) }) #+ results="asis", error=TRUE l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})