Skip to content
Snippets Groups Projects
enr_simple.R 4.45 KiB


########################################################################################################################
#' ## Term enrichment

#+ echo=FALSE

#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')`

geneLists <- degs %>%
    transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))

if(nrow(contrasts)<4){
    geneLists <- rbind_list(
        geneLists,
        degs %>% filter(s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, ">", sample_2)),
        degs %>% filter(!s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, "<", sample_2))
    )
}

## additional overlaps before doing the intersection analysis
geneLists %>% count(list_id) %>% kable()

intersectLists <- function(geneLists, listIdA, listIdB, intersectListId) {
    commonGenes <- setdiff(filter(geneLists, list_id==listIdA)$ensembl_gene_id, filter(geneLists, list_id==listIdB)$ensembl_gene_id)
    data.frame(list_id=intersectListId, ensembl_gene_id=commonGenes)
}

## make this project specific setting
#with(geneLists, as.data.frame(table(list_id))) %$% ac(unique(list_id))
#if(any(str_detect(geneLists$list_id, "cyst"))){
#    geneLists <- intersectLists(geneLists, "liver != unpolarised", "cyst != unpolarised", "unp_liv & unp_cys") %>% rbind_list(geneLists)
#}

geneLists <- distinct(geneLists) ## just precaution in case of multiple evals of statements above


#+ eval=T
enrResults <- quote(geneLists %>% group_by(list_id) %>% do(davidAnnotationChart(.$ensembl_gene_id))) %>%
    cache_it(paste0("enrdata_", digest(geneLists)))

write.delim(enrResults, file="enrResults.txt")
# enrResults <- read.delim("enrResults.txt")
#'  [Enrichment Results](enrResults.txt)

#ac(annoChart$Genes[1]) %>% str_split(", ") %>% unlist() %>% length()

#
#' Because David is not too strigent by default we extract just those terms for which Bonferroni<0.01
sigEnrResults <- filter(enrResults, Bonferroni <0.01)



#' results='asis', error=TRUE
if(nrow(sigEnrResults)>0){
sigEnrResults %>% select(-Genes) %>% kable("html", table.attr = "class='dtable'", escape=F)
}else{
echo("no highly significant terms found")
sigEnrResults = data.frame(Category=factor(c(), c()))
}

write.delim(sigEnrResults, file="sigEnrResults.txt")
# sigEnrResults <- read.delim("sigEnrResults.txt")
#'  [Very Significant Terms](sigEnrResults.txt)


## plot the enrichment results
#sigEnrResults %>% group_by(Category, add=T) %>% do({
#    logPlot <- . %>% ggplot(aes(Term, PValue)) +
#	    geom_bar(stat="identity")+coord_flip() +
#	    xlab("Enriched Terms") +
#	    ggtitle(.$Category[1]) +
#	    scale_y_log10()
#	    print(logPlot)
#})

#+ include=FALSE, eval=exists("sigEnrResults")
## include=FALSE, eval=nrow(sigEnrResults)>1
warning("dropping levels")
sigEnrResults %<>% mutate(Category=ac(Category)) ## drop unsused level to get consistent color palette

term_category_colors <- create_palette(unique(ac(sigEnrResults$Category)))

term_barplot_files <- sigEnrResults %>%
## chop and pad category names
mutate(Term=str_sub(Term, 1, 70) %>% str_pad(70)) %>%
do({
#browser()
    enrResultsGrp <- .
    ## DEBUG enrResultsGrp <- sigEnrResults
    label <- enrResultsGrp$list_id[1]

    if(nrow(enrResultsGrp)==0)  return(data.frame())
    browser()

#    warning(paste0("processing terms", paste(ac(unique(enrResultsGrp$Category)), collapse=",")))
    logPlot <- enrResultsGrp %>%
        ## fix factor order
        mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(as.factor(Category)))) %>%
        ggplot(aes(Term, PValue, fill=Category)) +
        geom_bar(stat="identity")+
        scale_fill_manual(values = term_category_colors, drop=F, name="Ontology") +
        coord_flip() +
        xlab("Enriched Terms") +
        ggtitle(label) +
        scale_y_log10()

    fileNameLabel <- label %>%
        str_replace_all("!=", "ne") %>%
        str_replace_all(">", "gt") %>%
        str_replace_all("<", "lt") %>%
        str_replace_all(fixed("&"), "AND") %>%
        str_replace_all(fixed("|"), "OR") %>%
        str_replace_all(" ", "_")

#        ggsave(paste0("enrichmed_terms__", fileNameLabel, ".pdf"))
#        print(logPlot)

     tmpPng <- paste0("enrterms__", fileNameLabel, ".png")
     ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE)
     data.frame(file=tmpPng)
})

#+ results="asis", error=TRUE
l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})