Skip to content
Snippets Groups Projects
Commit 3bb6959a authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. enrichement analysis

parent 4a9b12be
No related branches found
No related tags found
No related merge requests found
...@@ -24,6 +24,7 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v ...@@ -24,6 +24,7 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v
require_auto(knitr) require_auto(knitr)
require_auto(DT) require_auto(DT)
require_auto(readr)
...@@ -35,7 +36,7 @@ require_auto(DT) ...@@ -35,7 +36,7 @@ require_auto(DT)
#print(getwd()) #print(getwd())
## load the data ## load the data
geneLists <- read.delim(opts$gene_lists_tsv) geneLists <- read_tsv(opts$gene_lists_tsv)
group_col=opts$group_col group_col=opts$group_col
geneLists %<>% group_by_(.dots=group_col) geneLists %<>% group_by_(.dots=group_col)
...@@ -43,7 +44,7 @@ geneLists %<>% group_by_(.dots=group_col) ...@@ -43,7 +44,7 @@ geneLists %<>% group_by_(.dots=group_col)
#geneLists %<>% filter(cluster %in% c("cluster_1", "cluster_2")) #geneLists %<>% filter(cluster %in% c("cluster_1", "cluster_2"))
if(!is.null(opts$overlay_expr_data)){ if(!is.null(opts$overlay_expr_data)){
overlayData <- read.delim(opts$overlay_expr_data) overlayData <- read_tsv(opts$overlay_expr_data)
} }
## TODO expose options to run gesa instead (by assuming that gene lists are sorted) ## TODO expose options to run gesa instead (by assuming that gene lists are sorted)
...@@ -52,11 +53,11 @@ if(!is.null(opts$overlay_expr_data)){ ...@@ -52,11 +53,11 @@ if(!is.null(opts$overlay_expr_data)){
resultsBaseName <- if(str_length(opts$project)>0) paste0(opts$project, ".") else basename(opts$gene_lists_tsv) %>% trim_ext("txt") #%>% paste0(".") resultsBaseName <- if(str_length(opts$project)>0) paste0(opts$project, ".") else basename(opts$gene_lists_tsv) %>% trim_ext("txt") #%>% paste0(".")
#resultsBaseName=basename(opts$gene_lists_tsv) %>% trim_ext("txt") #%>% paste0(".") #resultsBaseName=basename(opts$gene_lists_tsv) %>% trim_ext("txt") #%>% paste0(".")
unloadNamespace('dplyr'); require(dplyr)
######################################################################################################################## ########################################################################################################################
#' ## Enrichment Analysis #' ## Enrichment Analysis
unloadNamespace('dplyr'); require(dplyr)
#' This analysis was performed using [clusterProfiler](http://bioconductor.org/packages/release/bioc/html/clusterProfiler.html). The following ontologies were tested: Kegg, Go, Reactome, Dose, #' This analysis was performed using [clusterProfiler](http://bioconductor.org/packages/release/bioc/html/clusterProfiler.html). The following ontologies were tested: Kegg, Go, Reactome, Dose,
...@@ -71,6 +72,7 @@ geneLists %>% inner_join(listLabels) %>% ...@@ -71,6 +72,7 @@ geneLists %>% inner_join(listLabels) %>%
ylab("") ylab("")
## todo move to diffex commons
guess_cp_species <- function(ensIds){ guess_cp_species <- function(ensIds){
an_id <-ensIds[1] an_id <-ensIds[1]
...@@ -139,7 +141,8 @@ glMapped <- glMappedRaw %>% filter(!is.na(entrez_gene_id)) %>% select(-ensembl_g ...@@ -139,7 +141,8 @@ glMapped <- glMappedRaw %>% filter(!is.na(entrez_gene_id)) %>% select(-ensembl_g
## TODO expose as argument ## TODO expose as argument
pCutoff <- 0.05 #pCutoff <- 0.05
qCutoff <- 0.01
## retain just 1500 genes at max per group ## retain just 1500 genes at max per group
...@@ -167,12 +170,12 @@ cp_test <- function(geneIds){ ...@@ -167,12 +170,12 @@ cp_test <- function(geneIds){
# PANTHER10_ontology <- read.delim("http://data.pantherdb.org/PANTHER10.0/ontology/Protein_Class_7.0") # PANTHER10_ontology <- read.delim("http://data.pantherdb.org/PANTHER10.0/ontology/Protein_Class_7.0")
# pantherResults <- enricher(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, TERM2GENE = PANTHER10_ontology) %>% summary() # pantherResults <- enricher(gene = geneIds, organism = cpSpecies, qvalueCutoff = qCutoff, readable = TRUE, TERM2GENE = PANTHER10_ontology) %>% summary()
keggResults <- enrichKEGG(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, use_internal_data=T) %>% summary() keggResults <- enrichKEGG(gene = geneIds, organism = cpSpecies, qvalueCutoff = qCutoff, readable = TRUE, use_internal_data=T) %>% summary()
reactomeResults <- enrichPathway(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE) %>% summary() reactomeResults <- enrichPathway(gene = geneIds, organism = cpSpecies, qvalueCutoff = qCutoff, readable = TRUE) %>% summary()
goResultsCC <- enrichGO(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, ont = "CC") %>% summary() goResultsCC <- enrichGO(gene = geneIds, organism = cpSpecies, qvalueCutoff = qCutoff, readable = TRUE, ont = "CC") %>% summary()
goResultsMF <- enrichGO(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, ont = "MF") %>% summary() goResultsMF <- enrichGO(gene = geneIds, organism = cpSpecies, qvalueCutoff = qCutoff, readable = TRUE, ont = "MF") %>% summary()
goResultsBP <- enrichGO(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, ont = "BP") %>% summary() goResultsBP <- enrichGO(gene = geneIds, organism = cpSpecies, qvalueCutoff = qCutoff, readable = TRUE, ont = "BP") %>% summary()
enrResults <- rbind_list( enrResults <- rbind_list(
mutate(keggResults, ontology="kegg"), mutate(keggResults, ontology="kegg"),
...@@ -206,22 +209,37 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt")) ...@@ -206,22 +209,37 @@ write.delim(enrResults, file=paste0(resultsBaseName, "enrResults.txt"))
# enrResults <- read.delim(paste0(resultsBaseName, "enrResults.txt")) # enrResults <- read.delim(paste0(resultsBaseName, "enrResults.txt"))
#' [Enrichment Results](`r paste0(resultsBaseName, "enrResults.txt")`) #' [Enrichment Results](`r paste0(resultsBaseName, "enrResults.txt")`)
require_auto(DT)
datatable(enrResults)
#enrResults %>% ggplot(aes(pvalue)) + geom_histogram() + scale_x_log10() #enrResults %>% ggplot(aes(pvalue)) + geom_histogram() + scale_x_log10()
#enrResults %>% ggplot(aes(ontology)) + facet_wrap(~cluster) + geom_bar() + rot_x_lab() #enrResults %>% ggplot(aes(ontology)) + facet_wrap(~cluster) + geom_bar() + rot_x_lab()
#facetSpecs <- paste("~", groups(geneLists) %>% ac %>% paste(collapse=" + ")) #facetSpecs <- paste("~", groups(geneLists) %>% ac %>% paste(collapse=" + "))
facetSpecs <- paste("~", group_col %>% ac %>% paste(collapse=" + ")) facetSpecs <- paste("~", group_col %>% ac %>% paste(collapse=" + "))
#' Visualize term-pvalues per list
#http://stackoverflow.com/questions/11028353/passing-string-variable-facet-wrap-in-ggplot-using-r #http://stackoverflow.com/questions/11028353/passing-string-variable-facet-wrap-in-ggplot-using-r
enrResults %>% ggplot(aes(ontology)) + facet_wrap(as.formula(facetSpecs), ncol=3) + geom_bar() + rot_x_lab() + ggtitle("enriched term counts by cluster") enrResults %>% ggplot(aes(ontology)) + facet_wrap(as.formula(facetSpecs), ncol=3) + geom_bar() + rot_x_lab() + ggtitle("enriched term counts by cluster")
enrResults %<>% mutate(num_term_genes=str_split_fixed(BgRatio, fixed("/"),2)[,1] %>% as.numeric)
#' Keep at max 100 terms for visualzation per group #' Keep at max 100 terms for visualzation per group
#if(table(enrResults$cluster)) #if(table(enrResults$cluster))
## restablish the grouping and limit results per group to 100 ## restablish the grouping and limit results per group to 100
#enrResults %<>% group_by_(.dots=groups(geneLists) %>% ac) #enrResults %<>% group_by_(.dots=groups(geneLists) %>% ac)
#enrResults %<>% group_by_(.dots=group_col) #enrResults %<>% group_by_(.dots=group_col)
enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dots=group_col) #enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dots=group_col)
#tt <- enrResults %>% group_by_(.dots=c(group_col,"ontology")) %>% arrange(-qvalue) %>% dplyr::slice(1:10)
#enrResults %>% filter(cluster=="cluster_1")
#enrResults %>% group_by_(.dots=c(group_col)) %>% ggplot(aes(pvalue, qvalue, color=ontology))+ geom_point()
#enrResults %>% group_by_(.dots=c(group_col)) %>% ggplot(aes(pvalue, p.adjust, color=cluster))+ geom_point()
erPlotData <- enrResults %>% group_by_(.dots=c(group_col)) %>% arrange(qvalue) %>% slice(1:10) %>%
## regroup because otherwise dplyr complains about corrupt df (which looks like a bug)
group_by_(.dots=c(group_col))
#count(erPlotData, cluster)
...@@ -265,15 +283,15 @@ enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dot ...@@ -265,15 +283,15 @@ enrResults %<>% sample_frac(1) %>% filter((row_number()<100)) %>% group_by_(.dot
#+ error=TRUE, echo=FALSE #+ error=TRUE, echo=FALSE
warning("dropping levels") warning("dropping levels")
enrResults %<>% mutate(ontology=ac(ontology)) ## drop unsused level to get consistent color palette erPlotData %<>% mutate(ontology=ac(ontology)) ## drop unsused level to get consistent color palette
enrResults %<>%rename(Term=Description) erPlotData %<>% rename(Term=Description)
term_category_colors <- create_palette(unique(ac(enrResults$ontology))) term_category_colors <- create_palette(unique(ac(erPlotData$ontology)))
figDir <- "enr_charts" figDir <- "enr_charts"
dir.create(figDir) dir.create(figDir)
term_barplot_files <- enrResults %>% term_barplot_files <- erPlotData %>%
## chop and pad category names ## chop and pad category names
mutate(Term=str_sub(Term, 1, 70) %>% str_pad(70)) %>% mutate(Term=str_sub(Term, 1, 70) %>% str_pad(70)) %>%
do({ do({
...@@ -289,9 +307,9 @@ do({ ...@@ -289,9 +307,9 @@ do({
logPlot <- enrResultsGrp %>% logPlot <- enrResultsGrp %>%
## fix factor order ## fix factor order
mutate(Term=reorder(Term, -qvalue) %>% reorder(as.integer(as.factor(ontology)))) %>% mutate(Term=reorder(Term, -qvalue) %>% reorder(as.integer(as.factor(ontology)))) %>%
ggplot(aes(Term, qvalue, fill=ontology)) + ggplot(aes(Term, num_term_genes, fill=qvalue, color=ontology)) +
geom_bar(stat="identity")+ geom_bar(stat="identity")+
scale_fill_manual(values = term_category_colors, drop=F, name="Ontology") + scale_color_manual(values = term_category_colors, drop=F, name="Ontology") +
coord_flip() + coord_flip() +
xlab("Enriched Terms") + xlab("Enriched Terms") +
ggtitle(label) + ggtitle(label) +
...@@ -352,7 +370,7 @@ sliceData <- overlayData %>% ...@@ -352,7 +370,7 @@ sliceData <- overlayData %>%
# dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>% # dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>%
column2rownames("ensembl_gene_id") column2rownames("ensembl_gene_id")
sliceData <- sliceData[,1:5] #sliceData <- sliceData[,1:5]
#sliceData %>% head %>% kable() #sliceData %>% head %>% kable()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment