Commit f4aa55d8 authored by Holger Brandl's avatar Holger Brandl

added support for chimpanzee

parent a2e5630c
...@@ -19,14 +19,18 @@ Options: ...@@ -19,14 +19,18 @@ Options:
opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
# opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" ) # opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" )
coreCommons = "https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/core_commons.R" coreCommons = "https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/core_commons.R"
devtools::source_url(coreCommons) devtools::source_url(coreCommons)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/ggplot_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/ggplot_commons.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/common/cp_utils.R")
# source(interp_from_env("${NGS_TOOLS}/common/cp_utils.R"))
## also load diffex helpers for guess_mart
# source(interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R"))
devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/dge_workflow/diffex_commons.R")
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/bio/cp_utils.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v2/common/cp_utils.R")
# devtools::source_url("https://www.dropbox.com/s/p2b8luxf7jteb63/cp_utils.R?dl=1")
# should be no longer needed # should be no longer needed
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/bio/diffex_commons.R") # devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/bio/diffex_commons.R")
......
...@@ -11,25 +11,40 @@ install_package("clusterProfiler") ...@@ -11,25 +11,40 @@ install_package("clusterProfiler")
guess_cp_species <- function(ensIds){ guess_cp_species <- function(ensIds){
an_id <-ensIds[1] an_id <-ensIds[1]
if(str_detect(an_id, "ENSG")){ if(str_detect(an_id, "ENSG")){
return("human") return("human")
}else if(str_detect(an_id, "ENSMUSG")){ }else if(str_detect(an_id, "ENSMUSG")){
return("mouse") return("mouse")
}else if(str_detect(an_id, "ENSDARG")){ }else if(str_detect(an_id, "ENSDARG")){
return("zebrafish") return("zebrafish")
}else if(str_detect(an_id, "ENSPTRG")){
return("ptr")
}else if(str_detect(an_id, "FBgn")){ }else if(str_detect(an_id, "FBgn")){
return("fly") return("fly")
}else if(.is_yeast(ensIds)){ }else if(.is_yeast(ensIds)){
return("yeast") return("yeast")
}else{ }else{
stop(paste("could not clusterProfiler species name from ", an_id)) stop(paste("could not guess clusterProfiler species name from ", an_id))
} }
} }
## todo how to guess yeast ("org.Sc.sgd.db") from id? # todo automatically install org-db if missing in guess_anno_db call
## see http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
# source("http://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
#biocLite("org.Hs.eg.db")
#biocLite("org.Dr.eg.db")
#biocLite("org.Dm.eg.db")
#biocLite("org.Sc.sgd.db")
#biocLite("org.Pt.eg.db")
#biocLite("KEGG.db")
#biocLite("ReactomePA")
guess_anno_db <- function(ensIds){ guess_anno_db <- function(ensIds){
an_id <-ensIds[1] an_id <-ensIds[1]
...@@ -41,6 +56,8 @@ guess_anno_db <- function(ensIds){ ...@@ -41,6 +56,8 @@ guess_anno_db <- function(ensIds){
return("org.Dr.eg.db") return("org.Dr.eg.db")
}else if(str_detect(an_id, "FBgn")){ }else if(str_detect(an_id, "FBgn")){
return("org.Dm.eg.db") return("org.Dm.eg.db")
}else if(str_detect(an_id, "ENSPTRG")){
return("org.Pt.eg.db")
}else if(.is_yeast(ensIds)){ }else if(.is_yeast(ensIds)){
return("org.Sc.sgd.db") return("org.Sc.sgd.db")
}else{ }else{
...@@ -62,6 +79,8 @@ guess_pathview_species <- function(ensIds){ ...@@ -62,6 +79,8 @@ guess_pathview_species <- function(ensIds){
return("dre") return("dre")
}else if(str_detect(an_id, "ENSG")){ }else if(str_detect(an_id, "ENSG")){
return("hsa") return("hsa")
}else if(str_detect(an_id, "ENSPTRG")){
return("ptr")
}else if(str_detect(an_id, "FBgn")){ }else if(str_detect(an_id, "FBgn")){
return("dme") return("dme")
}else if(.is_yeast(ensIds)){ }else if(.is_yeast(ensIds)){
...@@ -73,17 +92,6 @@ guess_pathview_species <- function(ensIds){ ...@@ -73,17 +92,6 @@ guess_pathview_species <- function(ensIds){
#guess_mart("ENSCAFG00000000043") #guess_mart("ENSCAFG00000000043")
## see http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
#source("http://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
#biocLite("org.Hs.eg.db")
#biocLite("org.Dr.eg.db")
#biocLite("org.Dm.eg.db")
#biocLite("org.Sc.sgd.db")
#biocLite("KEGG.db")
#biocLite("ReactomePA")
#load_pack(ReactomePA) #load_pack(ReactomePA)
......
########################################################################################################################
### enrichment analysis
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
## e.g. getClusterReport --> plot2D
DEF_DAVID_ONTOLOGIES=ontologies=c("GOTERM_CC_FAT", "GOTERM_MF_FAT", "GOTERM_BP_FAT", "PANTHER_PATHWAY", "PANTHER_FAMILY", "PANTHER_PATHWAY", "KEGG_PATHWAY", "REACTOME_PATHWAY")
davidAnnotationChart <- function( someGenes, ontologies=DEF_DAVID_ONTOLOGIES ){
require_auto(RDAVIDWebService) ## just works if installed on non-network-drive (e.g. /tmp/)
## expexted to have a column with gene_id
# echo("processing list with", length(someGenes), "genes")
# someGenes <- degs$ensembl_gene_id[1:100]
if(length(someGenes)>1500){
someGenes <- sample(someGenes) %>% head(1500)
}
david<-DAVIDWebService$new(email="brandl@mpi-cbg.de")
# ## list all ontologies
# getAllAnnotationCategoryNames(david)
# getTimeOut(david)
setTimeOut(david, 80000) ## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
result<-addList(david, someGenes, idType="ENSEMBL_GENE_ID", listName=paste0("list_", sample(10000)[1]), listType="Gene")
david %>% setAnnotationCategories(ontologies)
annoChart <-getFunctionalAnnotationChart(david)
# clusterReport <-getClusterReport(david)
unloadNamespace('RDAVIDWebService')
## remove gene colum
# browser()
annoChart <- as.data.frame(unclass(annoChart))
# http://stackoverflow.com/questions/25271856/cannot-coerce-class-typeof-is-double-to-a-data-frame
# if(nrow(annoChart) >0) annoChart <- annoChart %>% dplyr::select(select=-Genes)
return(annoChart)
}
...@@ -57,6 +57,8 @@ guess_mart <- function(gene_id){ ...@@ -57,6 +57,8 @@ guess_mart <- function(gene_id){
return("mmusculus_gene_ensembl") return("mmusculus_gene_ensembl")
}else if(str_detect(an_id, "ENSDARG")){ }else if(str_detect(an_id, "ENSDARG")){
return("drerio_gene_ensembl") return("drerio_gene_ensembl")
}else if(str_detect(an_id, "ENSPTRG")){
return("ptroglodytes_gene_ensembl")
}else if(str_detect(an_id, "ENSG")){ }else if(str_detect(an_id, "ENSG")){
return("hsapiens_gene_ensembl") return("hsapiens_gene_ensembl")
}else if(str_detect(an_id, "FBgn")){ }else if(str_detect(an_id, "FBgn")){
...@@ -82,24 +84,6 @@ get_ensembl_build <- function(){ ...@@ -82,24 +84,6 @@ get_ensembl_build <- function(){
biomaRt::listMarts() %>% as.data.frame() %>% filter(biomart=="ensembl") %>% with(str_match(version, " ([0-9]*) ")) %>% subset(select=2) biomaRt::listMarts() %>% as.data.frame() %>% filter(biomart=="ensembl") %>% with(str_match(version, " ([0-9]*) ")) %>% subset(select=2)
} }
guess_pathview_species <- function(gene_id){
an_id <-gene_id[1]
## see http://www.genome.jp/kegg-bin/find_org_www?mode=abbr&obj=mode.map
if(str_detect(an_id, "ENSMUSG")){
return("mmu")
}else if(str_detect(an_id, "ENSDARG")){
return("dre")
}else if(str_detect(an_id, "ENSG")){
return("hsa")
}else if(str_detect(an_id, "FBgn")){
return("dme")
}else{
stop(paste("could not guess mart from ", an_id))
}
}
#guess_mart("ENSCAFG00000000043")
getGeneInfo <- function(gene_ids){ getGeneInfo <- function(gene_ids){
martName <- guess_mart(gene_ids[1]) martName <- guess_mart(gene_ids[1])
...@@ -202,8 +186,6 @@ diff_intersect <- function(deData, sample_1, sample_twoes, .intersect_method, .. ...@@ -202,8 +186,6 @@ diff_intersect <- function(deData, sample_1, sample_twoes, .intersect_method, ..
## todo add helper to test for equality (s1 and s2 not differentially expressed) ## todo add helper to test for equality (s1 and s2 not differentially expressed)
## from marta: ## from marta:
#s1_eq_s2 <- function(s1, s2, degData=degs) subset(degData, sample_1==s1 & sample_2==s2 & sample_1_overex==F)$gene_id #s1_eq_s2 <- function(s1, s2, degData=degs) subset(degData, sample_1==s1 & sample_2==s2 & sample_1_overex==F)$gene_id
...@@ -227,90 +209,3 @@ rintersect.list <- function(LDF){ ...@@ -227,90 +209,3 @@ rintersect.list <- function(LDF){
rec_intersect rec_intersect
} }
########################################################################################################################
### enrichment analysis
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
## e.g. getClusterReport --> plot2D
DEF_DAVID_ONTOLOGIES=ontologies=c("GOTERM_CC_FAT", "GOTERM_MF_FAT", "GOTERM_BP_FAT", "PANTHER_PATHWAY", "PANTHER_FAMILY", "PANTHER_PATHWAY", "KEGG_PATHWAY", "REACTOME_PATHWAY")
davidAnnotationChart <- function( someGenes, ontologies=DEF_DAVID_ONTOLOGIES ){
require_auto(RDAVIDWebService) ## just works if installed on non-network-drive (e.g. /tmp/)
## expexted to have a column with gene_id
# echo("processing list with", length(someGenes), "genes")
# someGenes <- degs$ensembl_gene_id[1:100]
if(length(someGenes)>1500){
someGenes <- sample(someGenes) %>% head(1500)
}
david<-DAVIDWebService$new(email="brandl@mpi-cbg.de")
# ## list all ontologies
# getAllAnnotationCategoryNames(david)
# getTimeOut(david)
setTimeOut(david, 80000) ## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
result<-addList(david, someGenes, idType="ENSEMBL_GENE_ID", listName=paste0("list_", sample(10000)[1]), listType="Gene")
david %>% setAnnotationCategories(ontologies)
annoChart <-getFunctionalAnnotationChart(david)
# clusterReport <-getClusterReport(david)
unloadNamespace('RDAVIDWebService')
## remove gene colum
# browser()
annoChart <- as.data.frame(unclass(annoChart))
# http://stackoverflow.com/questions/25271856/cannot-coerce-class-typeof-is-double-to-a-data-frame
# if(nrow(annoChart) >0) annoChart <- annoChart %>% dplyr::select(select=-Genes)
return(annoChart)
}
## todo move to diffex commons
guess_cp_species <- function(ensIds){
an_id <-ensIds[1]
if(str_detect(an_id, "ENSG")){
return("human")
}else if(str_detect(an_id, "ENSMUSG")){
return("mouse")
}else if(str_detect(an_id, "ENSDARG")){
return("zebrafish")
}else if(str_detect(an_id, "FBgn")){
return("fly")
}else{
stop(paste("could not clusterProfiler species name from ", an_id))
}
}
guess_anno_db <- function(ensIds){
an_id <-ensIds[1]
if(str_detect(an_id, "ENSG")){
return("org.Hs.eg.db")
}else if(str_detect(an_id, "ENSMUSG")){
return("org.Mm.eg.db")
}else if(str_detect(an_id, "ENSDARG")){
return("org.Dr.eg.db")
}else if(str_detect(an_id, "FBgn")){
return("org.Dm.eg.db")
}else{
stop(paste("could not anno db mart from ", an_id))
}
}
...@@ -35,12 +35,12 @@ library(DESeq2) ...@@ -35,12 +35,12 @@ library(DESeq2)
#load_pack(readr) #load_pack(readr)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.42/R/ggplot_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/ggplot_commons.R")
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/bio/diffex_commons.R") ## also load common helper for expression data analysis
# source("/lustre/projects/bioinfo/herseman/scripts/ngs_tools/dge_workflow/diffex_commons.R", chdir = T) # source(interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R"))
devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v5/dge_workflow/diffex_commons.R", chdir = T) devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/dge_workflow/diffex_commons.R")
## process input arguments ## process input arguments
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment