Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
N
ngs_tools
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
15
Issues
15
List
Boards
Labels
Service Desk
Milestones
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Analytics
Analytics
Insights
Issue
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
bioinfo
ngs_tools
Commits
f4aa55d8
Commit
f4aa55d8
authored
Oct 27, 2017
by
Holger Brandl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added support for chimpanzee
parent
a2e5630c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
91 additions
and
132 deletions
+91
-132
common/cp_enrichment.R
common/cp_enrichment.R
+9
-5
common/cp_utils.R
common/cp_utils.R
+23
-15
dge_workflow/deprecated/david_enrichment_util.R
dge_workflow/deprecated/david_enrichment_util.R
+52
-0
dge_workflow/diffex_commons.R
dge_workflow/diffex_commons.R
+2
-107
dge_workflow/featcounts_deseq_mf.R
dge_workflow/featcounts_deseq_mf.R
+5
-5
No files found.
common/cp_enrichment.R
View file @
f4aa55d8
...
...
@@ -19,14 +19,18 @@ Options:
opts
<-
docopt
(
doc
,
commandArgs
(
TRUE
))
## does not work when spining
# opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" )
coreCommons
=
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4
4
/R/core_commons.R"
coreCommons
=
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4
5
/R/core_commons.R"
devtools
::
source_url
(
coreCommons
)
devtools
::
source_url
(
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/ggplot_commons.R"
)
devtools
::
source_url
(
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.45/R/ggplot_commons.R"
)
devtools
::
source_url
(
"https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/common/cp_utils.R"
)
# source(interp_from_env("${NGS_TOOLS}/common/cp_utils.R"))
## also load diffex helpers for guess_mart
# source(interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R"))
devtools
::
source_url
(
"https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/dge_workflow/diffex_commons.R"
)
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/bio/cp_utils.R")
devtools
::
source_url
(
"https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v2/common/cp_utils.R"
)
# devtools::source_url("https://www.dropbox.com/s/p2b8luxf7jteb63/cp_utils.R?dl=1")
# should be no longer needed
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/bio/diffex_commons.R")
...
...
common/cp_utils.R
View file @
f4aa55d8
...
...
@@ -11,25 +11,40 @@ install_package("clusterProfiler")
guess_cp_species
<-
function
(
ensIds
){
an_id
<-
ensIds
[
1
]
if
(
str_detect
(
an_id
,
"ENSG"
)){
return
(
"human"
)
}
else
if
(
str_detect
(
an_id
,
"ENSMUSG"
)){
return
(
"mouse"
)
}
else
if
(
str_detect
(
an_id
,
"ENSDARG"
)){
return
(
"zebrafish"
)
}
else
if
(
str_detect
(
an_id
,
"ENSPTRG"
)){
return
(
"ptr"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
return
(
"fly"
)
}
else
if
(
.is_yeast
(
ensIds
)){
return
(
"yeast"
)
}
else
{
stop
(
paste
(
"could not clusterProfiler species name from "
,
an_id
))
stop
(
paste
(
"could not
guess
clusterProfiler species name from "
,
an_id
))
}
}
## todo how to guess yeast ("org.Sc.sgd.db") from id?
# todo automatically install org-db if missing in guess_anno_db call
## see http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
# source("http://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
#biocLite("org.Hs.eg.db")
#biocLite("org.Dr.eg.db")
#biocLite("org.Dm.eg.db")
#biocLite("org.Sc.sgd.db")
#biocLite("org.Pt.eg.db")
#biocLite("KEGG.db")
#biocLite("ReactomePA")
guess_anno_db
<-
function
(
ensIds
){
an_id
<-
ensIds
[
1
]
...
...
@@ -41,6 +56,8 @@ guess_anno_db <- function(ensIds){
return
(
"org.Dr.eg.db"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
return
(
"org.Dm.eg.db"
)
}
else
if
(
str_detect
(
an_id
,
"ENSPTRG"
)){
return
(
"org.Pt.eg.db"
)
}
else
if
(
.is_yeast
(
ensIds
)){
return
(
"org.Sc.sgd.db"
)
}
else
{
...
...
@@ -62,6 +79,8 @@ guess_pathview_species <- function(ensIds){
return
(
"dre"
)
}
else
if
(
str_detect
(
an_id
,
"ENSG"
)){
return
(
"hsa"
)
}
else
if
(
str_detect
(
an_id
,
"ENSPTRG"
)){
return
(
"ptr"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
return
(
"dme"
)
}
else
if
(
.is_yeast
(
ensIds
)){
...
...
@@ -73,17 +92,6 @@ guess_pathview_species <- function(ensIds){
#guess_mart("ENSCAFG00000000043")
## see http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
#source("http://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
#biocLite("org.Hs.eg.db")
#biocLite("org.Dr.eg.db")
#biocLite("org.Dm.eg.db")
#biocLite("org.Sc.sgd.db")
#biocLite("KEGG.db")
#biocLite("ReactomePA")
#load_pack(ReactomePA)
...
...
dge_workflow/deprecated/david_enrichment_util.R
0 → 100644
View file @
f4aa55d8
########################################################################################################################
### enrichment analysis
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
## e.g. getClusterReport --> plot2D
DEF_DAVID_ONTOLOGIES
=
ontologies
=
c
(
"GOTERM_CC_FAT"
,
"GOTERM_MF_FAT"
,
"GOTERM_BP_FAT"
,
"PANTHER_PATHWAY"
,
"PANTHER_FAMILY"
,
"PANTHER_PATHWAY"
,
"KEGG_PATHWAY"
,
"REACTOME_PATHWAY"
)
davidAnnotationChart
<-
function
(
someGenes
,
ontologies
=
DEF_DAVID_ONTOLOGIES
){
require_auto
(
RDAVIDWebService
)
## just works if installed on non-network-drive (e.g. /tmp/)
## expexted to have a column with gene_id
# echo("processing list with", length(someGenes), "genes")
# someGenes <- degs$ensembl_gene_id[1:100]
if
(
length
(
someGenes
)
>
1500
){
someGenes
<-
sample
(
someGenes
)
%>%
head
(
1500
)
}
david
<-
DAVIDWebService
$
new
(
email
=
"brandl@mpi-cbg.de"
)
# ## list all ontologies
# getAllAnnotationCategoryNames(david)
# getTimeOut(david)
setTimeOut
(
david
,
80000
)
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
result
<-
addList
(
david
,
someGenes
,
idType
=
"ENSEMBL_GENE_ID"
,
listName
=
paste0
(
"list_"
,
sample
(
10000
)[
1
]),
listType
=
"Gene"
)
david
%>%
setAnnotationCategories
(
ontologies
)
annoChart
<-
getFunctionalAnnotationChart
(
david
)
# clusterReport <-getClusterReport(david)
unloadNamespace
(
'RDAVIDWebService'
)
## remove gene colum
# browser()
annoChart
<-
as.data.frame
(
unclass
(
annoChart
))
# http://stackoverflow.com/questions/25271856/cannot-coerce-class-typeof-is-double-to-a-data-frame
# if(nrow(annoChart) >0) annoChart <- annoChart %>% dplyr::select(select=-Genes)
return
(
annoChart
)
}
dge_workflow/diffex_commons.R
View file @
f4aa55d8
...
...
@@ -57,6 +57,8 @@ guess_mart <- function(gene_id){
return
(
"mmusculus_gene_ensembl"
)
}
else
if
(
str_detect
(
an_id
,
"ENSDARG"
)){
return
(
"drerio_gene_ensembl"
)
}
else
if
(
str_detect
(
an_id
,
"ENSPTRG"
)){
return
(
"ptroglodytes_gene_ensembl"
)
}
else
if
(
str_detect
(
an_id
,
"ENSG"
)){
return
(
"hsapiens_gene_ensembl"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
...
...
@@ -82,24 +84,6 @@ get_ensembl_build <- function(){
biomaRt
::
listMarts
()
%>%
as.data.frame
()
%>%
filter
(
biomart
==
"ensembl"
)
%>%
with
(
str_match
(
version
,
" ([0-9]*) "
))
%>%
subset
(
select
=
2
)
}
guess_pathview_species
<-
function
(
gene_id
){
an_id
<-
gene_id
[
1
]
## see http://www.genome.jp/kegg-bin/find_org_www?mode=abbr&obj=mode.map
if
(
str_detect
(
an_id
,
"ENSMUSG"
)){
return
(
"mmu"
)
}
else
if
(
str_detect
(
an_id
,
"ENSDARG"
)){
return
(
"dre"
)
}
else
if
(
str_detect
(
an_id
,
"ENSG"
)){
return
(
"hsa"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
return
(
"dme"
)
}
else
{
stop
(
paste
(
"could not guess mart from "
,
an_id
))
}
}
#guess_mart("ENSCAFG00000000043")
getGeneInfo
<-
function
(
gene_ids
){
martName
<-
guess_mart
(
gene_ids
[
1
])
...
...
@@ -202,8 +186,6 @@ diff_intersect <- function(deData, sample_1, sample_twoes, .intersect_method, ..
## todo add helper to test for equality (s1 and s2 not differentially expressed)
## from marta:
#s1_eq_s2 <- function(s1, s2, degData=degs) subset(degData, sample_1==s1 & sample_2==s2 & sample_1_overex==F)$gene_id
...
...
@@ -227,90 +209,3 @@ rintersect.list <- function(LDF){
rec_intersect
}
########################################################################################################################
### enrichment analysis
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
## e.g. getClusterReport --> plot2D
DEF_DAVID_ONTOLOGIES
=
ontologies
=
c
(
"GOTERM_CC_FAT"
,
"GOTERM_MF_FAT"
,
"GOTERM_BP_FAT"
,
"PANTHER_PATHWAY"
,
"PANTHER_FAMILY"
,
"PANTHER_PATHWAY"
,
"KEGG_PATHWAY"
,
"REACTOME_PATHWAY"
)
davidAnnotationChart
<-
function
(
someGenes
,
ontologies
=
DEF_DAVID_ONTOLOGIES
){
require_auto
(
RDAVIDWebService
)
## just works if installed on non-network-drive (e.g. /tmp/)
## expexted to have a column with gene_id
# echo("processing list with", length(someGenes), "genes")
# someGenes <- degs$ensembl_gene_id[1:100]
if
(
length
(
someGenes
)
>
1500
){
someGenes
<-
sample
(
someGenes
)
%>%
head
(
1500
)
}
david
<-
DAVIDWebService
$
new
(
email
=
"brandl@mpi-cbg.de"
)
# ## list all ontologies
# getAllAnnotationCategoryNames(david)
# getTimeOut(david)
setTimeOut
(
david
,
80000
)
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
result
<-
addList
(
david
,
someGenes
,
idType
=
"ENSEMBL_GENE_ID"
,
listName
=
paste0
(
"list_"
,
sample
(
10000
)[
1
]),
listType
=
"Gene"
)
david
%>%
setAnnotationCategories
(
ontologies
)
annoChart
<-
getFunctionalAnnotationChart
(
david
)
# clusterReport <-getClusterReport(david)
unloadNamespace
(
'RDAVIDWebService'
)
## remove gene colum
# browser()
annoChart
<-
as.data.frame
(
unclass
(
annoChart
))
# http://stackoverflow.com/questions/25271856/cannot-coerce-class-typeof-is-double-to-a-data-frame
# if(nrow(annoChart) >0) annoChart <- annoChart %>% dplyr::select(select=-Genes)
return
(
annoChart
)
}
## todo move to diffex commons
guess_cp_species
<-
function
(
ensIds
){
an_id
<-
ensIds
[
1
]
if
(
str_detect
(
an_id
,
"ENSG"
)){
return
(
"human"
)
}
else
if
(
str_detect
(
an_id
,
"ENSMUSG"
)){
return
(
"mouse"
)
}
else
if
(
str_detect
(
an_id
,
"ENSDARG"
)){
return
(
"zebrafish"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
return
(
"fly"
)
}
else
{
stop
(
paste
(
"could not clusterProfiler species name from "
,
an_id
))
}
}
guess_anno_db
<-
function
(
ensIds
){
an_id
<-
ensIds
[
1
]
if
(
str_detect
(
an_id
,
"ENSG"
)){
return
(
"org.Hs.eg.db"
)
}
else
if
(
str_detect
(
an_id
,
"ENSMUSG"
)){
return
(
"org.Mm.eg.db"
)
}
else
if
(
str_detect
(
an_id
,
"ENSDARG"
)){
return
(
"org.Dr.eg.db"
)
}
else
if
(
str_detect
(
an_id
,
"FBgn"
)){
return
(
"org.Dm.eg.db"
)
}
else
{
stop
(
paste
(
"could not anno db mart from "
,
an_id
))
}
}
dge_workflow/featcounts_deseq_mf.R
View file @
f4aa55d8
...
...
@@ -35,12 +35,12 @@ library(DESeq2)
#load_pack(readr)
devtools
::
source_url
(
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4
0
/R/core_commons.R"
)
devtools
::
source_url
(
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4
2
/R/ggplot_commons.R"
)
devtools
::
source_url
(
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4
5
/R/core_commons.R"
)
devtools
::
source_url
(
"https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4
5
/R/ggplot_commons.R"
)
#
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/bio/diffex_commons.R")
# source(
"/lustre/projects/bioinfo/herseman/scripts/ngs_tools/dge_workflow/diffex_commons.R", chdir = T
)
devtools
::
source_url
(
"https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v
5/dge_workflow/diffex_commons.R"
,
chdir
=
T
)
#
# also load common helper for expression data analysis
# source(
interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R")
)
devtools
::
source_url
(
"https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v
6/dge_workflow/diffex_commons.R"
)
## process input arguments
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment