Commit 79ca1300 authored by Lena Hersemann

changed the following: if no gene_info file is provided and the first colname...

Changed the following: if no gene_info file is provided and the first column name of the count matrix is not 'ensembl_gene_id', that first column is reported as geneInfo and geneDescs.
parent 7c35fbc4
......@@ -24,6 +24,7 @@ Options:
'
# commandArgs = function(foo) c("--gene_info", "../mmus_ens_aug2017_uniprot_compl_gene_info.txt","--contrasts", "example_contrasts.txt", "../inten_matrix_acc.txt", "../diff_abund/ex_design.txt")
# commandArgs = function(x) c("--contrasts", "contrasts.txt", "../../data_prep.intens_imputed.txt", "exp_design_timepoint.txt")
opts = docopt(doc, commandArgs(TRUE))
## load some packages first because of name-space order
......@@ -360,25 +361,30 @@ deResults %>%
#' ## Annotate results
if (is.null(gene_info_file)) {
ensembl_dataset = if (! is.null(opts$ensembl_db)) paste0(opts$ensembl_db, "_gene_ensembl") else guess_mart(countData$ensembl_gene_id)
geneInfo = quote({
## mart = biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))??
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
# mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", host = "dec2016.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = ensembl_dataset, host = "aug2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart = mart) %>%
tbl_df %>%
rename(gene_id = ensembl_gene_id)
}) %>% cache_it("geneInfo")
# geneLengths = transmute(geneInfo, gene_id, gene_length = end_position - start_position)
geneDescs = transmute(geneInfo, gene_id, gene_name = external_gene_name, gene_description = description)
# Build the annotation tables `geneInfo` and `geneDescs` from countData alone.
# Two cases are distinguished by whether countData carries an
# `ensembl_gene_id` column.
if ("ensembl_gene_id" %in% colnames(countData)){
# Dataset name: opts$ensembl_db with the "_gene_ensembl" suffix when that option
# is set, otherwise whatever guess_mart() returns for the Ensembl ids.
ensembl_dataset = if (! is.null(opts$ensembl_db)) paste0(opts$ensembl_db, "_gene_ensembl") else guess_mart(countData$ensembl_gene_id)
# The BioMart query is passed unevaluated (quote) to cache_it() under the name
# "geneInfo" -- presumably so the remote query result is cached and reused;
# TODO confirm against the definition of cache_it().
geneInfo = quote({
## mart = biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))??
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
# mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", host = "dec2016.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
# The mart host is pinned to the aug2017 Ensembl archive.
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = ensembl_dataset, host = "aug2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
# Fetch the listed attributes and expose the Ensembl id column as `gene_id`.
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart = mart) %>%
tbl_df %>%
rename(gene_id = ensembl_gene_id)
}) %>% cache_it("geneInfo")
# geneLengths = transmute(geneInfo, gene_id, gene_length = end_position - start_position)
# Reduced view of geneInfo: id, gene name and description only.
geneDescs = transmute(geneInfo, gene_id, gene_name = external_gene_name, gene_description = description)
} else {
# No `ensembl_gene_id` column: no BioMart lookup is done; both tables are just
# the distinct `gene_id` values found in countData.
# NOTE(review): assumes countData has a `gene_id` column at this point (the
# commit message speaks of "the first column") -- confirm against the code
# that loads countData.
geneInfo = distinct(countData, gene_id)
geneDescs = geneInfo
}
} else {
if(gene_info_file != "NA"){
geneInfo = read_tsv(gene_info_file)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment