Commit e3b0de32 authored by Lena Hersemann's avatar Lena Hersemann

added option to give own geneInfo file

parent c3c9f565
......@@ -17,10 +17,11 @@ Options:
--design <formula> Design fomula for DeSeq with contrast attribute at the end [default: condition]
--lfc <lfc_cutoff> Just report genes with abs(lfc) > lfc_cutoff as hits [default: 1.0]
--ensembl_db <ensembl_db> Ensebmbl db to be used. If not specified inferred from data. TODO implement NONE here!!
--geneInfo <info_file> Gene information file downloaded from the ensembl website if bioMart version is not present [default: bioMart version will be used]
'
#commandArgs <- function(x) c("--design", "'batch+condition'", "--contrasts", "../contrasts.txt star_counts_matrix_reduced_sampleset.txt", "basic_design_reduced_sampleset.txt")
#commandArgs <- function(x) c("--design", "'condition'", "--contrasts", "../contrasts_pairwise.txt", "../alignments/star_counts_matrix.txt", "../basic_design_pairwise.txt")
#commandArgs <- function(x) c("--design", "'condition'", "--contrasts", "../contrasts.txt", "--geneInfo", "../mart_export_CHOK1GS_HDv1_v90.txt", "../alignments/star_counts_matrix.txt", "../basic_design.txt")
opts = docopt(doc, commandArgs(TRUE))
......@@ -35,7 +36,8 @@ library(DESeq2)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.42/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/bio/diffex_commons.R")
# devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/bio/diffex_commons.R")
source("/lustre/projects/bioinfo/herseman/scripts/ngs_tools/dge_workflow/diffex_commons.R", chdir = T)
## process input arguments
......@@ -635,19 +637,26 @@ install_package("biomaRt")
ensembl_dataset= if (!is.null(opts$ensembl_db)) paste0(opts$ensembl_db, "_gene_ensembl") else guess_mart(countData$ensembl_gene_id)
# Load transcriptome annotations needed for results annotation
geneInfo = quote({
## mart = biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))??
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
# mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", host = "dec2016.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = ensembl_dataset, host = "mar2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart = mart) %>%
tbl_df
}) %>% cache_it("geneInfo")
if (is.null(opts$geneInfo)){
geneInfo = quote({
## mart = biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))??
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
# mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", host = "dec2016.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = ensembl_dataset, host = "aug2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart = mart) %>%
tbl_df
}) %>% cache_it("geneInfo")
} else {
geneInfo = read_tsv(opts$geneInfo)
colnames(geneInfo)[1] = "ensembl_gene_id"
}
geneLengths = transmute(geneInfo, ensembl_gene_id, gene_length = end_position - start_position)
geneDescs = transmute(geneInfo, ensembl_gene_id, gene_name = external_gene_name, gene_description = description)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment