Commit f0c589a4 authored by Holger Brandl's avatar Holger Brandl

expose used ensembl backend db as parameter; more strict input data validation.

parent 6b32d1fb
......@@ -16,6 +16,7 @@ Options:
--out <name_prefix> Name to prefix all generated result files [default: ]
--design <formula> Design fomula for DeSeq with contrast attribute at the end [default: condition]
--lfc <lfc_cutoff> Just report genes with abs(lfc) > lfc_cutoff as hits [default: 1.0]
--ensembl_db <ensembl_db> Ensebmbl db to be used. If not specified inferred from data. TODO implement NONE here!!
'
#commandArgs <- function(x) c("--design", "'batch+condition'", "--contrasts", "../contrasts.txt star_counts_matrix_reduced_sampleset.txt", "basic_design_reduced_sampleset.txt")
......@@ -166,6 +167,9 @@ exDesign = data_frame(replicate = colnames(countMatrix)) %>%
right_join(replicates2samples, by = "replicate") %>%
arrange(col_index) #%>% transmute(condition=sample_2ndwt) %>% fac2char
assert(!any(is.na(exDesign$col_index)), "assay design file is not compatible to count matrix column names")
## infer the sample to be used from the formula
#designFormula = "sample_2ndwt + batch"
......@@ -175,6 +179,8 @@ exDesign = data_frame(replicate = colnames(countMatrix)) %>%
#exDesign = replicates2samples %>% arrange(colnames(countMatrix))
# valiadate that contrasts model is compatible with data
assert(setequal(names(contrasts), c("condition_1", "condition_2")), "contrasts matrix has incorrect column headers. Expected: condition_1, condition_2")
designSamples = as_df(exDesign)[, contrastAttribute] %>%
unique %>%
sort
......@@ -265,7 +271,8 @@ session::save.session(".fc_debug.dat");
load_pack(limma)
load_pack(stringi)
load_pack(htmltools)
library(plotly)
load_pack(plotly)
# install.packages("htmlwidgets")
# extract rlogTransformation normalized count data
norm_counts <- assay(rld)
......@@ -625,6 +632,8 @@ deResults %>%
install_package("biomaRt")
ensembl_dataset= if (!is.null(opts$ensembl_db)) paste0(opts$ensembl_db, "_gene_ensembl") else guess_mart(countData$ensembl_gene_id)
# Load transcriptome annotations needed for results annotation
geneInfo = quote({
## mart = biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))??
......@@ -632,7 +641,7 @@ geneInfo = quote({
## todo fix this https://support.bioconductor.org/p/74322/
# mart = biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
# mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = "mmusculus_gene_ensembl", host = "dec2016.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = guess_mart(countData$ensembl_gene_id), host = "mar2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = ensembl_dataset, host = "mar2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment