Commit 703ce35f authored by Holger Brandl's avatar Holger Brandl

added support for design; fixed q/p cutoff parameters

parent 6c28e105
......@@ -13,6 +13,7 @@ Usage: featcounts_deseq_mf.R [options] <count_matrix> <design_matrix>
Options:
--contrasts=<tab_delim_table> Table with sample pairs for which dge analysis should be performed
--design <formula> Design fomula for DeSeq with contrast attribute at the end [default: condition]
--qcutoff <qcutoff> Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01]
--pcutoff <pcutoff> Override q-value filter and filter by p-value instead
--out <name_prefix> Name to prefix all generated result files [default: ]
......@@ -26,8 +27,8 @@ opts = docopt(doc, commandArgs(TRUE))
## load some packages first because of name-space order
library(knitr)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.48/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.48/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.49/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.49/R/ggplot_commons.R")
## also load common helper for expression data analysis
# source(interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R"))
......@@ -41,6 +42,10 @@ contrasts_file = opts$contrasts
gene_info_file = opts$gene_info
assert(is.null(gene_info_file) || file.exists(gene_info_file), "invalid gene_info_file")
designFormula = opts$design
assert(str_detect(designFormula, ".*condition$"))
results_prefix = if (str_length(opts$out) > 0) paste0(opts$out, ".") else "" # used by add_prefix
pcutoff = if (is.null(opts$pcutoff))NULL else as.numeric(opts$pcutoff)
......@@ -62,7 +67,7 @@ vec_as_df(unlist(opts)) %>%
## both batches combines
contrasts = read_tsv(contrasts_file) %T>% glimpse
contrasts = read_tsv(contrasts_file) %T>% kable
expDesign = read_tsv(design_matrix_file) %T>% glimpse
# countData = read_tsv(interp_from_env("../logms_inten_matrix_acc.txt")) %T>% glimpse
countData = read_tsv(count_matrix_file) %T>% glimpse
......@@ -161,9 +166,17 @@ orderMatcheExpDesign = data_frame(replicate = colnames(expMatrix)) %>%
right_join(expDesign, by = "replicate") %>%
arrange(col_index)
table_browser(orderMatcheExpDesign)
#' The used design formula is.
designFormula
## build design matrix
#A key strength of limma’s linear modelling approach, is the ability accommodate arbitrary experimental complexity. Simple designs, such as the one in this workflow, with cell type and batch, through to more complicated factorial designs and models with interaction terms can be handled relatively easily
design <- orderMatcheExpDesign %$% model.matrix(~ 0 + batch + condition)
# design <- orderMatcheExpDesign %$% model.matrix(~ 0 + condition)
design <- orderMatcheExpDesign %$% model.matrix(formula(as.formula(paste("~0+", designFormula))))
rownames(design) <- orderMatcheExpDesign$replicate
#design <- model.matrix(~0+group+lane)
......@@ -192,7 +205,7 @@ str(voomNorm)
# levels = colnames(design))
#' contrasts matrix was
contr.matrix = makeContrasts(contrasts = with(contrasts, paste0("condition", condition_1, "-", "condition", condition_2)), levels = colnames(design))
contr.matrix = makeContrasts(contrasts = with(contrasts, paste0("condition", condition_1, "-", "condition", condition_2)), levels = colnames(design) )
contr.matrix
......@@ -280,10 +293,10 @@ deResults %>% ggplot(aes(adj_p_val)) + geom_histogram()
#+ results='asis'
if (! is.null(qcutoff)) {
echo("Using q-value cutoff of", qcutoff)
deResults %<>% transform(is_hit = adj_p_val < 0.05)
deResults %<>% transform(is_hit = adj_p_val < qcutoff)
}else {
echo("Using p-value cutoff of", pcutoff)
deResults %<>% transform(is_hit = adj_p_val < 0.05)
deResults %<>% transform(is_hit = p_value < pcutoff)
}
#+
......@@ -368,12 +381,15 @@ degs %>%
spread(c1_overex, n) %>%
kable()
ggplot(degs, aes(paste(condition_1, "vs", condition_2), fill = (c1_overex))) +
geom_bar(position = "dodge") +
xlab(NULL) +
ylab("# DGEs") +
ggtitle("DEGs by contrast") +
coord_flip()
degs %>% mutate(contrast=paste(condition_1, "vs", condition_2))%>%
count(contrast, c1_overex) %>%
complete(contrast, c1_overex, fill = list(n = 0)) %>%
ggplot(aes(contrast, n , fill=c1_overex)) +
geom_col(position = "dodge") +
xlab(NULL) + ylab("# DGEs") +
ggtitle("DEGs by contrast") +
coord_flip()
#with(degs, as.data.frame(table(condition_1, condition_2, c1_overex))) %>% filter(Freq >0) %>% kable()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment