Commit ffc16c40 authored by Holger Brandl's avatar Holger Brandl

cont mf deseq

parent 2d9c0213
......@@ -55,7 +55,7 @@ if(is.numeric(pcutoff)) opts$qcutoff <- NULL
#' Working Directory: `r getwd()`
countData <- read.delim(count_matrix_file)
countData <- read_tsv(count_matrix_file)
## save a backup into the current working directory as a reference
countData %>% write.delim(file=paste0(resultsBase, "input_counts.txt"))
......@@ -86,13 +86,22 @@ genesAfter <- nrow(countMatrix)
#' [Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2](http://genomebiology.com/2014/15/12/550/abstract)
get_sample_from_replicate <- function(repName) str_match(repName, "(.*)_([R]|rep)?[0-9]{1}$")[,2]
if(!is.null(design_matrix_file)){
replicates2samples <- read_tsv(design_matrix_file) #%>% select(replicate, sample)
}else{
get_sample_from_replicate <- function(repName) str_match(repName, "(.*)_([R]|rep)?[0-9]{1,2}$")[,2]
replicates2samples <- data_frame(replicate=colnames(countMatrix)) %>% mutate(sample=get_sample_from_replicate(replicate)) # %>% distinct()
}
# Define or load a contrasts matrix
if(!is.null(contrasts_file)){
contrasts <- read.delim(contrasts_file, header=T) %>% fac2char()
contrasts <- read_tsv(contrasts_file)
}else{
contrasts <- data.frame(sample=get_sample_from_replicate(colnames(countMatrix))) %>% distinct() %>%
stopifnot(is.null(design_matrix_file)) ## cant work yet because sample column with have random name
contrasts <- distinct(replicates2samples, sample) %>% select(sample) %>%
merge(.,., suffixes=c("_1", "_2"), by=NULL) %>%
filter(ac(sample_1)>ac(sample_2)) %>%
# filter(ac(sample_1)!=ac(sample_2)) %>%
......@@ -100,7 +109,11 @@ if(!is.null(contrasts_file)){
write.delim(contrasts, paste(resultsBase, "contrasts.txt"))
}
#' The used contrasts model is
#' The used design matrix is
contrasts %>% kable()
#' The contrasts of interest are
contrasts %>% kable()
## gene specific factors
......@@ -111,12 +124,21 @@ contrasts %>% kable()
#dds <- estimateSizeFactors( dds )
#sizeFactors(dds)
## try again but now use lambda normalization
## see "3.11 Sample-/gene-dependent normalization factors" in the DEseq2 manual for details
colData <- data.frame(condition=colnames(countMatrix) %>% get_sample_from_replicate)
## old non-mf approach
#colData <- data.frame(condition=colnames(countMatrix) %>% get_sample_from_replicate)
## new mf-approach
colData <- replicates2samples %>%data.frame(replciate=colnames(countMatrix) %>% get_sample_from_replicate) %>%
arrange(replicate)
colData %<>% rename(sample, sample_2ndwt)
#colData <- replicates2samples %>% arrange(colnames(countMatrix))
# valiadate that contrasts model is compatible with data
%<>%
if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
echo("column model: ",colData$condition)
echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment