Commit 32cc19ec authored by Lena Hersemann's avatar Lena Hersemann

Merge remote-tracking branch 'origin/master'

parents 6f4c9180 48e5f59c
......@@ -16,7 +16,7 @@ export PYTHONPATH=/home/brandl/bin/macs2/lib/python2.7/site-packages:$PYTHONPATH
cs_bowtie_qc(){
rendr_snippet "bowtie2_qc" <<EOF
rendr_snippet "bowtie2_qc" <<"EOF"
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.8/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.8/R/ggplot_commons.R")
......
This diff is collapsed.
......@@ -45,8 +45,7 @@ gene_info_file = opts$gene_info
assert(is.null(gene_info_file) || file.exists(gene_info_file), "invalid gene_info_file")
designFormula = opts$design
assert(str_detect(designFormula, ".*condition$"))
assert(str_detect(designFormula, "^condition.*")) ## make sure that the condition comes before all batch factors
results_prefix = if (str_length(opts$out) > 0) opts$out else "" # used by add_prefix
......@@ -136,7 +135,21 @@ group_labels = data_frame(replicate = colnames(expMatrix)) %>%
names(group_labels) = colnames(expMatrix)
makePcaPlot(t(expMatrix), color_by = group_labels, title = "PCA of quantifiable proteins in all conditions")
#' Also do a scatter plot matrix of the PCs
mydata.pca = prcomp(t(expMatrix), retx = TRUE, center = TRUE, scale. = FALSE)
# screeplot(mydata.pca)
# devtools::install_github("vqv/ggbiplot")
# require(ggbiplot)
ggbiplot::ggscreeplot(mydata.pca) + geom_col()
# load_pack(GGally)
pcs = mydata.pca$x %>% as_df %>% rownames_to_column("sample")
pcs %>% GGally::ggpairs(columns=2:6, mapping=ggplot2::aes(color=sample), upper="blank", legend=c(3,3)) + theme(legend.position = "bottom")
#' Also analyze Spearman correlation
correlation = cor(expMatrix, method = "spearman")
library(lattice)
levelplot(correlation, scales = list(x = list(rot = 90)), pretty = TRUE, main = "Spearman correlation between conditions after Normalization", xlab = "Conditions", ylab = "Conditions")
......@@ -180,11 +193,15 @@ orderMatcheExpDesign = data_frame(replicate = colnames(expMatrix)) %>%
right_join(expDesign, by = "replicate") %>%
arrange(col_index)
## build design matrix
#A key strength of limma’s linear modelling approach, is the ability accommodate arbitrary experimental complexity. Simple designs, such as the one in this workflow, with cell type and batch, through to more complicated factorial designs and models with interaction terms can be handled relatively easily
#' Build design matrix
#' > A key strength of limma’s linear modelling approach, is the ability accommodate arbitrary experimental complexity. Simple designs, such as the one in this workflow, with cell type and batch, through to more complicated factorial designs and models with interaction terms can be handled relatively easily
#'
#' Make sure that none of the batch factors is confounded with the treatment (condition). See https://support.bioconductor.org/p/39385/ for a discussion
#' References
#' * https://f1000research.com/articles/5-1408/v1
# design <- orderMatcheExpDesign %$% model.matrix(~ 0 + condition)
design <- orderMatcheExpDesign %$% model.matrix(formula(as.formula(paste("~0+", designFormula))))
# design <- orderMatcheExpDesign %$% model.matrix(~ 0 + condition + prep_day)
design = orderMatcheExpDesign %$% model.matrix(formula(as.formula(paste("~0+", designFormula))))
rownames(design) <- orderMatcheExpDesign$replicate
#design <- model.matrix(~0+group+lane)
......@@ -319,7 +336,8 @@ deResults %<>% left_join(sampleMeans)
#' apply the hit criterion
deResults %>% ggplot(aes(adj_p_val)) + geom_histogram()
deResults %>% ggplot(aes(p_value)) + geom_histogram(binwidth=.01)
deResults %>% ggplot(aes(adj_p_val)) + geom_histogram(binwidth=.01)
# report hit criterion
#+ results='asis'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment