Skip to content
Snippets Groups Projects
Commit a7cb6336 authored by Holger Brandl's avatar Holger Brandl
Browse files

smoothed deseq workflow

parent 9c606c16
No related branches found
No related tags found
No related merge requests found
......@@ -33,25 +33,6 @@ require.auto(DT)
require.auto(gplots)
##+ results='asis', echo=FALSE
#cat('
#<link rel="stylesheet" type="text/css" href="http://cdn.datatables.net/1.10.5/css/jquery.dataTables.min.css">
#<script type="text/javascript" charset="utf8" src="http://code.jquery.com/jquery-2.1.2.min.js"></script>
#<script type="text/javascript" charset="utf8" src="http://cdn.datatables.net/1.10.5/js/jquery.dataTables.min.js"></script>
#
#<script type="text/javascript">
# $(document).ready(function() {
# // alert("test")
# //$("table").DataTable();
# //$("table").DataTable();
# //$("#tab_id").DataTable();
# $(".dtable").DataTable();
# } );
#</script>
#')
count_matrix_file <- opts$count_matrix
contrasts_file <- opts$contrasts
......@@ -61,8 +42,6 @@ if(is.numeric(pcutoff)) opts$qcutoff <- NULL
########################################################################################################################
#' # Differential Expresssion Analysis
#' ## Data preparation
#' Working Directory: `r getwd()`
......@@ -74,41 +53,27 @@ countData <- read.delim(count_matrix_file)
head(countData) %>% kable()
countMatrix <- countData %>% column2rownames("ensembl_gene_id") %>% as.matrix()
#' Apply expression filter
nrow(countMatrix)
countMatrix %<>% filterByExpression()
nrow(countMatrix)
# Expression Filtering
genesBefore <- nrow(countMatrix)
countMatrix %<>% filterByExpression(opts$min_count)
genesAfter <- nrow(countMatrix)
#' Counts were filtered to only retain genes which had more that `opts$min_count` alignments in at least one replicates. This reduced the number of genes from `r genesBefore' to `r genesAfter`.
########################################################################################################################
#' # Load annotation data
## load transcriptome annotations needed for results annotation
geneInfo <- quote({
## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
mart <- biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart=mart)
}) %>% cache_it("geneInfo")
#filterByExpression <- function(fpkmMat, min_count=1, logMode=F){
# if(logMode) fpkmMat<-log10(fpkmMat+1) ## add a pseudocount
#
# geneMax <- apply(fpkmMat, 1, max)
#
# fpkmMat[geneMax>min_count,]
#}
#countMatrixFilt <- filterByExpression(countMatrix, min_count=50)
#' ## Perform Differential Expression Analysis
#' See deseq reference [docs](http://master.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf) for details
#' To understand fold-change shrinking and estimation check out
#' [Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2](http://genomebiology.com/2014/15/12/550/abstract)
## todo expose as argument
# TODO add general info about the deseq process/model here
get_sample_from_replicate <- function(repName) str_match(repName, "(.*)_[0-9]{1}$")[,2]
#' Define or load a contrasts matrix
# Define or load a contrasts matrix
if(!is.null(contrasts_file)){
contrasts <- read.delim(contrasts_file, header=T) %>% fac2char()
}else{
......@@ -120,12 +85,9 @@ if(!is.null(contrasts_file)){
write.delim(contrasts, "dba_contrasts.txt")
}
#' The used contrasts model is
contrasts %>% kable()
########################################################################################################################
#' ## Perform Differential Expression Analysis
## gene specific factors
#normalizationFactors(dds)
......@@ -145,9 +107,7 @@ dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(~ condition))
dds <- estimateSizeFactors(dds)
#' # Custom Lambda Normalization
#' See https://www.biostars.org/p/79978/
#' For details about size factor normalziation and calculation see https://www.biostars.org/p/79978/
##' Size Factors estimated by DESeq
sizeFactors(dds) %>% set_names(colnames(countMatrix)) %>% melt() %>% rownames2column("sample") %>% ggplot(aes(sample, value)) + geom_bar(stat="identity") + ggtitle("deseq size factors")
......@@ -167,7 +127,6 @@ dds <- DESeq(dds)
#resultsNames(dds)
#' Model Overview
#+ results='asis'
summary(results(dds))
########################################################################################################################
......@@ -339,6 +298,15 @@ deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) +
## load transcriptome annotations needed for results annotation
geneInfo <- quote({
## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
mart <- biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart=mart)
}) %>% cache_it("geneInfo")
# Export the complete dataset for later analysis
deAnnot <- deResults %>%
#inner_join(normCounts) %>%
......@@ -430,7 +398,8 @@ save(degs, file=".degs.RData")
########################################################################################################################
#' ### MA Plot
#' ## MA Plots
#' Redo MA plots but now including hit overlays
maPlots <- deResults %>% group_by(sample_1, sample_2) %>% do(gg={ maData <-.
......@@ -441,7 +410,7 @@ maData %>% ggplot(aes(0.5*log2(norm_count_1*norm_count_2), log2(norm_count_1/nor
ggtitle(with(maData[1,], paste(sample_1, "vs", sample_2)))
}) %$% gg
#+ fig.height=10*ceiling(length(maPlots)/2)
#+ fig.height=7*ceiling(length(maPlots)/2)
multiplot(plotlist=maPlots, cols=min(2, length(maPlots)))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment