Skip to content
Snippets Groups Projects
Commit 49599782 authored by Holger Brandl's avatar Holger Brandl
Browse files

Merge branch 'master' of git-srv1:/local/git/bioinformatics

parents ae76b665 65cfe1d9
No related branches found
No related tags found
No related merge requests found
...@@ -21,7 +21,7 @@ Options: ...@@ -21,7 +21,7 @@ Options:
#opts <- docopt(doc, commandArgs(TRUE)) #opts <- docopt(doc, commandArgs(TRUE))
opts <- docopt(doc, "--pcutoff 0.05 --contrasts ../time_contrasts.txt ../peak_clusters_tss5kb.count_matrix.txt") opts <- docopt(doc, "--pcutoff 0.05 --contrasts ~/MPI-CBG_work/P5_DESeq/testing/dba_contrasts.txt ~/MPI-CBG_work/P5_DESeq/testing/countMatrix.txt")
## opts <- docopt(doc, "countMatrix.txt") ## opts <- docopt(doc, "countMatrix.txt")
require(knitr) require(knitr)
...@@ -60,15 +60,7 @@ qcutoff <- if(is.numeric(pcutoff)) NULL else as.numeric(opts$qcutoff) ...@@ -60,15 +60,7 @@ qcutoff <- if(is.numeric(pcutoff)) NULL else as.numeric(opts$qcutoff)
if(is.numeric(pcutoff)) opts$qcutoff <- NULL if(is.numeric(pcutoff)) opts$qcutoff <- NULL
########################################################################################################################
#' # Load annotation data
## load transcriptome annotations needed for results annotation
geneInfo <- quote({
mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart=mart)
}) %>% cache_it("geneInfo")
######################################################################################################################## ########################################################################################################################
#' # Differential Expresssion Analysis #' # Differential Expresssion Analysis
...@@ -82,6 +74,8 @@ geneInfo <- quote({ ...@@ -82,6 +74,8 @@ geneInfo <- quote({
######################################################################################################################## ########################################################################################################################
#' # Differential Expresssion Analysis #' # Differential Expresssion Analysis
#' ## Data preparation
#' Working Directory: `r getwd()` #' Working Directory: `r getwd()`
resultsBase <- count_matrix_file %>% basename() %>% trim_ext(".txt") %>% trim_ext(".count_matrix") resultsBase <- count_matrix_file %>% basename() %>% trim_ext(".txt") %>% trim_ext(".count_matrix")
...@@ -96,6 +90,16 @@ nrow(countMatrix) ...@@ -96,6 +90,16 @@ nrow(countMatrix)
countMatrix %<>% filterByExpression() countMatrix %<>% filterByExpression()
nrow(countMatrix) nrow(countMatrix)
########################################################################################################################
#' # Load annotation data
## load transcriptome annotations needed for results annotation
geneInfo <- quote({
## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
mart <- biomaRt::useDataset(guess_mart(ensembl_gene_id), mart = biomaRt::useMart("ensembl")),
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart=mart)
}) %>% cache_it("geneInfo")
#filterByExpression <- function(fpkmMat, min_count=1, logMode=F){ #filterByExpression <- function(fpkmMat, min_count=1, logMode=F){
# if(logMode) fpkmMat<-log10(fpkmMat+1) ## add a pseudocount # if(logMode) fpkmMat<-log10(fpkmMat+1) ## add a pseudocount
...@@ -250,9 +254,9 @@ maData %>% ggplot(aes(0.5*log2(norm_count_1*norm_count_2), log2(norm_count_2/nor ...@@ -250,9 +254,9 @@ maData %>% ggplot(aes(0.5*log2(norm_count_1*norm_count_2), log2(norm_count_2/nor
##Volcano plots ##Volcano plots
hitCounts <- filter(deResults, is_hit) %>% hitCounts <- filter(deResults, is_hit) %>%
count(sample_1, sample_2, sample_1_overex=s1_over_s2_logfc>0) %>% count(sample_1, sample_2, s1_overex) %>%
rename(hits=n) %>% rename(hits=n) %>%
merge(data.frame(sample_1_overex=c(T,F), x_pos=c(-2.5,2.5))) merge(data.frame(s1_overex=c(T,F), x_pos=c(-2.5,2.5)))
#+ fig.width=16, fig.height=14 #+ fig.width=16, fig.height=14
deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) + deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) +
...@@ -329,6 +333,66 @@ degs %>% ...@@ -329,6 +333,66 @@ degs %>%
select(s1_over_s2_logfc, pvalue, ensembl_gene_id, sample_1, sample_2, external_gene_name, description, igv) %>% select(s1_over_s2_logfc, pvalue, ensembl_gene_id, sample_1, sample_2, external_gene_name, description, igv) %>%
kable("html", table.attr = "class='dtable'", escape=F) kable("html", table.attr = "class='dtable'", escape=F)
#' DEGs (differentially expressed genes) are contained in the following table
write.delim(degs, file="degs.txt")
# degs <- read.delim("degs.txt")
#' [Differentially Expressed Genes](degs.txt)
#' ### DEG Counts
#with(degs, as.data.frame(table(sample_1, sample_2, s1_overex))) %>% filter(Freq >0) %>% kable()
degs %>% count( sample_1, sample_2, s1_overex) %>% kable()
#+ fig.height=nrow(contrasts)+2
#ggplot(degs, aes(paste(sample_1, "vs", sample_2), fill=status)) + geom_bar() +xlab(NULL) + ylab("# DGEs") +ggtitle("DEGs by contrast") + coord_flip()
ggplot(degs, aes(paste(sample_1, "vs", sample_2), fill=(s1_overex))) + geom_bar() + xlab(NULL) + ylab("# DGEs") + ggtitle("DEGs by contrast") + coord_flip()
#' DEGs can be browsed in Excel using the exported table or via the embedded table browser. To enable the IGV links, you need to set the port in the IGV preferences to 3334.
kableDegs <- degs
if(nrow(degs>2000)){
kableDegs <- degs %>% arrange(padj) %>% head(1000)
# Error: object 'q_value' not found. So: padj used instead of '-q_value'
print("just showing first 1000 most significant hits (highest p-value) in interactive hit table!!!! Use Excel to browser to browse the full set")
}
#+ results='asis'
kableDegs %>%
#inner_join(geneLoci) %>%
mutate(
ensembl=paste0("<a href='http://www.ensembl.org/Multi/Search/Results?y=0;site=ensembl_all;x=0;page=1;facet_feature_type=Gene;q=",ensembl_gene_id, "'>",ensembl_gene_id, "</a>"),
igv=paste0("<a href='http://localhost:60151/goto?locus=", chromosome_name,":", start_position, "-", end_position, "'>IGV</a>")
) %>%
select(sample_1, sample_2, s1_over_s2_logfc, pvalue, padj, ensembl_gene_id, external_gene_name, description) %>%
kable("html", table.attr = "class='dtable'", escape=F)
## just needed to restore environment easily
save(degs, file=".degs.RData")
# degs <- local(get(load(".degs.RData")))
##--------------------------------------------------------
#' ### MA Plot
#' Redo MA plots but now including hit overlays
maPlots <- deResults %>% group_by(sample_1, sample_2) %>% do(gg={ maData <-.
## todo why not s2_over_s1_log2fc
maData %>% ggplot(aes(0.5*log2(value_1*value_2), log2(value_1/value_2), color=is_hit)) +
geom_point(alpha=0.3) +
geom_hline(yintercept=0, color="red") +
ggtitle(with(maData[1,], paste(sample_1, "vs", sample_2)))
}) %$% gg
#+ fig.height=10*ceiling(length(maPlots)/2)
multiplot(plotlist=maPlots, cols=min(2, length(maPlots)))
# Error in eval(expr, envir, enclos) : object 'value_1' not found
##--------------------------------------------------------
## TODO Melanie compare with section "Differentially expressed genes" and complement missing bits ## TODO Melanie compare with section "Differentially expressed genes" and complement missing bits
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment