Skip to content
Snippets Groups Projects
Commit 8da587a3 authored by Holger Brandl's avatar Holger Brandl
Browse files

better volcano plots

parent ae7d391d
No related branches found
No related tags found
No related merge requests found
......@@ -20,7 +20,7 @@ Options:
#print(commandArgs(TRUE))
opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
## florio_11b: setwd("/lustre/projects/bioinfo/holger/projects/florio_11b/cuffdiff/dge_report_new"); opts <- docopt(doc, "--constrasts contrasts.txt --pcutoff 0.01 /projects/bioinfo/holger/projects/florio_11b/cuffdiff")
## florio_11b: setwd("/lustre/projects/bioinfo/holger/projects/florio_11b/cuffdiff/dge_report"); opts <- docopt(doc, "--constrasts contrasts.txt --pcutoff 0.01 /projects/bioinfo/holger/projects/florio_11b/cuffdiff")
# opts <- docopt(doc, "--undirected ..")
# opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), ".."))
## todo use commandArgs here!!
......@@ -49,7 +49,7 @@ if(is.na(file.info(cuffdb_directory)$isdir)){
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.7/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.7/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.7/R/bio/diffex_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/master/R/bio/diffex_commons.R")
#+ results='asis', echo=FALSE
cat('
......@@ -68,7 +68,11 @@ cat('
require(cummeRbund)
require(knitr)
#' Used DGE Analysis Paramters
## reload dplyr to fix namespace issues
unloadNamespace('dplyr'); require(dplyr)
#iris %>% count(Species)
#' Used DGE Analysis Parameters
vec2df(unlist(opts)) %>% filter(!str_detect(name, "^[<-]")) %>% kable()
......@@ -94,7 +98,7 @@ if(dbTempCopyRequired){
#' The expression tracking reference contained:
cuff
#' Used CuffDiff
#' Used CuffDiff:
runInfo(cuff) %>% filter(param!="cmd_line") %>% kable()
#' cuffdiff cmd was
#+ results='asis'
......@@ -213,7 +217,7 @@ csVolcanoMatrix(genes(cuff))
#' We also plot more detailed scatter plots for just the contrasts for interest. Signifcance coloring is based on cuffdiff's internal score an not our hit criterion.
#csVolcano(genes(cuff),"hESC","Fibroblasts")
csVolcPlots <- alply(contrasts, 1, splat(function(sample_1, sample_2) {
csVolcano(genes(cuff),sample_1,sample_2)
csVolcano(genes(cuff), sample_1, sample_2)
}))
#+ fig.width=16
......@@ -291,7 +295,7 @@ allDiff <- diffData(genes(cuff)) %>%
## discard all genes that are not expressed in any of the cell types (>1)
filter(gene_id %in% getExpressedGenes(cuff, minFPKM=minFPKM)) %>%
dplyr::rename(ensembl_gene_id=gene_id, s2_over_s1_log2fc=log2_fold_change) %>%
rename(ensembl_gene_id=gene_id, s2_over_s1_log2fc=log2_fold_change) %>%
mutate(sample_1_overex=s2_over_s1_log2fc<0)
#+ results='asis'
......@@ -324,7 +328,7 @@ write.delim(degs, file="degs.txt")
#' ### DEG Counts
#with(degs, as.data.frame(table(sample_1, sample_2, sample_1_overex))) %>% filter(Freq >0) %>% kable()
degs %>% dplyr::count( sample_1, sample_2, sample_1_overex) %>% kable()
degs %>% count( sample_1, sample_2, sample_1_overex) %>% kable()
#+ fig.height=nrow(contrasts)+2
......@@ -337,7 +341,7 @@ ggplot(degs, aes(paste(sample_1, "vs", sample_2), fill=sample_1_overex)) + geom
degs %>%
inner_join(geneLoci) %>%
mutate(
ensembl=paste0("<a href='http://www.ensembl.org/Multi/Results?page=1;facet_feature_type=Gene;q=",ensembl_gene_id, "'>",ensembl_gene_id, "</a>"),
ensembl=paste0("<a href='http://www.ensembl.org/Multi/Search/Results?y=0;site=ensembl_all;x=0;page=1;facet_feature_type=Gene;q=",ensembl_gene_id, "'>",ensembl_gene_id, "</a>"),
igv=paste0("<a href='http://localhost:3334/goto?locus=", chromosome_name,":", start_position, "-", end_position, "'>IGV</a>")
) %>%
select(sample_1, sample_2, value_1, value_2, s2_over_s1_log2fc, p_value, q_value, ensembl, external_gene_name, description, gene_biotype, igv) %>%
......@@ -347,12 +351,8 @@ degs %>%
save(degs, file=".degs.RData")
# degs <- local(get(load(".degs.RData")))
#' Also redo volcano plots with out hit overlays
#+ fig.width=16, fig.height=14
csVolcPlots <- alply(contrasts, 1, splat(function(sample_1, sample_2) {
csVolcano(genes(cuff),sample_1,sample_2)
}))
#' ### MA Plot
#' Redo MA plots but now including hit overlays
maPlots <- allDiff %>% group_by(sample_1, sample_2) %>% do(gg={ maData <-.
## todo why not s2_over_s1_log2fc
......@@ -362,36 +362,69 @@ maPlots <- allDiff %>% group_by(sample_1, sample_2) %>% do(gg={ maData <-.
ggtitle(with(maData[1,], paste(sample_1, "vs", sample_2)))
}) %$% gg
#+ fig.width=16
#+ fig.height=10*ceiling(length(maPlots)/2)
multiplot(plotlist=maPlots, cols=min(2, length(maPlots)))
##Volcano plots
#' ### Relation of FC and p-values
#' To assess the relation of fold-changes and p-value we plot the data as volcano plots for each contrasts including the number of DEGS according our hit criterion.
hitCounts <- degs %>%
dplyr::count(sample_1, sample_2, sample_1_overex) %>%
dplyr::rename(hits=n) %>%
count(sample_1, sample_2, sample_1_overex) %>%
rename(hits=n) %>%
merge(data.frame(sample_1_overex=c(T,F), x_pos=c(-3,3)))
#' For the volcano plot matrix we calculate fold-changes as x/y (ie column/row). For details see [here](http://rpackages.ianhowson.com/bioc/cummeRbund/man/csVolcano.html)
##' For the volcano plot matrix we calculate fold-changes as x/y (ie column/row). For details see [here](http://rpackages.ianhowson.com/bioc/cummeRbund/man/csVolcano.html)
#+ fig.width=16, fig.height=14
allDiff %>% ggplot(aes(s2_over_s1_log2fc, -log10(p_value), color=is_hit)) +
# geom_jitter(alpha=0.3, position = position_jitter(height = 0.2)) +
geom_point(alpha=0.3) +
# theme_bw() +
xlim(-3.5,3.5) +
scale_color_manual(values = c("TRUE"="red", "FALSE"="black")) +
# ggtitle("Insm1/Ctrl") +
## http://stackoverflow.com/questions/19764968/remove-point-transparency-in-ggplot2-legend
guides(colour = guide_legend(override.aes = list(alpha=1))) +
#allDiff %>% ggplot(aes(s2_over_s1_log2fc, -log10(p_value), color=is_hit)) +
## geom_jitter(alpha=0.3, position = position_jitter(height = 0.2)) +
# geom_point(alpha=0.3) +
## theme_bw() +
# xlim(-3.5,3.5) +
# scale_color_manual(values = c("TRUE"="red", "FALSE"="black")) +
## ggtitle("Insm1/Ctrl") +
# ## http://stackoverflow.com/questions/19764968/remove-point-transparency-in-ggplot2-legend
# guides(colour = guide_legend(override.aes = list(alpha=1))) +
#
# ## tweak axis labels
# xlab(expression(log[2]("x/y"))) +
# ylab(expression(-log[10]("p value"))) +
#
# ## add hit couts
# geom_text(aes(label=hits, x=x_pos), y=2, color="red", size=10, data=hitCounts) +
# facet_grid(sample_1 ~ sample_2)
volcPlots <- allDiff %>% group_by(sample_1, sample_2) %>% do(gg={ maData <-.
maData %>% ggplot(aes(s2_over_s1_log2fc, -log10(p_value), color=is_hit)) +
# geom_jitter(alpha=0.3, position = position_jitter(height = 0.2)) +
geom_point(alpha=0.3) +
# theme_bw() +
xlim(-3.5,3.5) +
scale_color_manual(values = c("TRUE"="red", "FALSE"="black")) +
# ggtitle("Insm1/Ctrl") +
## http://stackoverflow.com/questions/19764968/remove-point-transparency-in-ggplot2-legend
# guides(colour = guide_legend(override.aes = list(alpha=1))) +
guides(colour = F) +
## tweak axis labels
# xlab(expression(log[2]("x/y"))) +
with(maData[1,], xlab(paste0("log2(", sample_2, "/", sample_1, ")"))) +
ylab(expression(-log[10]("p value"))) +
# with(maData[1,], ggtitle(paste0(sample_2, " over ", sample_1))) +
## add hit couts
geom_text(aes(label=hits, x=x_pos), y=2.5, color="red", size=10, data=semi_join(hitCounts, maData))
}) %$% gg
#+ fig.height=10*ceiling(length(maPlots)/2)
volcPlots %>% multiplot(plotlist=., cols=min(2, length(.)))
## tweak axis labels
xlab(expression(log[2]("x/y"))) +
ylab(expression(-log[10]("p value"))) +
## add hit couts
geom_text(aes(label=hits, x=x_pos), y=2, color="red", size=10, data=hitCounts) +
facet_grid(sample_1 ~ sample_2)
## redefine because changed after v1.7
plotPDF <- function(fileBaseName, expr, ...){ pdf(paste0(fileBaseName, ".pdf"), ...); expr; dev.off(); }
plotPDF("contrasts_volcano", volcPlots %>% multiplot(plotlist=., cols=min(2, length(.))), width=12, height=ceiling(nrow(contrasts)/2)*8 )
#' [volcano PDF](contrasts_volcano.pdf)
########################################################################################################################
......@@ -470,7 +503,7 @@ term_barplot_files <- sigEnrResults %>% do({
# print(logPlot)
tmpPng <- paste0("enrterms__", fileNameLabel, ".png")
ggsave(tmpPng, logPlot, width=10, height = round(2+nrow(enrResultsGrp)/4), limitsize=FALSE)
ggsave(tmpPng, logPlot, width=10, height = 2+round(nrow(enrResultsGrp)/5), limitsize=FALSE)
data.frame(file=tmpPng)
})
......@@ -481,7 +514,7 @@ l_ply(term_barplot_files$file, function(pngFile){ cat(paste0("<img src='", pngFi
########################################################################################################################
#' ## Enriched KEGG Pathways
#' To understand spatio-temporal changes in gene expression better we now overlay
#' To understand spatio-temporal changes in gene expression better we now overlay enriched kegg pathways with the -log10(q_value) of each contrast. The direction of the expression changes is encoded as color, whereby red indicates that sample_1 is overexpressed. Because we have multiple contrasts of interest, this defines a slice-color barcode for each gene. To relate the barcode to contrasts we define the following slice order:
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
......@@ -496,7 +529,8 @@ require(png)
# echo("no enriched kegg pathways were found in the dataset")
#}else{
keggOrCode <- "mmu"
#keggOrCode <- "mmu"
keggOrCode <- guess_pathview_species(degs$ensembl_gene_id)
## prepare p-value data
sliceData <- allDiff %>%
......@@ -603,7 +637,7 @@ pathwayPlots %>% l_ply(function(plotData){
rectDef=with(curNode, paste(x, y, x+width, y+height, sep=","))
paste0("<area href='", curNode$link,"' alt='Text' coords='",rectDef , "' shape='rect'>") %>% cat()
})
cat("</map>")
cat("</map><br>")
})
#}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment