Skip to content
Snippets Groups Projects
Commit ac3ea4a0 authored by Holger Brandl's avatar Holger Brandl
Browse files

count counts analysis

parent 08cd4192
No related branches found
No related tags found
No related merge requests found
......@@ -48,14 +48,15 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v
require(cummeRbund)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R")
require(knitr)
#knitr::opts_knit$set(root.dir = getwd())
########################################################################################################################
#' # Differential Gene Expression Analysis
print("configuration")
print(opts)
## todo Small overview about pipeline
vec2df(unlist(opts)) %>% kable()
#' [CuffDiff](http://cufflinks.cbcb.umd.edu/manual.html) and [cummeRbund](http://compbio.mit.edu/cummeRbund/) were used to perform this analysis. For details about how the test for differntial expression works, refer to [Differential analysis of gene regulation at transcript resolution with RNA-seq](http://www.nature.com/nbt/journal/v31/n1/full/nbt.2450.html).
......@@ -80,14 +81,6 @@ write.delim(replicateInfo, file="replicate_info.txt")
# replicateInfo <- read.delim("replicate_info.txt")
#' [replicateInfo](replicate_info.txt)
#+ eval=skipQC
#hello hidden field
#######################################################################################################################
#' ## Count and Expression Score Tables
## todo merge in basic gene info into all tables
## add gene description
martName <- guess_mart(fpkm(genes(cuff))$gene_id)
cacheFile <- paste0("geneInfo.",martName, ".RData")
......@@ -104,6 +97,13 @@ if(!file.exists(cacheFile)){
#+ eval=skipQC
#hello hidden field
#######################################################################################################################
#' ## Count and Expression Score Tables
#+ include=FALSE
#colnames(gFpkmMatrix) <- ifelse(is.na(sampleDic[colnames(gFpkmMatrix)]), colnames(gFpkmMatrix), sampleDic[colnames(gFpkmMatrix)])
fpkmMatrix(genes(cuff)) %>%
rownames2column("ensembl_gene_id") %>%
......@@ -143,6 +143,7 @@ repCountMatrix(genes(cuff)) %>%
#######################################################################################################################
#' ## Score Distributions
#+ include=FALSE
#if(!as.logical(opts$S)){
......@@ -175,6 +176,7 @@ csVolcanoMatrix(genes(cuff))
#######################################################################################################################
#' ## Replicate Correlation and Clustering
#+ include=FALSE
require.auto(edgeR)
plotMDS(repFpkmMatrix(genes(cuff)), top=500, main="top500 MDS")
......@@ -344,13 +346,17 @@ write.delim(sigEnrResults, file="sigEnrResults.txt")
sigEnrResults %>% do({
enrResultsGrp <- .
## DEBUG enrResultsGrp <- sigEnrResults
label <-
dplyr::select(sigEnrResults, matches(ifelse(split_hit_list, "sample_1|sample_2|sample_1_overex", "sample_1|sample_2"))) %>%
dplyr::select(enrResultsGrp, matches(ifelse(split_hit_list, "sample_1|sample_2|sample_1_overex", "sample_1|sample_2"))) %>%
head(1) %>% apply(1, paste, collapse="_vs_")
echo("processing", label)
logPlot <- enrResultsGrp %>% ggplot(aes(reorder(Term, as.integer(Category)), PValue, fill=Category)) +
logPlot <- enrResultsGrp %>%
## fix factor order
mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(Category))) %>%
ggplot(aes(Term, PValue, fill=Category)) +
geom_bar(stat="identity")+
coord_flip() +
xlab("Enriched Terms") +
......@@ -373,9 +379,12 @@ sigEnrResults %>% do({
#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t")
#+
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>%
dplyr::select(kegg_pathway_id) %>% ac()
########################################################################################################################
#' ## Enriched KEGG Pathways
......@@ -385,9 +394,6 @@ require(png)
with(sigEnrResults, as.data.frame(table(Category)))
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
transform(kegg = colsplit(Term, "\\:", names = c('pathway_id', 'description')))
#if(nrow(keggPathways)==0){
# echo("no enriched kegg pathways were found in the dataset")
......@@ -424,7 +430,6 @@ plot_pathway <- function(pathwayID, overlayData){
gene.idtype="ensembl"
)
outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(overlayData)>1, ".multi", ""), ".png")
## interactive plotting
......@@ -433,7 +438,9 @@ plot_pathway <- function(pathwayID, overlayData){
# lim <- par()
# rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
return(outfile)
pv.out$plotfile=outfile
pv.out$pathway_id=pathwayID
return(pv.out)
}
#keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015")
......@@ -441,16 +448,19 @@ plot_pathway <- function(pathwayID, overlayData){
pathwayPlots <- keggPathways %>% laply(function(pathwayID){
# geneData <- merge(curKP, allDiff) %>% transmute(ensembl_gene_id, log2_fold_change) %>% column2rownames("ensembl_gene_id")
pathwayPlots <- keggPathways %>% llply(function(pathwayID){
plot_pathway(pathwayID, sliceData)
})
save(pathwayPlots, file=".pathwayPlots.RData")
# pathwayPlots <- local(get(load("pathwayPlots.RData")))
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
unlist(lapply(pathwayPlots, "[[", "plotfile"))
#+ results="asis"
#l_ply(pathwayPlots, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
# unlist(lapply(pathwayPlots, "[[", "plotfile")) %>% l_ply(function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
cat("
<style type='text/css'>
......@@ -459,30 +469,21 @@ cat("
}
</style>
")
## extended version with clickable links
pathwayPlots %>% l_ply(function(pngFile){
pathwayPlots %>% l_ply(function(plotData){
#pngFile="mmu04015.pathview.png"
pathwayName=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
## read in the xml and extract the node positions
keggnod2df <- function(node){
keggNode <- cbind(
getNodeSet(node, "graphics")[[1]] %>% xmlAttrs %>% as.list() %>% as.df(),
xmlAttrs(node) %>% as.list() %>% as.df()
)
keggNode
}
# pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
pngFile <- plotData$plotfile
pathway_id=plotData$pathway_id
keggNodes <- xmlParse(paste0(pathwayName, ".xml")) %>%
getNodeSet( "/pathway/entry", fun=keggnod2df) %>%
rbind_all() %>% mutate_each(funs(as.numeric), x,y, width, height) %>%
keggNodes <- plotData$plot.data.gene %>%
## remove box offset
mutate(x=x-22, y=y-8) ## todo does not work for non-gene elements
## first the image itself
cat(paste0("<img usemap='#", pathwayName,"' src='", pngFile, "'><br>"))
cat(paste0("<map name='", pathwayName,"'>"))
cat(paste0("<img usemap='#", pathway_id,"' src='", pngFile, "'><br>"))
cat(paste0("<map name='", pathway_id,"'>"))
#keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)}
#see http://www.html-world.de/180/image-map/
......
......@@ -14,6 +14,8 @@ export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH
export PATH=/sw/apps/python/current/bin:$PATH
export PATH=/home/brandl/bin/deepTools/bin:$PATH
export PATH=/projects/bioinfo/holger/bin/FastQC_0.11.2:$PATH
export PATH=/projects/bioinfo/holger/bin/bedtools-2.23.0/bin/:$PATH
export PATH=/home/brandl/bin/spinr:$PATH
# which tophat; which bowtie2; which cuffdiff
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment