Skip to content
Snippets Groups Projects
Commit ac3ea4a0 authored by Holger Brandl's avatar Holger Brandl
Browse files

count counts analysis

parent 08cd4192
No related branches found
No related tags found
No related merge requests found
...@@ -48,14 +48,15 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v ...@@ -48,14 +48,15 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v
require(cummeRbund) require(cummeRbund)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R")
require(knitr)
#knitr::opts_knit$set(root.dir = getwd()) #knitr::opts_knit$set(root.dir = getwd())
######################################################################################################################## ########################################################################################################################
#' # Differential Gene Expression Analysis #' # Differential Gene Expression Analysis
print("configuration") print("configuration")
print(opts) vec2df(unlist(opts)) %>% kable()
## todo Small overview about pipeline
#' [CuffDiff](http://cufflinks.cbcb.umd.edu/manual.html) and [cummeRbund](http://compbio.mit.edu/cummeRbund/) were used to perform this analysis. For details about how the test for differntial expression works, refer to [Differential analysis of gene regulation at transcript resolution with RNA-seq](http://www.nature.com/nbt/journal/v31/n1/full/nbt.2450.html). #' [CuffDiff](http://cufflinks.cbcb.umd.edu/manual.html) and [cummeRbund](http://compbio.mit.edu/cummeRbund/) were used to perform this analysis. For details about how the test for differntial expression works, refer to [Differential analysis of gene regulation at transcript resolution with RNA-seq](http://www.nature.com/nbt/journal/v31/n1/full/nbt.2450.html).
...@@ -80,14 +81,6 @@ write.delim(replicateInfo, file="replicate_info.txt") ...@@ -80,14 +81,6 @@ write.delim(replicateInfo, file="replicate_info.txt")
# replicateInfo <- read.delim("replicate_info.txt") # replicateInfo <- read.delim("replicate_info.txt")
#' [replicateInfo](replicate_info.txt) #' [replicateInfo](replicate_info.txt)
#+ eval=skipQC
#hello hidden field
#######################################################################################################################
#' ## Count and Expression Score Tables
## todo merge in basic gene info into all tables
## add gene description ## add gene description
martName <- guess_mart(fpkm(genes(cuff))$gene_id) martName <- guess_mart(fpkm(genes(cuff))$gene_id)
cacheFile <- paste0("geneInfo.",martName, ".RData") cacheFile <- paste0("geneInfo.",martName, ".RData")
...@@ -104,6 +97,13 @@ if(!file.exists(cacheFile)){ ...@@ -104,6 +97,13 @@ if(!file.exists(cacheFile)){
#+ eval=skipQC
#hello hidden field
#######################################################################################################################
#' ## Count and Expression Score Tables
#+ include=FALSE
#colnames(gFpkmMatrix) <- ifelse(is.na(sampleDic[colnames(gFpkmMatrix)]), colnames(gFpkmMatrix), sampleDic[colnames(gFpkmMatrix)]) #colnames(gFpkmMatrix) <- ifelse(is.na(sampleDic[colnames(gFpkmMatrix)]), colnames(gFpkmMatrix), sampleDic[colnames(gFpkmMatrix)])
fpkmMatrix(genes(cuff)) %>% fpkmMatrix(genes(cuff)) %>%
rownames2column("ensembl_gene_id") %>% rownames2column("ensembl_gene_id") %>%
...@@ -143,6 +143,7 @@ repCountMatrix(genes(cuff)) %>% ...@@ -143,6 +143,7 @@ repCountMatrix(genes(cuff)) %>%
####################################################################################################################### #######################################################################################################################
#' ## Score Distributions #' ## Score Distributions
#+ include=FALSE
#if(!as.logical(opts$S)){ #if(!as.logical(opts$S)){
...@@ -175,6 +176,7 @@ csVolcanoMatrix(genes(cuff)) ...@@ -175,6 +176,7 @@ csVolcanoMatrix(genes(cuff))
####################################################################################################################### #######################################################################################################################
#' ## Replicate Correlation and Clustering #' ## Replicate Correlation and Clustering
#+ include=FALSE
require.auto(edgeR) require.auto(edgeR)
plotMDS(repFpkmMatrix(genes(cuff)), top=500, main="top500 MDS") plotMDS(repFpkmMatrix(genes(cuff)), top=500, main="top500 MDS")
...@@ -344,13 +346,17 @@ write.delim(sigEnrResults, file="sigEnrResults.txt") ...@@ -344,13 +346,17 @@ write.delim(sigEnrResults, file="sigEnrResults.txt")
sigEnrResults %>% do({ sigEnrResults %>% do({
enrResultsGrp <- . enrResultsGrp <- .
## DEBUG enrResultsGrp <- sigEnrResults
label <- label <-
dplyr::select(sigEnrResults, matches(ifelse(split_hit_list, "sample_1|sample_2|sample_1_overex", "sample_1|sample_2"))) %>% dplyr::select(enrResultsGrp, matches(ifelse(split_hit_list, "sample_1|sample_2|sample_1_overex", "sample_1|sample_2"))) %>%
head(1) %>% apply(1, paste, collapse="_vs_") head(1) %>% apply(1, paste, collapse="_vs_")
echo("processing", label) echo("processing", label)
logPlot <- enrResultsGrp %>% ggplot(aes(reorder(Term, as.integer(Category)), PValue, fill=Category)) + logPlot <- enrResultsGrp %>%
## fix factor order
mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(Category))) %>%
ggplot(aes(Term, PValue, fill=Category)) +
geom_bar(stat="identity")+ geom_bar(stat="identity")+
coord_flip() + coord_flip() +
xlab("Enriched Terms") + xlab("Enriched Terms") +
...@@ -373,9 +379,12 @@ sigEnrResults %>% do({ ...@@ -373,9 +379,12 @@ sigEnrResults %>% do({
#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t") #write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE, sep="\t")
#+ #+
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>%
dplyr::select(kegg_pathway_id) %>% ac()
######################################################################################################################## ########################################################################################################################
#' ## Enriched KEGG Pathways #' ## Enriched KEGG Pathways
...@@ -385,9 +394,6 @@ require(png) ...@@ -385,9 +394,6 @@ require(png)
with(sigEnrResults, as.data.frame(table(Category))) with(sigEnrResults, as.data.frame(table(Category)))
keggPathways <- sigEnrResults %>%
filter(Category=="KEGG_PATHWAY") %>%
transform(kegg = colsplit(Term, "\\:", names = c('pathway_id', 'description')))
#if(nrow(keggPathways)==0){ #if(nrow(keggPathways)==0){
# echo("no enriched kegg pathways were found in the dataset") # echo("no enriched kegg pathways were found in the dataset")
...@@ -424,7 +430,6 @@ plot_pathway <- function(pathwayID, overlayData){ ...@@ -424,7 +430,6 @@ plot_pathway <- function(pathwayID, overlayData){
gene.idtype="ensembl" gene.idtype="ensembl"
) )
outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(overlayData)>1, ".multi", ""), ".png") outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(overlayData)>1, ".multi", ""), ".png")
## interactive plotting ## interactive plotting
...@@ -433,7 +438,9 @@ plot_pathway <- function(pathwayID, overlayData){ ...@@ -433,7 +438,9 @@ plot_pathway <- function(pathwayID, overlayData){
# lim <- par() # lim <- par()
# rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4]) # rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
return(outfile) pv.out$plotfile=outfile
pv.out$pathway_id=pathwayID
return(pv.out)
} }
#keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015") #keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015")
...@@ -441,16 +448,19 @@ plot_pathway <- function(pathwayID, overlayData){ ...@@ -441,16 +448,19 @@ plot_pathway <- function(pathwayID, overlayData){
pathwayPlots <- keggPathways %>% laply(function(pathwayID){ pathwayPlots <- keggPathways %>% llply(function(pathwayID){
# geneData <- merge(curKP, allDiff) %>% transmute(ensembl_gene_id, log2_fold_change) %>% column2rownames("ensembl_gene_id")
plot_pathway(pathwayID, sliceData) plot_pathway(pathwayID, sliceData)
}) })
save(pathwayPlots, file=".pathwayPlots.RData") save(pathwayPlots, file=".pathwayPlots.RData")
# pathwayPlots <- local(get(load("pathwayPlots.RData"))) # pathwayPlots <- local(get(load("pathwayPlots.RData")))
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
unlist(lapply(pathwayPlots, "[[", "plotfile"))
#+ results="asis" #+ results="asis"
#l_ply(pathwayPlots, function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))}) # unlist(lapply(pathwayPlots, "[[", "plotfile")) %>% l_ply(function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
cat(" cat("
<style type='text/css'> <style type='text/css'>
...@@ -459,30 +469,21 @@ cat(" ...@@ -459,30 +469,21 @@ cat("
} }
</style> </style>
") ")
## extended version with clickable links ## extended version with clickable links
pathwayPlots %>% l_ply(function(pngFile){ pathwayPlots %>% l_ply(function(plotData){
#pngFile="mmu04015.pathview.png" #pngFile="mmu04015.pathview.png"
pathwayName=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1] # pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
pngFile <- plotData$plotfile
## read in the xml and extract the node positions pathway_id=plotData$pathway_id
keggnod2df <- function(node){
keggNode <- cbind(
getNodeSet(node, "graphics")[[1]] %>% xmlAttrs %>% as.list() %>% as.df(),
xmlAttrs(node) %>% as.list() %>% as.df()
)
keggNode
}
keggNodes <- xmlParse(paste0(pathwayName, ".xml")) %>% keggNodes <- plotData$plot.data.gene %>%
getNodeSet( "/pathway/entry", fun=keggnod2df) %>%
rbind_all() %>% mutate_each(funs(as.numeric), x,y, width, height) %>%
## remove box offset ## remove box offset
mutate(x=x-22, y=y-8) ## todo does not work for non-gene elements mutate(x=x-22, y=y-8) ## todo does not work for non-gene elements
## first the image itself ## first the image itself
cat(paste0("<img usemap='#", pathwayName,"' src='", pngFile, "'><br>")) cat(paste0("<img usemap='#", pathway_id,"' src='", pngFile, "'><br>"))
cat(paste0("<map name='", pathwayName,"'>")) cat(paste0("<map name='", pathway_id,"'>"))
#keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)} #keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)}
#see http://www.html-world.de/180/image-map/ #see http://www.html-world.de/180/image-map/
......
...@@ -14,6 +14,8 @@ export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH ...@@ -14,6 +14,8 @@ export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH
export PATH=/sw/apps/python/current/bin:$PATH export PATH=/sw/apps/python/current/bin:$PATH
export PATH=/home/brandl/bin/deepTools/bin:$PATH export PATH=/home/brandl/bin/deepTools/bin:$PATH
export PATH=/projects/bioinfo/holger/bin/FastQC_0.11.2:$PATH export PATH=/projects/bioinfo/holger/bin/FastQC_0.11.2:$PATH
export PATH=/projects/bioinfo/holger/bin/bedtools-2.23.0/bin/:$PATH
export PATH=/home/brandl/bin/spinr:$PATH
# which tophat; which bowtie2; which cuffdiff # which tophat; which bowtie2; which cuffdiff
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment