dge_analysis.R 18.8 KB
Newer Older
Holger Brandl's avatar
Holger Brandl committed
1
#!/usr/bin/env Rscript
Holger Brandl's avatar
Holger Brandl committed
2
#+ setup, cache=FALSE
3

Holger Brandl's avatar
Holger Brandl committed
4
suppressMessages(require(docopt))
5

Holger Brandl's avatar
Holger Brandl committed
6 7
doc <- '
Convert a cuffdiff into a dge-report
Holger Brandl's avatar
Holger Brandl committed
8
Usage: dge_analysis.R [options] <cuffdb_directory>
9

Holger Brandl's avatar
Holger Brandl committed
10
Options:
Holger Brandl's avatar
Holger Brandl committed
11 12
--constrasts=<tab_delim_table> Table with sample pairs for which dge analysis should be performed
--undirected           Perform directed dge-steps in a directed manner
Holger Brandl's avatar
Holger Brandl committed
13 14
--qcutoff <qcutoff>    Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01]
--pcutoff <pcutoff>    Override q-value filter and filter by p-value instead
Holger Brandl's avatar
Holger Brandl committed
15
--minfpkm <minfpkm>   Minimal expression in any of the sample to be included in the final result list [default: 1]
Holger Brandl's avatar
Holger Brandl committed
16
-S                     Dont do general statistics and qc analysis
Holger Brandl's avatar
Holger Brandl committed
17
'
18

Holger Brandl's avatar
Holger Brandl committed
19
#opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
Holger Brandl's avatar
Holger Brandl committed
20
# opts <- docopt(doc, "--undirected  ..")
Holger Brandl's avatar
Holger Brandl committed
21
# opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), ".."))
Holger Brandl's avatar
Holger Brandl committed
22
## todo use commandArgs here!!
Holger Brandl's avatar
Holger Brandl committed
23
opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" ")))
Holger Brandl's avatar
Holger Brandl committed
24
#opts
Holger Brandl's avatar
Holger Brandl committed
25 26

skipQC <<- opts$S
Holger Brandl's avatar
Holger Brandl committed
27 28
split_hit_list <- !opts$undirected
constrasts_file <- opts$constrasts
Holger Brandl's avatar
Holger Brandl committed
29 30
pcutoff <- if(is.null(opts$pcutoff)) NULL else as.numeric(opts$pcutoff)
qcutoff <- if(is.numeric(pcutoff)) NULL else as.numeric(opts$qcutoff)
Holger Brandl's avatar
Holger Brandl committed
31
minFPKM <- as.numeric(opts$minfpkm)
Holger Brandl's avatar
Holger Brandl committed
32 33

stopifnot(is.numeric(qcutoff) | is.numeric(pcutoff))
Holger Brandl's avatar
Holger Brandl committed
34

Holger Brandl's avatar
Holger Brandl committed
35
cuffdb_directory <- normalizePath(opts$cuffdb_directory)
36

Holger Brandl's avatar
Holger Brandl committed
37 38 39 40
if(is.na(file.info(cuffdb_directory)$isdir)){
    stop(paste("db directory does not exist", doc))
}

Holger Brandl's avatar
Holger Brandl committed
41

42 43 44
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.9/R/core_commons.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.9/R/ggplot_commons.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.9/R/bio/diffex_commons.R")
Holger Brandl's avatar
Holger Brandl committed
45 46 47

require(cummeRbund)

Holger Brandl's avatar
Holger Brandl committed
48 49
require(knitr)

Holger Brandl's avatar
Holger Brandl committed
50 51
#knitr::opts_knit$set(root.dir = getwd())

Holger Brandl's avatar
Holger Brandl committed
52 53
########################################################################################################################
#' # Differential Gene Expression Analysis
54

55 56
print("Configuration")
vec2df(unlist(opts)) %>% filter(!str_detect(name, "^[<-]")) %>% kable()
Holger Brandl's avatar
Holger Brandl committed
57

Holger Brandl's avatar
Holger Brandl committed
58 59 60
#' [CuffDiff](http://cufflinks.cbcb.umd.edu/manual.html) and [cummeRbund](http://compbio.mit.edu/cummeRbund/) were used to perform this analysis. For details about how the test for differntial expression works, refer to [Differential analysis of gene regulation at transcript resolution with RNA-seq](http://www.nature.com/nbt/journal/v31/n1/full/nbt.2450.html).

#print(paste("db dir is", cuffdb_directory))
Holger Brandl's avatar
Holger Brandl committed
61

Holger Brandl's avatar
Holger Brandl committed
62
#' Used cuffdiff database in `r cuffdb_directory`
63

Holger Brandl's avatar
Holger Brandl committed
64 65
#+ load_db, cache=FALSE

66 67 68 69 70 71 72 73 74 75 76
isCluster=Sys.getenv("LSF_SERVERDIR")!=""
dbTempCopyRequired=isCluster | str_detect(normalizePath(cuffdb_directory), fixed("/Volumes/projects"))

if(dbTempCopyRequired){
    ## workaround until sqlite  is fixed on lustre
    tmpdir <- tempfile("cuffdb_dir")
    system(paste("cp -r", cuffdb_directory, tmpdir))
    cuff <- readCufflinks(dir=tmpdir)
}else{
    cuff <- readCufflinks(dir="..")
}
Holger Brandl's avatar
Holger Brandl committed
77

78 79 80

cuff
runInfo(cuff)
Holger Brandl's avatar
Holger Brandl committed
81
replicateInfo <- replicates(cuff) %>% mutate(file=basename(file)) %>% select(-c(total_mass, norm_mass, internal_scale, external_scale))
Holger Brandl's avatar
Holger Brandl committed
82 83
replicateInfo

84 85 86
write.delim(replicateInfo, file="replicate_info.txt")
# replicateInfo <- read.delim("replicate_info.txt")
#'  [replicateInfo](replicate_info.txt)
87

88
## add gene description
89
martName <- guess_mart(fpkm(genes(cuff))$gene_id)
90 91 92 93 94 95 96
cacheFile <- paste0("geneInfo.",martName, ".RData")
if(!file.exists(cacheFile)){
    require(biomaRt)

    mousemart = useDataset(martName, mart=useMart("ensembl"))
    geneInfo <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'description', 'gene_biotype'), mart=mousemart);
    save(geneInfo, file=cacheFile)
Holger Brandl's avatar
Holger Brandl committed
97
    unloadNamespace('biomaRt')
98 99 100
}else{
    geneInfo <- local(get(load(cacheFile)))
}
Holger Brandl's avatar
Holger Brandl committed
101

Holger Brandl's avatar
Holger Brandl committed
102 103 104 105 106 107
#+ eval=skipQC
#hello hidden field

#######################################################################################################################
#' ## Count and Expression Score Tables

Holger Brandl's avatar
Holger Brandl committed
108
#colnames(gFpkmMatrix) <- ifelse(is.na(sampleDic[colnames(gFpkmMatrix)]), colnames(gFpkmMatrix), sampleDic[colnames(gFpkmMatrix)])
109 110
fpkmMatrix(genes(cuff)) %>%
    rownames2column("ensembl_gene_id") %>%
Holger Brandl's avatar
Holger Brandl committed
111 112
    merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
    print_head() %>%
113 114 115
    write.delim(file="gene_fpkms.txt")
# gene_fpkms <- read.delim("gene_fpkms.txt")
#' [Annotated Expresssion Table](gene_fpkms.txt)
Holger Brandl's avatar
Holger Brandl committed
116

117 118 119 120
## export the same but now including replicate information

repFpkmMatrix(genes(cuff)) %>%
    rownames2column("ensembl_gene_id") %>%
Holger Brandl's avatar
Holger Brandl committed
121 122
    merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
    print_head() %>%
123
    write.delim(file="gene_fpkms_by_replicate.txt")
Holger Brandl's avatar
Holger Brandl committed
124
# gene_fpkms_by_replicate <- read.delim("gene_fpkms_by_replicate.txt")
125
#' [Annotated Expresssion Table](gene_fpkms_by_replicate.txt)
Holger Brandl's avatar
Holger Brandl committed
126

127
## also export count tables
128 129
countMatrix(genes(cuff)) %>%
    rownames2column("ensembl_gene_id") %>%
130 131
    merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
    write.delim(file="gene_counts.txt")
Holger Brandl's avatar
Holger Brandl committed
132

133 134 135
#'  [Gene Counts](gene_counts.txt)

repCountMatrix(genes(cuff)) %>%
136 137
    rownames2column("ensembl_gene_id") %>%
    merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
138
    write.delim(file="gene_counts_by_replicate.txt")
Holger Brandl's avatar
Holger Brandl committed
139
# repCounts <- read.delim("replicateCounts.txt")
140
#'  [Gene Counts by Replicate](replicateCounts.txt)
Holger Brandl's avatar
Holger Brandl committed
141 142

##ggplot(geneFpkm, aes(fpkm)) + geom_histogram() + scale_x_log10()
Holger Brandl's avatar
Holger Brandl committed
143 144


145
#######################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
146
#' ## Score Distributions
147

Holger Brandl's avatar
Holger Brandl committed
148 149 150

#if(!as.logical(opts$S)){

151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
# create some global summary statistic plots
dispersionPlot(genes(cuff)) #+ aes(alpha=0.01)

csDensity(genes(cuff))
#ggsave2(csBoxplot(genes(cuff))+ ggtitle("fpkm boxplots"))

csDensity(genes(cuff),features=T)
csBoxplot(genes(cuff),replicates=T)

fpkmSCVPlot(genes(cuff))


#csScatter(genes(cuff), "a", "d", smooth=F) + aes(color=abs(log2(a/b))>2) + scale_colour_manual(values=c("darkgreen", "red"))

#MAplot(genes(cuff), "a", "d")+ aes(color=abs(log2(M))>2)+ ggtitle("MA-plot: a/d")
#MAplot(genes(cuff), "a", "b", useCount=T) #+ aes(color=abs(log2(M))>2)+ ggtitle("MA-plot: Luc/KD")


#csVolcano(genes(cuff),"aRGm", "N") + ggtitle("odd line-shaped fan-out in volcano plot")

171 172
## todo consider to add more pretty volcano plots (see stefania's extra plots

173 174

#csVolcano(genes(cuff),"a", "b")
175
## todo replace or complement with marta plot
176 177 178
csVolcanoMatrix(genes(cuff))
#csVolcano(genes(cuff),"a", "b")

Holger Brandl's avatar
Holger Brandl committed
179 180
#######################################################################################################################
#' ## Replicate Correlation and Clustering
181 182 183 184

require.auto(edgeR)
plotMDS(repFpkmMatrix(genes(cuff)), top=500, main="top500 MDS")

Holger Brandl's avatar
Holger Brandl committed
185 186 187 188

if(unlen(replicates(cuff)$sample)>2){  ## >2 only, because MDS will fail with just 2 samples
    MDSplot(genes(cuff)) + ggtitle("mds plot")
}
189 190 191

MDSplot(genes(cuff), replicates=T) + ggtitle("mds plot")

192 193
## todo replace with marta plot
#PCAplot(genes(cuff),"PC1","PC2",replicates=T)
194 195 196

noTextLayer <- function(gg){ gg$layers[[2]] <- NULL;; gg}

197 198

## todo use fix scaled here
Holger Brandl's avatar
Holger Brandl committed
199
noTextLayer(csDistHeat(genes(cuff)) +  ggtitle("Sample correlation"))
200

Holger Brandl's avatar
Holger Brandl committed
201
noTextLayer(csDistHeat(genes(cuff),replicates=T) +  ggtitle("Replicate Correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15))
202 203 204 205 206 207 208 209

csDendro(genes(cuff),replicates=T)

#pdf(paste0(basename(getwd()),"_rep_clustering.pdf"))
#csDendro(genes(cuff),replicates=T)
#dev.off()


210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
#' Also cluster expression filtered data
exprGenes <- repFpkmMatrix(genes(cuff)) %>%
    rownames2column("ensembl_gene_id") %>%
    melt(value.name="fpkm") %>%
    group_by(ensembl_gene_id) %>%
    ## apply log trafo
    mutate(log_fpkm=log10(fpkm+1)) %>%
    ## apply expression filter
    filter(max(log_fpkm)>1) %>%
    dcast( ensembl_gene_id ~ variable, value.var="log_fpkm") %>%
    column2rownames("ensembl_gene_id")

res<-JSdist(makeprobs(exprGenes)) %>% hclust() %>% as.dendrogram()

par(mar=c(10,4,4,4))
plot(res, main="Replicate Clustering of Expressed Genes")


228 229 230
#######################################################################################################################
#' ## Extract lists of differentially expressed genes

Holger Brandl's avatar
Holger Brandl committed
231 232 233
if(!is.null(constrasts_file)){
    echo("using contrasts matrix from: ", basename(constrasts_file))

234
    compPairsUniDir <- read.csv(constrasts_file, header=F) %>% set_names(c("sample_1", "sample_2"))
Holger Brandl's avatar
Holger Brandl committed
235 236 237

    ## add other direction for completeness
    compPairs <- rbind_list(compPairsUniDir, compPairsUniDir %>% select(sample_1=sample_2, sample_2=sample_1))
Holger Brandl's avatar
Holger Brandl committed
238 239 240
}else{
    echo("comparing all samples against each other")
    compPairs <- diffData(genes(cuff)) %>% select(sample_1, sample_2) %>% distinct()
Holger Brandl's avatar
Holger Brandl committed
241 242 243
}


244
allDiff <- diffData(genes(cuff)) %>%
Holger Brandl's avatar
Holger Brandl committed
245 246 247
    ## just keep pairs of interest
    merge(compPairs) %>%

248
    ##  discard all genes that are not expressed in any of the cell types (>1)
Holger Brandl's avatar
Holger Brandl committed
249
    filter(gene_id %in% getExpressedGenes(cuff, minFPKM=minFPKM)) %>%
250 251 252
    dplyr::rename(ensembl_gene_id=gene_id) %>%
    mutate(sample_1_overex=log2_fold_change<0)

Holger Brandl's avatar
Holger Brandl committed
253 254
#getExpressedGenes(cuff, minFPKM=minFPKM) %>% writeLines(paste0("genes_expressed_gt_", minFPKM[1], "_in_at_least_one_sample.txt"))

Holger Brandl's avatar
Holger Brandl committed
255 256
# diffData <- read.delim("diffData.txt")
#'  [diffData](diffData.txt)
257

Holger Brandl's avatar
Holger Brandl committed
258
## ' Used cutoff criterion was `r if(use_pcutoff) echo("p_value<0.01") else echo("q_value<=0.01")`
259

Holger Brandl's avatar
Holger Brandl committed
260 261
#+ results='asis'
if(!is.null(qcutoff)){
Holger Brandl's avatar
Holger Brandl committed
262
    echo("Using q-value cutoff of", qcutoff)
Holger Brandl's avatar
Holger Brandl committed
263
    allDiff <- transform(allDiff, is_hit=q_value<=qcutoff)
Holger Brandl's avatar
Holger Brandl committed
264
}else{
Holger Brandl's avatar
Holger Brandl committed
265
    echo("Using p-value cutoff of", pcutoff)
Holger Brandl's avatar
Holger Brandl committed
266
    allDiff <- transform(allDiff, is_hit=p_value<=pcutoff)
Holger Brandl's avatar
Holger Brandl committed
267
}
268

269 270 271
## save as reference
merge(allDiff, geneInfo, by="ensembl_gene_id", all.x=T) %>% write.delim(file="diffData.txt")

272

Holger Brandl's avatar
Holger Brandl committed
273 274
#+
# #' Used cutoff criterion was: q_value<0.01
Holger Brandl's avatar
Holger Brandl committed
275
# allDiff <- transform(allDiff, is_hit=q_value<0.01)
Holger Brandl's avatar
Holger Brandl committed
276

277

Holger Brandl's avatar
Holger Brandl committed
278
degs <- subset(allDiff, is_hit)
279 280
#degs <- subset(allDiff, significant=="yes")

Holger Brandl's avatar
Holger Brandl committed
281

282 283
## todo embedd as paginated DataTable

284 285
with(degs, as.data.frame(table(sample_1, sample_2, sample_1_overex))) %>% filter(Freq >0)

286 287
## add gene info
degs <- merge(degs, geneInfo, by="ensembl_gene_id", all.x=T)
Holger Brandl's avatar
Holger Brandl committed
288

289 290 291 292 293

write.delim(degs, file="degs.txt")
# degs <- read.delim("degs.txt")
#' [dge table](degs.txt)

Holger Brandl's avatar
Holger Brandl committed
294 295 296 297 298
ggplot(degs, aes(paste(sample_1, "vs",  sample_2), fill=status)) + geom_bar() +xlab(NULL) + ylab("# DGEs") +ggtitle("DGE count summary") + coord_flip()

ggplot(degs, aes(paste(sample_1, "vs",  sample_2), fill=sample_1_overex)) + geom_bar(position="dodge") + xlab(NULL) + ylab("# DGEs") + ggtitle("DGE counts by direction") + coord_flip()

## just needed to restore environment easily
Holger Brandl's avatar
Holger Brandl committed
299 300
save(degs, file=".degs.RData")
# degs <- local(get(load(".degs.RData")))
301

Holger Brandl's avatar
Holger Brandl committed
302 303


304
########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
305
#' ## Term enrichment
306 307
#+ echo=FALSE

Holger Brandl's avatar
Holger Brandl committed
308
#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')`
Holger Brandl's avatar
Holger Brandl committed
309

Holger Brandl's avatar
Holger Brandl committed
310
geneLists <- degs %>%
311 312
#    transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_"))
    transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
313

Holger Brandl's avatar
Holger Brandl committed
314 315 316 317 318 319 320
grpdDegs <- if(split_hit_list){
    degs %>% group_by(sample_1, sample_2, sample_1_overex)
}else{
    degs %>% group_by(sample_1, sample_2)
}

enrResults <- grpdDegs %>% do(davidAnnotationChart(.$ensembl_gene_id))
321

Holger Brandl's avatar
Holger Brandl committed
322

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
write.delim(enrResults, file="enrResults.txt")
# enrResults <- read.delim("enrResults.txt")
#'  [Enrichment Results](enrResults.txt)

sigEnrResults <- subset(enrResults, Bonferroni <0.01)

write.delim(sigEnrResults, file="sigEnrResults.txt")
# sigEnrResults <- read.delim("sigEnrResults.txt")
#'  [Very Significant Terms](sigEnrResults.txt)


## plot the enrichment results
#sigEnrResults %>% group_by(Category, add=T) %>% do({
#    logPlot <- . %>% ggplot(aes(Term, PValue)) +
#	    geom_bar(stat="identity")+coord_flip() +
#	    xlab("Enriched Terms") +
#	    ggtitle(.$Category[1]) +
#	    scale_y_log10()
#	    print(logPlot)
#})

sigEnrResults %>% do({
    enrResultsGrp <- .
Holger Brandl's avatar
Holger Brandl committed
346
    ## DEBUG enrResultsGrp <- sigEnrResults
Holger Brandl's avatar
Holger Brandl committed
347
    label <-
Holger Brandl's avatar
Holger Brandl committed
348
    dplyr::select(enrResultsGrp, matches(ifelse(split_hit_list, "sample_1|sample_2|sample_1_overex", "sample_1|sample_2"))) %>%
Holger Brandl's avatar
Holger Brandl committed
349 350
      head(1) %>% apply(1, paste, collapse="_vs_")

351 352
    echo("processing", label)

Holger Brandl's avatar
Holger Brandl committed
353 354 355 356
    logPlot <- enrResultsGrp %>%
        ## fix factor order
        mutate(Term=reorder(Term, -PValue) %>% reorder(as.integer(Category))) %>%
        ggplot(aes(Term, PValue, fill=Category)) +
357 358 359 360 361 362
	    geom_bar(stat="identity")+
	    coord_flip() +
	    xlab("Enriched Terms") +
	    ggtitle(label) +
	    scale_y_log10()

Holger Brandl's avatar
Holger Brandl committed
363
	    ggsave(paste0(label, " enrichmed terms.pdf"))
364 365
	    print(logPlot)
})
Holger Brandl's avatar
Holger Brandl committed
366
#ggsave2()
367 368 369 370 371 372 373 374 375 376

#ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type))

#sigEnrResults <- arrange(sigEnrResults, site_type, set, -Bonferroni)
#sigEnrResults$Term <-lockFactorOrder(sigEnrResults$Term)

#ggplot(sigEnrResults, aes(Term, -log10(Bonferroni))) +coord_flip() + geom_bar() + facet_wrap(~site_type + set)
#ggplot(sigEnrResults, aes(reorder(set, -Bonferroni), -log10(Bonferroni), label=Term)) + geom_text(alpha=0.6, size=3) + ggtitle("enriched terms by set") + scale_y_log10()
#ggsave2(width=14, height=12)

377
#write.table(sigEnrResults, file=paste0("sigEnrTerms.txt"), row.names=FALSE,  sep="\t")
378

Holger Brandl's avatar
Holger Brandl committed
379
#with(sigEnrResults, as.data.frame(table(Category)))
Holger Brandl's avatar
Holger Brandl committed
380 381 382
keggPathways <- sigEnrResults %>%
    filter(Category=="KEGG_PATHWAY") %>%
    separate(Term, c('kegg_pathway_id', 'pathway_description'), sep="\\:", remove=F) %>%
Holger Brandl's avatar
Holger Brandl committed
383
    with(kegg_pathway_id) %>% ac()
Holger Brandl's avatar
Holger Brandl committed
384

Holger Brandl's avatar
Holger Brandl committed
385 386 387 388 389 390
########################################################################################################################
#' ## Enriched KEGG Pathways

require(pathview)
require(png)

Holger Brandl's avatar
Holger Brandl committed
391 392
#if(nrow(keggPathways)==0){
#    echo("no enriched kegg pathways were found in the dataset")
Holger Brandl's avatar
Holger Brandl committed
393 394
#}else{

Holger Brandl's avatar
Holger Brandl committed
395
keggOrCode <- "mmu"
Holger Brandl's avatar
Holger Brandl committed
396 397 398 399 400 401 402 403 404 405 406 407 408

## prepare p-value data
sliceData <- allDiff %>%
    mutate(
        ## define slice labels (needed for casting)
        comparison=paste0(sample_1, "__vs__", sample_2),
        ## create normalized linear score for slice plotting
        sample_1_overex=log2_fold_change<0,
        plot_score=-log10(q_value)*ifelse(sample_1_overex, 1, -1)
    ) %>%
    dcast(ensembl_gene_id ~ comparison, value.var="plot_score") %>%
    column2rownames("ensembl_gene_id")

Holger Brandl's avatar
Holger Brandl committed
409 410 411
#sliceData %>% head %>% kable()

data.frame(set=names(sliceData)) %>% mutate(slice_index=row_number()) %>% kable()
Holger Brandl's avatar
Holger Brandl committed
412 413 414 415


plot_pathway <- function(pathwayID, overlayData){
#    pathwayID="mmu04015"
Holger Brandl's avatar
Holger Brandl committed
416 417
#    browser()
    echo("processing pathway", pathwayID)
Holger Brandl's avatar
Holger Brandl committed
418 419 420
    pv.out <- pathview(
            gene.data = overlayData,
            pathway.id = pathwayID,
Holger Brandl's avatar
Holger Brandl committed
421
            species = keggOrCode,
Holger Brandl's avatar
Holger Brandl committed
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
#            out.suffix = pathwayID$kegg.description,
#            out.suffix = pathwayID,
            multi.state = ncol(overlayData)>1,
#            kegg.native=F,
#            node.sum = "mean", # the method name to calculate node summary given that multiple genes or compounds are mapped to it
            limit = list(gene = c(-4,4)),
            gene.idtype="ensembl"
    )

    outfile <- paste0(pathwayID, ".pathview", ifelse(ncol(overlayData)>1, ".multi", ""), ".png")

    ## interactive plotting
#    ima <- readPNG(outfile)
#    plot.new()
#    lim <- par()
#    rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])

Holger Brandl's avatar
Holger Brandl committed
439 440
    pv.out$plotfile=outfile
    pv.out$pathway_id=pathwayID
Holger Brandl's avatar
Holger Brandl committed
441

Holger Brandl's avatar
Holger Brandl committed
442
    return(pv.out)
Holger Brandl's avatar
Holger Brandl committed
443 444 445 446 447 448 449
}

#keggPathways <- c("mmu04976", "mmu04972", "mmu04810", "mmu04520", "mmu04530", "mmu04270", "mmu04015")
#keggPathways <- c("mmu04015")



Holger Brandl's avatar
Holger Brandl committed
450
pathwayPlots <- keggPathways %>% llply(function(pathwayID){
Holger Brandl's avatar
Holger Brandl committed
451 452 453
    plot_pathway(pathwayID, sliceData)
})

Holger Brandl's avatar
Holger Brandl committed
454

Holger Brandl's avatar
Holger Brandl committed
455 456 457
save(pathwayPlots, file=".pathwayPlots.RData")
# pathwayPlots <- local(get(load("pathwayPlots.RData")))

Holger Brandl's avatar
Holger Brandl committed
458

Holger Brandl's avatar
Holger Brandl committed
459
#+ results="asis"
Holger Brandl's avatar
Holger Brandl committed
460 461 462 463

## simple non-clickable plots
## http://stackoverflow.com/questions/12588323/r-how-to-extract-values-for-the-same-named-elements-across-multiple-objects-of
#unlist(lapply(pathwayPlots, "[[", "plotfile"))
Holger Brandl's avatar
Holger Brandl committed
464
# unlist(lapply(pathwayPlots, "[[", "plotfile")) %>% l_ply(function(pngFile){ cat(paste0("<img src='", pngFile, "'><br>"))})
Holger Brandl's avatar
Holger Brandl committed
465

466 467 468 469 470 471 472 473 474 475
#cat("
#<style type='text/css'>
# img {
#   max-width: 100%;
# }
#</style>
#")

## todo add tooltips with additinal info
## http://stackoverflow.com/questions/5478940/how-to-create-a-html-tooltip-on-top-of-image-map
Holger Brandl's avatar
Holger Brandl committed
476

Holger Brandl's avatar
Holger Brandl committed
477
## extended version with clickable links
Holger Brandl's avatar
Holger Brandl committed
478
pathwayPlots %>% l_ply(function(plotData){
Holger Brandl's avatar
Holger Brandl committed
479
    #pngFile="mmu04015.pathview.png"
Holger Brandl's avatar
Holger Brandl committed
480 481 482
#    pathway_id=(basename(pngFile) %>% str_split_fixed("[.]", 2))[,1]
    pngFile <- plotData$plotfile
    pathway_id=plotData$pathway_id
Holger Brandl's avatar
Holger Brandl committed
483

Holger Brandl's avatar
Holger Brandl committed
484
    keggNodes <- plotData$plot.data.gene %>%
Holger Brandl's avatar
Holger Brandl committed
485
        ## remove box offset
Holger Brandl's avatar
Holger Brandl committed
486 487 488 489
        mutate(x=x-22, y=y-8) %>% ## todo does not work for non-gene elements
        mutate(link=paste0("http://www.kegg.jp/dbget-bin/www_bget?", keggOrCode, ":", kegg.names ))

#    "http://www.kegg.jp/dbget-bin/www_bget?mmu:16412
Holger Brandl's avatar
Holger Brandl committed
490 491

    ## first the image itself
492 493
#    cat(paste0("<img usemap='#", pathway_id,"' src='", pngFile, "'><br>"))
    cat(paste0("<div style='width: 2000px'><img style='float: left' usemap='#", pathway_id,"' src='", pngFile, "'></div><br>"))
Holger Brandl's avatar
Holger Brandl committed
494
    cat(paste0("<map name='", pathway_id,"'>"))
Holger Brandl's avatar
Holger Brandl committed
495 496 497 498 499 500 501 502 503 504
    #keggNodes %>% rowwise() %>% {curNode=.; cat(curNode$name)}

    #see http://www.html-world.de/180/image-map/
    keggNodes %>% a_ply(1, function(curNode){
        rectDef=with(curNode, paste(x, y, x+width, y+height, sep=","))
        paste0("<area href='", curNode$link,"' alt='Text' coords='",rectDef , "' shape='rect'>") %>% cat()
    })
    cat("</map>")
})

Holger Brandl's avatar
Holger Brandl committed
505
#}
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535



#+ include=FALSE
# ########################################################################################################################
# #' ## Protein-Protein Interaction Clusters using String
#
# ## see http://www.bioconductor.org/packages/release/bioc/vignettes/STRINGdb/inst/doc/STRINGdb.pdf
#
# #' We can use the string-db web service to look for protein-protein interactions.
# #' This normally results in a large hairball and many unconnected nodes and so we can look for enriched clusters.
# #' Here we plot using (high confidence interactions) enriched clusters with a node size greater than 5 proteins for our list of candidate genes showing up in at least two replicates
#
# #+ PPI plots , fig.width=30, fig.height=30, fig.retina=2, warning=FALSE, tidy=TRUE
#
# #taxTable <- list(ENSG=9606,  ENSMUSG=10090)
# #taxId <- taxTable[[degs$gene_id[1] %>% ac() %>% str_extract("^([A-z]+)")]]
# #
# #require.auto(STRINGdb)
# #
# #string_db <- STRINGdb$new(score_threshold=400, species=taxId, input_directory="/local2/user_data/brandl/data/stringr") ## score threshold default is 400
# #example1_mapped <- string_db$map( summary_allgids_df_sighit_gt1, "supplier_gene_symbol", species= removeUnmappedRows = TRUE )
# #clustersList <- string_db$get_clusters(example1_mapped$STRING_id)
# #for(network in clustersList)
# #{
# #	if(length(network)>5)
# #	{
# #		string_db$plot_network(network, add_link=FALSE)
# #	}
# #}
Holger Brandl's avatar
Holger Brandl committed
536 537 538
#

#' Created `r format(Sys.Date(), format="%B-%d-%Y")` by `r readLines(pipe('whoami'))`