Skip to content
Snippets Groups Projects
Commit 8b1e2d8e authored by Holger Brandl's avatar Holger Brandl
Browse files

improved cs workflow

parent ccf6d608
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env Rscript
#+ echo=FALSE
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R")
require(knitr)
require.auto(tidyr)
require_auto(knitr)
require_auto(tidyr)
#if (!require("DT")) devtools::install_github("rstudio/DT") ## see http://rstudio.github.io/DT/
## setwd("/projects/bioinfo/holger/projects/krause_chipseq/macs2")
......@@ -28,22 +28,26 @@ n_peaks <- list.files(".", "*.narrowPeak.*", full.names=TRUE, recursive=T) %>%
set_names(c("chromosome_name", "start_position", "end_position", "peak_id", "score", "strand", "fold_change", "pvalue", "qvalue", "sum_rel_to_start", "file")) %>%
select(-sum_rel_to_start)
peaks <- n_peaks
broadPeakFiles <- list.files(".", "*.broadPeak.*", full.names=TRUE, recursive=T)
if(length(broadPeakFiles)>0){
b_peaks <- list.files(".", "*.broadPeak.*", full.names=TRUE, recursive=T) %>%
ldply(readMacsPeaks) %>%
set_names(c("chromosome_name", "start_position", "end_position", "peak_id", "score", "strand", "fold_change", "pvalue", "qvalue", "file"))
peaks <- rbind_list(n_peaks, b_peaks)
peaks <- rbind_list(peaks, b_peaks)
}
## todo remove this later
peaks <- mutate(peaks, file=file %>%str_replace( "_narrow", ".narrow") %>% str_replace("_broad", ".broad"))
#peaks <- mutate(peaks, file=file %>%str_replace( "_narrow", ".narrow") %>% str_replace("_broad", ".broad"))
peaks %>% count(file) %>% kable()
peaks %<>% separate(file, c("sample", "peak_type", "ext"), sep="[.]") %>% select(-ext)
splitProtTime <- . %>% separate(sample, c("protein", "timpoint"), sep="_", remove=F)
#splitProtTime <- . %>% separate(sample, c("protein", "timpoint"), sep="_", remove=F)
#peaks %>% splitProtTime %>% head
peaks %<>% mutate(peak_width=end_position-start_position)
......@@ -107,13 +111,15 @@ peaks %>% sample_frac(0.1) %>%
#' # ChippeakAnno
# see [docs](http://www.bioconductor.org/packages/release/bioc/vignettes/ChIPpeakAnno/inst/doc/ChIPpeakAnno.pdf)
require_auto(ChIPpeakAnno)
data(TSS.zebrafish.Zv9)
#+ eval=FALSE, echo=FALSE
## DEBUG start
if(F){
#http://www.bioconductor.org/packages/release/bioc/vignettes/ChIPpeakAnno/inst/doc/ChIPpeakAnno.pdf
require(ChIPpeakAnno)
data(TSS.zebrafish.Zv9)
#require_auto(ChIPpeakAnno)
#data(TSS.zebrafish.Zv9)
## https://www.biostars.org/p/115101/
## http://master.bioconductor.org/help/course-materials/2009/SeattleNov09/IRanges/IRangesOverview.R
......
......@@ -18,9 +18,9 @@ Options:
opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
# opts <- docopt(doc, "--overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast" )
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/bio/diffex_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/bio/diffex_commons.R")
require.auto(knitr)
require.auto(DT)
......@@ -148,14 +148,15 @@ pCutoff <- 0.05
## sync reactome pacakge to node
if(Sys.getenv("LSF_SERVERDIR")!="" && dir.exists("/tmp/local_r_packages")){
system("scp -r falcon:/tmp/local_r_packages /tmp")
system("if [ ! -d '/tmp/local_r_packages/reactome.db/' ]; then scp -r falcon:/tmp/local_r_packages /tmp ; fi")
}
require.auto(clusterProfiler)
require.auto(ReactomePA)
require_auto(clusterProfiler)
require_auto(ReactomePA)
cp_test <- function(geneIds){
# DEBUG geneIds <- glMapped %>% filter(cluster %in% c("cluster_3")) %$% entrez_gene_id %>% as.integer
# DEBUG geneIds <- head(glMapped,30)$entrez_gene_id
# geneIds=.$entrez_gene_id
if(length(geneIds)>1500){
......@@ -164,9 +165,9 @@ cp_test <- function(geneIds){
echo("testing", length(geneIds), " genes for enrichment")
PANTHER10_ontology <- read.delim("http://data.pantherdb.org/PANTHER10.0/ontology/Protein_Class_7.0")
# PANTHER10_ontology <- read.delim("http://data.pantherdb.org/PANTHER10.0/ontology/Protein_Class_7.0")
pantherResults <- enricher(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, TERM2GENE = PANTHER10_ontology) %>% summary()
# pantherResults <- enricher(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, TERM2GENE = PANTHER10_ontology) %>% summary()
keggResults <- enrichKEGG(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, use_internal_data=T) %>% summary()
reactomeResults <- enrichPathway(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE) %>% summary()
goResultsCC <- enrichGO(gene = geneIds, organism = cpSpecies, pvalueCutoff = pCutoff, readable = TRUE, ont = "CC") %>% summary()
......@@ -194,7 +195,7 @@ enrResults <- quote(glMapped %>% do(cp_test(.$entrez_gene_id))) %>% cache_it(pa
#}, .progress="text", .parallel=F) %>% cache_it(paste0("enrdata_", digest(glMapped)))
unloadNamespace('dplyr'); require(dplyr)
unloadNamespace('dplyr'); require(dplyr) ## tbd seriously???
#enrResults %<>% rename(Category=Description)
#enrResults %<>% rename(ontology=Category)
......
......@@ -26,14 +26,14 @@ require(knitr)
require(DESeq2)
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/bio/diffex_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.20/R/bio/diffex_commons.R")
require.auto(DT)
require_auto(DT)
require.auto(gplots)
require_auto(gplots)
count_matrix_file <- opts$count_matrix
contrasts_file <- opts$contrasts
......@@ -337,7 +337,7 @@ deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) +
#rawCounts <- counts(dds,normalized=F) %>%
# set_names(colData(dds)$condition) %>% rownames2column("ensembl_gene_id")
#ggplot(rawCounts, aes(H3HA_Sphere)) + geom_histogram() + scale_x_log10() + ggtitle("raw H3HA_Sphere counts distribution")
#require.auto(Hmisc)
#require_auto(Hmisc)
#
#bindCats <- rawCounts %>% transmute(ensembl_gene_id, bind_category=cut2(H3HA_Sphere, cuts=c(10, 100)))
#with(bindCats, as.data.frame(table(bind_category))) %>% kable()
......@@ -347,7 +347,10 @@ deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) +
# Load transcriptome annotations needed for results annotation
geneInfo <- quote({
## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
mart <- biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
# mart <- biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
mart <- biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart=mart)
}) %>% cache_it("geneInfo")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment