Skip to content
Snippets Groups Projects
featcounts_deseq.R 20.9 KiB
Newer Older
#!/usr/bin/env Rscript
#+ include=FALSE

Holger Brandl's avatar
Holger Brandl committed

suppressMessages(require(docopt))

doc <- '
Convert a featureCounts results matrix into a dge-report using deseq2
Melanie Schneider's avatar
Melanie Schneider committed
Usage: featcounts_deseq.R [options] <count_matrix>
Holger Brandl's avatar
Holger Brandl committed

Options:
--contrasts=<tab_delim_table> Table with sample pairs for which dge analysis should be performed
--qcutoff <qcutoff>    Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01]
--pcutoff <pcutoff>    Override q-value filter and filter by p-value instead
--min_count <min_count>   Minimal expression in any of the sample to be included in the final result list [default: 10]
Melanie Schneider's avatar
Melanie Schneider committed
opts <- docopt(doc, commandArgs(TRUE))
Melanie Schneider's avatar
Melanie Schneider committed
#opts <- docopt(doc, "--contrasts ~/MPI-CBG_work/P5_DESeq/dba_contrasts_human.txt ~/MPI-CBG_work/P5_DESeq/countMatrix_human.txt")
Holger Brandl's avatar
Holger Brandl committed
## opts <- docopt(doc, "countMatrix.txt")

require(knitr)
require(DESeq2)

Holger Brandl's avatar
Holger Brandl committed

Holger Brandl's avatar
Holger Brandl committed
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.12/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.12/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.12/R/bio/diffex_commons.R")
Holger Brandl's avatar
Holger Brandl committed
require.auto(DT)
Holger Brandl's avatar
Holger Brandl committed
require.auto(gplots)

count_matrix_file <- opts$count_matrix
contrasts_file <- opts$contrasts

pcutoff <- if(is.null(opts$pcutoff)) NULL else as.numeric(opts$pcutoff)
qcutoff <- if(is.numeric(pcutoff)) NULL else as.numeric(opts$qcutoff)
if(is.numeric(pcutoff)) opts$qcutoff <- NULL


########################################################################################################################
#' ## Data preparation

#' Working Directory: `r getwd()`

resultsBase <- count_matrix_file %>% basename() %>% trim_ext(".txt") %>% trim_ext(".count_matrix")

countData <- read.delim(count_matrix_file)

#' strucutre of input matrix was
glimpse(countData, width=60)
countMatrix <- countData %>% column2rownames("ensembl_gene_id") %>% as.matrix()

Holger Brandl's avatar
Holger Brandl committed
# Expression Filtering
genesBefore <- nrow(countMatrix)
countMatrix %<>% filterByExpression(opts$min_count)
genesAfter <- nrow(countMatrix)
#' Counts were filtered to only retain genes which had more that `r opts$min_count` alignments in at least one replicates. This reduced the number of genes from `r genesBefore` to `r genesAfter`.
########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
#' ## Perform Differential Expression Analysis
Melanie Schneider's avatar
Melanie Schneider committed

#' DESeq is an R package to analyse count data from high-throughput sequencing assays such as RNA-Seq and test for differential expression. The latest version, DESeq2, is used.
#' DESeq uses a model based on the negative binomial distribution and offers the following features:
#' Count data is discrete and skewed and is hence not well approximated by a normal distribution. Thus, a test based on the negative binomial distribution, which can reflect these properties, has much higher power to detect differential expression.
#' Tests for differential expression between experimental conditions should take into account both technical and biological variability. Recently, several authors have claimed that the Poisson distribution can be used for this purpose. However, tests based on the Poisson assumption (this includes the binomial test and the chi-squared test) ignore the biological sampling variance, leading to incorrectly optimistic p values. The negative binomial distribution is a generalisation of the Poisson model that allows to model biological variance correctly. 
#' DESeq estimate the variance in a local fashion, using different coefficients of variation for different expression strengths. This removes potential selection biases in the hit list of differentially expressed genes, and gives a more balanced and accurate result.

#' See deseq reference [docs](http://master.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf) for details

#' To understand fold-change shrinking and estimation check out
#' [Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2](http://genomebiology.com/2014/15/12/550/abstract)

get_sample_from_replicate <- function(repName) str_match(repName, "(.*)_[R]?[0-9]{1}$")[,2]
Holger Brandl's avatar
Holger Brandl committed
# Define or load a contrasts matrix
if(!is.null(contrasts_file)){
    contrasts <- read.delim(contrasts_file, header=T) %>% fac2char()
}else{
Holger Brandl's avatar
Holger Brandl committed
    contrasts <- data.frame(sample=get_sample_from_replicate(colnames(countMatrix))) %>% distinct() %>%
        merge(.,., suffixes=c("_1", "_2"), by=NULL) %>%
        filter(ac(sample_1)>ac(sample_2)) %>%
#        filter(ac(sample_1)!=ac(sample_2)) %>%
        fac2char()
    write.delim(contrasts, "dba_contrasts.txt")
}

Holger Brandl's avatar
Holger Brandl committed
#' The used contrasts model is
contrasts %>% kable()

Holger Brandl's avatar
Holger Brandl committed
## gene specific factors
#normalizationFactors(dds)

## sizeFactor R help:sigEnrResults
#dds <- makeExampleDESeqDataSet()
#dds <- estimateSizeFactors( dds )
#sizeFactors(dds)

## try again but now use lambda normalization
## see "3.11 Sample-/gene-dependent normalization factors" in the DEseq2 manual for details
Holger Brandl's avatar
Holger Brandl committed
colData <- data.frame(condition=colnames(countMatrix) %>% get_sample_from_replicate)
Melanie Schneider's avatar
Melanie Schneider committed

# valiadate that contrasts model is compatible with data
stopifnot(all((contrasts %>% gather %$% value %>% unique) %in% colData$condition))

dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(~ condition))
Holger Brandl's avatar
Holger Brandl committed
dds <- estimateSizeFactors(dds)
Holger Brandl's avatar
Holger Brandl committed
#' For details about size factor normalziation and calculation see https://www.biostars.org/p/79978/
Holger Brandl's avatar
Holger Brandl committed
##' Size Factors estimated by DESeq
sizeFactors(dds) %>%
    set_names(colnames(countMatrix)) %>% melt() %>%
    rownames2column("sample") %>%
    ggplot(aes(sample, value)) + geom_bar(stat="identity") + ggtitle("deseq size factors") + coord_flip()
Holger Brandl's avatar
Holger Brandl committed
##' From the DESeq docs about how size factors are used: The sizeFactors vector assigns to each column of the count matrix a value, the size factor, such that count values in the columns can be brought to a common scale by dividing by the corresponding size factor.
Holger Brandl's avatar
Holger Brandl committed
##' This means that counts are divied by size factors. So let's now load the lambda libraies and replace the predefined size factors with our custom ones

#' From DESeq manual: The regularized log transformation is preferable to the variance stabilizing transformation if the size factors vary widely.

#' Run Deseq Test
#dds <- DESeq(dds, fitType='local', betaPrior=FALSE)
Holger Brandl's avatar
Holger Brandl committed
#dds <- DESeq(dds, fitType='local')
dds <- DESeq(dds)

#res <- results(dds)
#resultsNames(dds)

#' Model Overview:
summary(results(dds))

Melanie Schneider's avatar
Melanie Schneider committed
########################################################################################################################
#' ## Quality Control

#' ### Data Dispersion

# TODO explain how this plot is useful
Melanie Schneider's avatar
Melanie Schneider committed
#' Plot of the per-gene dispersion estimates together with the fitted mean-dispersion relationship.
#' The dispersion plot shows how the estimates are shrunk from the gene wise values (black dots) toward the fitted estimates, with the final values used in testing being the blue dots.
#' The dispersion can be understood as the square of the coefficient of biological variation.  So, if a gene's expression typically differs from replicate to replicate sample by 20%, this gene's dispersion is: .20^2 = .04.
## The function estimateDispersions performs three steps. First, it estimates, for each gene and each (replicated) condition, a dispersion value, then, it fits, for each condition, a curve through the estimates. Finally, it assigns to each gene a dispersion value, using either the estimated or the fitted value.
dds <- estimateDispersions(dds)
Melanie Schneider's avatar
Melanie Schneider committed
plotDispEsts(dds, main="Dispersion plot")

########################################################################################################################
#' ### PCA and Clustering
Melanie Schneider's avatar
Melanie Schneider committed

#' In order to assess overall similarity between samples two common statistical methods are used - Principal component analysis (PCA) and clustering.
#' This should provide answers to the questions: Which samples are similar to each other, which are different? Does this fit to the expectation from the experiment’s design?
#' Using a regularized log transformation of the raw counts provides the advantage that it stabilizes the variance across the mean.


#' Principal component analysis (PCA) is a technique used to emphasize variation and bring out strong patterns in a dataset.
#' Principal components are the underlying structure in the data. They are the directions where there is the most variance, the directions where the data is most spread out. The data points/samples are projected onto the 2D plane such that they spread out in the two directions that explain most of the differences. The x-axis is the direction that separates the data points the most and the y-axis is a direction that separates the data the second most.

# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)

plotPCA(rld, intgroup = "condition")
Melanie Schneider's avatar
Melanie Schneider committed


#' The Euclidean distance between samples are calculated after performing the regularized log transformation.
#' Using the calculated distance matrix, the samples are projected on a two-dimensional graph such that the distance between samples approximately corresponds to the biological coefficient of variation between those samples.

#distsRL <- dist(t(assay(rld)))
#hc <- hclust(distsRL)
#distMatrix <- as.matrix(distsRL)
distMatrix <- as.matrix(dist(t(assay(rld))))
#rownames(distMatrix) <- colnames(distMatrix) <- with(colData(dds), paste(condition, sep=" : "))

labelcntData <- countData %>%
  distinct(ensembl_gene_id) %>% column2rownames("ensembl_gene_id") %>%
  fac2char %>% data.matrix() %>% round() %>% data.frame()

heatmap.2(distMatrix, labRow=colnames(labelcntData), labCol=colnames(labelcntData),
          symm=TRUE, trace="none",
          #key=F, col=colorpanel(100, "black", "white"),
          margin=c(8, 8), main="Sample Distance Matrix")

########################################################################################################################


## extract all de-sets according to our contrasts model
deResults <- adply(contrasts, 1,  splat(function(sample_1, sample_2){
Melanie Schneider's avatar
Melanie Schneider committed
 #   browser()
        results(dds, contrast=c("condition", sample_1, sample_2)) %>%
        rownames2column("ensembl_gene_id") %>%
        as.data.frame() %>%
        ## see http://rpackages.ianhowson.com/bioc/DESeq2/man/results.html when using contrasts argument
        rename(s1_over_s2_logfc=log2FoldChange) %>%
        mutate(sample_1=sample_1, sample_2=sample_2)
}))

Melanie Schneider's avatar
Melanie Schneider committed
# deResults <- adply(contrasts, 1,  splat(function(sample_1, sample_2){
#  echo('samples', sample_1, sample_2)
#   return(data.frame())
# }))

deResults %>% ggplot(aes(s1_over_s2_logfc)) +
    geom_histogram(binwidth=0.1) +
    facet_grid(sample_1 ~ sample_2) + geom_vline(yintercept=0, color="blue") +
    xlim(-2,2) + 
    ggtitle("sample1 over sampl2 logFC ")


Melanie Schneider's avatar
Melanie Schneider committed
########################################################################################################################
#' ## Significnce of differential binding

deResults %>% ggplot(aes(pvalue)) +
    geom_histogram() +
    xlim(0,1) +
    facet_grid(sample_1 ~ sample_2) + geom_vline(yintercept=0.01, slope=1, color="blue") +
    ggtitle("p-value distributions") #+ scale_x_log10()

deResults %>% ggplot(aes(padj)) +
    geom_histogram() +
Holger Brandl's avatar
Holger Brandl committed
#    xlim(0,1) +
    facet_grid(sample_1 ~ sample_2) + geom_vline(yintercept=0.01, slope=1, color="blue") +
Holger Brandl's avatar
Holger Brandl committed
    ggtitle("Adjusted p-value distributions") #+ scale_x_log10()
# report hit criterion
#+ results='asis'
if(!is.null(qcutoff)){
    echo("Using q-value cutoff of", qcutoff)
    deResults %<>% transform(is_hit=padj<=qcutoff)
}else{
    echo("Using p-value cutoff of", pcutoff)
    deResults %<>% transform(is_hit=pvalue<=pcutoff)
}
#deResults %<>% mutate(is_hit=pvalue<0.05)

deResults %<>% mutate(s1_overex=s1_over_s2_logfc>1)


normCounts <- counts(dds,normalized=TRUE) %>%
Holger Brandl's avatar
Holger Brandl committed
#    set_names(colData(dds)$condition) %>% rownames2column("ensembl_gene_id")
    set_names(colnames(countMatrix)) %>% rownames2column("ensembl_gene_id")


## .. should be same as input
#filter(countData, ensembl_gene_id=="ENSDARG00000000001")

Melanie Schneider's avatar
Melanie Schneider committed
########################################################################################################################
#' # MA and Volcano plots

# deseq approach
# plotMA(deResults, main="DESeq2", ylim=c(-2,2))

Melanie Schneider's avatar
Melanie Schneider committed
# Calculation of the base means per condition
Melanie Schneider's avatar
Melanie Schneider committed
baseMeanPerLvl <- sapply( levels(dds$condition), function(lvl) rowMeans( counts(dds,normalized=TRUE)[,dds$condition == lvl] ) ) %>% 
Melanie Schneider's avatar
Melanie Schneider committed
  rownames2column("ensembl_gene_id")
Melanie Schneider's avatar
Melanie Schneider committed

## add base means to diffßex summary
deResults <- baseMeanPerLvl %>%
    gather(sample, norm_count, -ensembl_gene_id) %>% 
    merge(.,., by="ensembl_gene_id", suffixes=c("_1", "_2")) %>% 
#    filter(ac(sample_1)<ac(sample_2)) %>%
    # add diffex status
    merge(deResults)

#+ fig.width=16, fig.height=14
Melanie Schneider's avatar
Melanie Schneider committed
deResults %>% ggplot(aes(0.5*log2(norm_count_1*norm_count_2), log2(norm_count_2/norm_count_1), color=pvalue<0.05)) +
    geom_point(alpha=0.3) +
    geom_hline(yintercept=0, color="red") +
    facet_grid(sample_1 ~ sample_2)


Melanie Schneider's avatar
Melanie Schneider committed
deResults %$% pvalue %>%  log10() %>% quantile(0.05, na.rm=T)
Melanie Schneider's avatar
Melanie Schneider committed

#' For the volcano plot we have fold change on the X-axis and -log10 of the p-value on the y-axis.
##Volcano plots
hitCounts <- filter(deResults, is_hit) %>%
        count(sample_1, sample_2, s1_overex) %>%
        rename(hits=n) %>%
        merge(data.frame(s1_overex=c(T,F), x_pos=c(-2.5,2.5)))

#+ fig.width=16, fig.height=14
deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) +
    geom_jitter(alpha=0.3, position = position_jitter(height = 0.2)) +
#    theme_bw() +
    xlim(-3,3) +
    scale_color_manual(values = c("TRUE"="red", "FALSE"="black")) +
#    ggtitle("Insm1/Ctrl") +
    ## http://stackoverflow.com/questions/19764968/remove-point-transparency-in-ggplot2-legend
    guides(colour = guide_legend(override.aes = list(alpha=1))) +

    ## tweak axis labels
    xlab(expression(log[2]("x/y"))) +
    ylab(expression(-log[10]("p value"))) +
Melanie Schneider's avatar
Melanie Schneider committed
#  ylim(0,50) +

    ## add hit couts
    geom_text(aes(label=hits, x=x_pos), y=2, color="red", size=10, data=hitCounts) +
    facet_grid(sample_1 ~ sample_2)



# Define absolute binding categories
Holger Brandl's avatar
Holger Brandl committed
#rawCounts <- counts(dds,normalized=F) %>%
#    set_names(colData(dds)$condition) %>% rownames2column("ensembl_gene_id")
#ggplot(rawCounts, aes(H3HA_Sphere)) + geom_histogram() + scale_x_log10() + ggtitle("raw H3HA_Sphere counts distribution")
#require.auto(Hmisc)
#
#bindCats <- rawCounts %>% transmute(ensembl_gene_id, bind_category=cut2(H3HA_Sphere, cuts=c(10, 100)))
#with(bindCats, as.data.frame(table(bind_category))) %>% kable()
Melanie Schneider's avatar
Melanie Schneider committed
# Load transcriptome annotations needed for results annotation
Holger Brandl's avatar
Holger Brandl committed
geneInfo <- quote({
  ## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
  mart <- biomaRt::useDataset(guess_mart(countData$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
  c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
        biomaRt::getBM(mart=mart)
}) %>% cache_it("geneInfo")


# Export the complete dataset for later analysis
deAnnot <- deResults %>%
Melanie Schneider's avatar
Melanie Schneider committed
  #inner_join(normCounts) %>% 
  #merge(., contrasts) %>%
  #merge(.,., suffixes=c("_1", "_2"), by=NULL) %>%
  #inner_join(baseMeanPerLvl, id='ensembl_gene_id') %>%
  left_join(geneInfo)

write.delim(deAnnot, file=paste0(resultsBase, ".dba_results.txt"))
#'  [deAnnot](`r paste0(resultsBase, ".dba_results.txt")`)


Melanie Schneider's avatar
Melanie Schneider committed
########################################################################################################################
#' ## Hits Summary

## Extract hits from deseq results
degs <- deAnnot %>% filter(is_hit)

Melanie Schneider's avatar
Melanie Schneider committed
# ggplot(degs, aes(paste(sample_1, "vs",  sample_2))) + geom_bar() +xlab(NULL) + ylab("# DBGs") + ggtitle("DBG count summary") + coord_flip()
Melanie Schneider's avatar
Melanie Schneider committed
# degs %>%
#     ggplot(aes(paste(sample_1, "vs",  sample_2), fill=s1_overex)) +
#     geom_bar(position="dodge") +
#     xlab(NULL) + ylab("# DBGs") +
#     ggtitle("DBG count summary by direction of expression") +
#     coord_flip()

# Export DBA genes
## disabled because we just subset the annotated data now to define degs
#degsAnnot <- degs %>%
#    inner_join(normCounts) %>%
#    left_join(geneInfo) %>%
#    left_join(bindCats)
degs %>% write.delim(file=paste0(resultsBase, ".diffbind_genes.txt"))
#'  [degs](`r paste0(resultsBase, ".diffbind_genes.txt")`)

Melanie Schneider's avatar
Melanie Schneider committed
unloadNamespace("dplyr"); require(dplyr)
Melanie Schneider's avatar
Melanie Schneider committed
# ## render interactive hit browser
# #+results='asis'
# degs %>%
#     left_join(geneInfo) %>%
#     mutate(
#         igv=paste0("<a href='http://localhost:60151/goto?locus=", chromosome_name,":", start_position, "-", end_position, "'>IGV</a>")
#     ) %>%
#     select(s1_over_s2_logfc, pvalue, ensembl_gene_id, sample_1, sample_2, external_gene_name, description, igv) %>%
# #    kable("html", table.attr = "class='dtable'", escape=F)
#     datatable(escape=F)
Melanie Schneider's avatar
Melanie Schneider committed
#' ### DEG Counts

#+ fig.height=nrow(contrasts)+2
#ggplot(degs, aes(paste(sample_1, "vs",  sample_2), fill=status)) + geom_bar() +xlab(NULL) + ylab("# DGEs") +ggtitle("DEGs by contrast") + coord_flip()
ggplot(degs, aes(paste(sample_1, "vs",  sample_2), fill=(s1_overex))) + geom_bar() + xlab(NULL) + ylab("# DGEs") + ggtitle("DEGs by contrast") + coord_flip()

#with(degs, as.data.frame(table(sample_1, sample_2, s1_overex))) %>% filter(Freq >0) %>% kable()
degs %>% count( sample_1, sample_2, s1_overex) %>% kable()


#'  DEGs (differentially expressed genes) are contained in the following table
write.delim(degs, file="degs.txt")
# degs <- read.delim("degs.txt")
#' [Differentially Expressed Genes](degs.txt)


#' DEGs can be browsed in Excel using the exported table or via the embedded table browser. To enable the IGV links, you need to set the port in the IGV preferences to 3334.
kableDegs <- degs
if(nrow(degs>2000)){
  kableDegs <- degs %>% arrange(padj) %>% head(1000)
  #  Error: object 'q_value' not found. So: padj used instead of '-q_value'
  print("just showing first 1000 most significant hits (highest p-value) in interactive hit table!!!! Use Excel to browser to browse the full set")
}

#+ results='asis'
kableDegs %>%
  #inner_join(geneLoci) %>%
  mutate(
    ensembl=paste0("<a href='http://www.ensembl.org/Multi/Search/Results?y=0;site=ensembl_all;x=0;page=1;facet_feature_type=Gene;q=",ensembl_gene_id, "'>",ensembl_gene_id, "</a>"),
    igv=paste0("<a href='http://localhost:60151/goto?locus=", chromosome_name,":", start_position, "-", end_position, "'>IGV</a>")
  ) %>%
  select(sample_1, sample_2, s1_over_s2_logfc, pvalue, padj, ensembl_gene_id, external_gene_name, description) %>%
Holger Brandl's avatar
Holger Brandl committed
#  kable("html", table.attr = "class='dtable'", escape=F)
   datatable(escape=F)

## just needed to restore environment easily
save(degs, file=".degs.RData")
# degs <- local(get(load(".degs.RData")))


Melanie Schneider's avatar
Melanie Schneider committed
########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
#' ## MA Plots

#' Redo MA plots but now including hit overlays

maPlots <- deResults %>% group_by(sample_1, sample_2) %>% do(gg={ maData <-.
## todo why not s2_over_s1_log2fc
Melanie Schneider's avatar
Melanie Schneider committed
maData %>% ggplot(aes(0.5*log2(norm_count_1*norm_count_2), log2(norm_count_1/norm_count_2), color=is_hit)) +
  geom_point(alpha=0.3) +
  geom_hline(yintercept=0, color="red") +
  ggtitle(with(maData[1,], paste(sample_1, "vs", sample_2)))
}) %$% gg

#+ fig.height=6*ceiling(length(maPlots)/2)
multiplot(plotlist=maPlots, cols=min(2, length(maPlots)))


Melanie Schneider's avatar
Melanie Schneider committed
##--------------------------------------------------------
Melanie Schneider's avatar
Melanie Schneider committed
## Useful link: https://gist.github.com/stephenturner/f60c1934405c127f09a6
Holger Brandl's avatar
Holger Brandl committed
## TODO later, reenable child-inclusion of enrichment analysis
########################################################################################################################
## ## Term enrichment Data Preparation
Holger Brandl's avatar
Holger Brandl committed
##+ echo=FALSE
#
## This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')`
Holger Brandl's avatar
Holger Brandl committed
#
#geneLists <- degs %>%
#    transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
#
#if(nrow(contrasts)<4){
#    geneLists <- rbind_list(
#        geneLists,
#        degs %>% filter(s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, ">", sample_2)),
#        degs %>% filter(!s1_overex) %>% transmute(ensembl_gene_id, list_id=paste(sample_1, "<", sample_2))
#    )
#}
#
### additional overlaps before doing the intersection analysis
#geneLists %>% count(list_id) %>% kable()
#
#intersectLists <- function(geneLists, listIdA, listIdB, intersectListId) {
#    commonGenes <- setdiff(filter(geneLists, list_id==listIdA)$ensembl_gene_id, filter(geneLists, list_id==listIdB)$ensembl_gene_id)
#    data.frame(list_id=intersectListId, ensembl_gene_id=commonGenes)
#}
#
#geneLists %<>% group_by(list_id)
#save(geneLists, file=".enrGeneLists.RData")
## geneLists <- local(get(load("enrGeneLists.RData")))
#
#
### redefine opts arguments and tweak knitr options
#opts_knit$set(root.dir = getwd())
#commandArgs <- function(x) c(paste("--overlay_expr_data ", count_matrix_file, " enrGeneLists.RData"))
##source("/home/brandl/mnt/mack/bioinfo/scripts/ngs_tools/dev/common/david_enrichment.R")
## child='/home/brandl/mnt/mack/bioinfo/scripts/ngs_tools/dev/common/david_enrichment.Rmd'