Skip to content
Snippets Groups Projects
Commit e4d97c24 authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. dge analysis

parent a496a7db
No related branches found
No related tags found
No related merge requests found
......@@ -10,19 +10,26 @@ Usage: dge_analysis.R [options] <cuffdb_directory>
Options:
--constrasts=<tab_delim_table> Table with sample pairs for which dge analysis should be performed
--undirected Perform directed dge-steps in a directed manner
--qcutoff <qcutoff> Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01]
--pcutoff <pcutoff> Override q-value filter and filter by p-value instead
-S Dont do general statistics and qc analysis
'
#opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining
#opts <- docopt(doc, "--undirected ..")
# Stefania: opts <- docopt(doc, "--undirected --pcutoff 0.05 ..")
# opts <- docopt(doc, "--undirected ..")
# opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), ".."))
opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" ")))
# opts
#opts
skipQC <<- opts$S
split_hit_list <- !opts$undirected
constrasts_file <- opts$constrasts
pcutoff <- if(is.null(opts$pcutoff)) NULL else as.numeric(opts$pcutoff)
qcutoff <- if(is.numeric(pcutoff)) NULL else as.numeric(opts$qcutoff)
stopifnot(is.numeric(qcutoff) | is.numeric(pcutoff))
cuffdb_directory <- normalizePath(opts$cuffdb_directory)
......@@ -61,6 +68,8 @@ cuff <- readCufflinks(dir=tmpdir)
cuff
runInfo(cuff)
replicateInfo <- replicates(cuff) %>% mutate(file=basename(file)) %>% select(-c(total_mass, norm_mass, internal_scale, external_scale))
replicateInfo
write.delim(replicateInfo, file="replicateInfo.txt")
# replicateInfo <- read.delim("replicateInfo.txt")
#' [replicateInfo](replicateInfo.txt)
......@@ -82,6 +91,7 @@ if(!file.exists(cacheFile)){
mousemart = useDataset(martName, mart=useMart("ensembl"))
geneInfo <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'description', 'gene_biotype'), mart=mousemart);
save(geneInfo, file=cacheFile)
unloadNameSpace(biomaRt)
}else{
geneInfo <- local(get(load(cacheFile)))
}
......@@ -184,9 +194,9 @@ noTextLayer <- function(gg){ gg$layers[[2]] <- NULL;; gg}
## todo use fix scaled here
noTextLayer(csDistHeat(genes(cuff)) + ggtitle("mouse zone correlation"))
noTextLayer(csDistHeat(genes(cuff)) + ggtitle("Sample correlation"))
noTextLayer(csDistHeat(genes(cuff),replicates=T) + ggtitle("mouse replicate correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15))
noTextLayer(csDistHeat(genes(cuff),replicates=T) + ggtitle("Replicate Correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15))
csDendro(genes(cuff),replicates=T)
......@@ -226,18 +236,27 @@ allDiff <- diffData(genes(cuff)) %>%
mutate(sample_1_overex=log2_fold_change<0)
## ' Used cutoff criterion was `r if(use_pcutoff) echo("p_value<0.01") else echo("q_value<=0.01")`
#+ results='asis'
if(!is.null(qcutoff)){
echo("Using q-value cutoff of ", qcutoff)
allDiff <- transform(allDiff, isHit=q_value<=qcutoff)
}else{
echo("Using p-value cutoff of ", pcutoff)
allDiff <- transform(allDiff, isHit=p_value<=pcutoff)
}
##' Used cutoff criterion was: p_value<0.01
#allDiff <- transform(allDiff, isHit=p_value<=0.01)
#' Used cutoff criterion was: q_value<0.01
allDiff <- transform(allDiff, isHit=q_value<0.01)
#+
# #' Used cutoff criterion was: q_value<0.01
# allDiff <- transform(allDiff, isHit=q_value<0.01)
degs <- subset(allDiff, isHit)
#degs <- subset(allDiff, significant=="yes")
with(degs, as.data.frame(table(sample_1, sample_2, sample_1_overex))) %>% filter(Freq >0)
## add gene info
......@@ -278,7 +297,7 @@ ontologies <- c(
"GOTERM_BP_FAT"
)
#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=',')`
#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')`
require(RDAVIDWebService) ## just works if installed on non-network-drive (e.g. /tmp/)
......@@ -317,7 +336,14 @@ davidAnnotationChart <- function(someGenes){ ## expexted to have a column with g
}
enrResults <- degs %>% group_by(sample_1, sample_2, sample_1_overex) %>% do(davidAnnotationChart(.$ensembl_gene_id))
grpdDegs <- if(split_hit_list){
degs %>% group_by(sample_1, sample_2, sample_1_overex)
}else{
degs %>% group_by(sample_1, sample_2)
}
enrResults <- grpdDegs %>% do(davidAnnotationChart(.$ensembl_gene_id))
write.delim(enrResults, file="enrResults.txt")
# enrResults <- read.delim("enrResults.txt")
......
......@@ -209,6 +209,7 @@ fi
allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | sort)
## todo use optional argument here --> default "unmapped" --> allow user to add others
#allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "1029" | sort)
#allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "Ctrl_1029" | sort)
bamsSplit=""
for label in $(echo $labels | tr ", " " "); do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment