diff --git a/dge_workflow/dge_analysis.R b/dge_workflow/dge_analysis.R index c042602e599e1ad4ae0cf27373e4f3dd14c2b2a0..3740226764c185fb738eef7504fcc0d2714c6f2f 100755 --- a/dge_workflow/dge_analysis.R +++ b/dge_workflow/dge_analysis.R @@ -10,19 +10,26 @@ Usage: dge_analysis.R [options] <cuffdb_directory> Options: --constrasts=<tab_delim_table> Table with sample pairs for which dge analysis should be performed --undirected Perform directed dge-steps in a directed manner +--qcutoff <qcutoff> Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01] +--pcutoff <pcutoff> Override q-value filter and filter by p-value instead -S Dont do general statistics and qc analysis ' #opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining -#opts <- docopt(doc, "--undirected ..") +# Stefania: opts <- docopt(doc, "--undirected --pcutoff 0.05 ..") +# opts <- docopt(doc, "--undirected ..") # opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), "..")) opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" "))) -# opts +#opts skipQC <<- opts$S split_hit_list <- !opts$undirected constrasts_file <- opts$constrasts +pcutoff <- if(is.null(opts$pcutoff)) NULL else as.numeric(opts$pcutoff) +qcutoff <- if(is.numeric(pcutoff)) NULL else as.numeric(opts$qcutoff) + +stopifnot(is.numeric(qcutoff) | is.numeric(pcutoff)) cuffdb_directory <- normalizePath(opts$cuffdb_directory) @@ -61,6 +68,8 @@ cuff <- readCufflinks(dir=tmpdir) cuff runInfo(cuff) replicateInfo <- replicates(cuff) %>% mutate(file=basename(file)) %>% select(-c(total_mass, norm_mass, internal_scale, external_scale)) +replicateInfo + write.delim(replicateInfo, file="replicateInfo.txt") # replicateInfo <- read.delim("replicateInfo.txt") #' [replicateInfo](replicateInfo.txt) @@ -82,6 +91,7 @@ if(!file.exists(cacheFile)){ mousemart = useDataset(martName, mart=useMart("ensembl")) geneInfo <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'description', 'gene_biotype'), mart=mousemart); save(geneInfo, file=cacheFile) + unloadNameSpace(biomaRt) }else{ geneInfo <- local(get(load(cacheFile))) } @@ -184,9 +194,9 @@ noTextLayer <- function(gg){ gg$layers[[2]] <- NULL;; gg} ## todo use fix scaled here -noTextLayer(csDistHeat(genes(cuff)) + ggtitle("mouse zone correlation")) +noTextLayer(csDistHeat(genes(cuff)) + ggtitle("Sample correlation")) -noTextLayer(csDistHeat(genes(cuff),replicates=T) + ggtitle("mouse replicate correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15)) +noTextLayer(csDistHeat(genes(cuff),replicates=T) + ggtitle("Replicate Correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15)) csDendro(genes(cuff),replicates=T) @@ -226,18 +236,27 @@ allDiff <- diffData(genes(cuff)) %>% mutate(sample_1_overex=log2_fold_change<0) +## ' Used cutoff criterion was `r if(use_pcutoff) echo("p_value<0.01") else echo("q_value<=0.01")` +#+ results='asis' +if(!is.null(qcutoff)){ + echo("Using q-value cutoff of ", qcutoff) + allDiff <- transform(allDiff, isHit=q_value<=qcutoff) +}else{ + echo("Using p-value cutoff of ", pcutoff) + allDiff <- transform(allDiff, isHit=p_value<=pcutoff) +} -##' Used cutoff criterion was: p_value<0.01 -#allDiff <- transform(allDiff, isHit=p_value<=0.01) -#' Used cutoff criterion was: q_value<0.01 -allDiff <- transform(allDiff, isHit=q_value<0.01) +#+ +# #' Used cutoff criterion was: q_value<0.01 +# allDiff <- transform(allDiff, isHit=q_value<0.01) degs <- subset(allDiff, isHit) #degs <- subset(allDiff, significant=="yes") + with(degs, as.data.frame(table(sample_1, sample_2, sample_1_overex))) %>% filter(Freq >0) ## add gene info @@ -278,7 +297,7 @@ ontologies <- c( "GOTERM_BP_FAT" ) -#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=',')` +#' This analysis was performed using [David](http://david.abcc.ncifcrf.gov/). The following ontologies were tested: `r paste(ontologies, collapse=', ')` require(RDAVIDWebService) ## just works if installed on non-network-drive (e.g. /tmp/) @@ -317,7 +336,14 @@ davidAnnotationChart <- function(someGenes){ ## expexted to have a column with g } -enrResults <- degs %>% group_by(sample_1, sample_2, sample_1_overex) %>% do(davidAnnotationChart(.$ensembl_gene_id)) + +grpdDegs <- if(split_hit_list){ + degs %>% group_by(sample_1, sample_2, sample_1_overex) +}else{ + degs %>% group_by(sample_1, sample_2) +} + +enrResults <- grpdDegs %>% do(davidAnnotationChart(.$ensembl_gene_id)) write.delim(enrResults, file="enrResults.txt") # enrResults <- read.delim("enrResults.txt") diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 646ca00a655bae7059d4de4ffccd9450e06fc52d..09789b0a97448c5aa9e086d2671e20d96312681d 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -209,6 +209,7 @@ fi allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | sort) ## todo use optional argument here --> default "unmapped" --> allow user to add others #allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "1029" | sort) +#allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "Ctrl_1029" | sort) bamsSplit="" for label in $(echo $labels | tr ", " " "); do