diff --git a/dge_workflow/dge_analysis.R b/dge_workflow/dge_analysis.R index a4a78fd80c5d6c0eac7cec9483eb1d8e76c436c5..ac9fac3f656855a20d690bff6c9f36ea3d3230c0 100755 --- a/dge_workflow/dge_analysis.R +++ b/dge_workflow/dge_analysis.R @@ -4,8 +4,8 @@ # stop(paste("file does not exist\n", doc)) #} ######################################################################################################################## -#' Differential gene Expression Analysis #+ include=FALSE +##' # Differential Gene Expression Analysis ##' TBD: Small overview about pipeline @@ -21,19 +21,22 @@ Usage: dge_analysis.R [-S] <cuffdb_directory> Options: --compare <sample_pairs> Txt files with sample pairs for which dge analysis should be performed -S Dont do general statistics and qc analysis -' +--undirected Perform directed dge-steps in a directed manner' -print(paste("args are ", commandArgs(TRUE))) -opts <- docopt(doc, commandArgs(TRUE)) -# opts <- docopt(doc, "..") +#opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining +#opts <- docopt(doc,"cuffdir") -print("options were:") -print(opts) +opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" "))) +#opts + +skipQC <<- opts$S +directedDgeSets <- !opts$undirected +#print("options were:") +#print(opts) cuffdb_directory <- normalizePath(opts$cuffdb_directory) -print(paste("db dir is", cuffdb_directory)) if(is.na(file.info(cuffdb_directory)$isdir)){ stop(paste("db directory does not exist", doc)) } @@ -49,9 +52,12 @@ devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ #knitr::opts_knit$set(root.dir = getwd()) ####################################################################################################################### -#' # Cuffdiff DB Overview +#' # Cuffdiff DB #+ load_db, cache=FALSE +print(paste("db dir is", cuffdb_directory)) + + #' Used cuffdiff database in `r cuffdb_directory` ## workaround until cummerbund is fixed @@ -64,9 +70,11 @@ cuff runInfo(cuff) replicates(cuff) %>% mutate(file=basename(file)) %>% select(-c(total_mass, norm_mass, internal_scale, external_scale)) +#+ eval=skipQC +#hello hidden field ####################################################################################################################### -#' ## Score Distributions and correlation +#' ## Score Distributions #if(!as.logical(opts$S)){ @@ -97,7 +105,8 @@ fpkmSCVPlot(genes(cuff)) csVolcanoMatrix(genes(cuff)) #csVolcano(genes(cuff),"a", "b") - +####################################################################################################################### +#' ## Replicate Correlation and Clustering require.auto(edgeR) plotMDS(repFpkmMatrix(genes(cuff)), top=500, main="top500 MDS") @@ -127,10 +136,7 @@ csDendro(genes(cuff),replicates=T) ####################################################################################################################### #' ## Raw data tables -## merge in basic gene info - -#xls(runInfo(cuff)) -#setwd("/Volumes/project-zeigerer/Rab5_NGS/results/") +## todo merge in basic gene info geneFpkm<-fpkm(genes(cuff)) @@ -148,7 +154,7 @@ write.delim(repGeneFPKMs, file="repGeneFPKMs.txt") #' [FPKMs by Replicate](repGeneFPKMs.txt) geneCounts<-count(genes(cuff)) -max(geneCounts$count) +#max(geneCounts$count) write.delim(geneCounts, file="geneCounts.txt") geneCgene.matrix<-fpkmMatrix(genes(cuff)) #' [Raw Counts](geneCounts.txt) @@ -158,10 +164,6 @@ write.delim(repCounts, file="repCounts.txt") # repCounts <- read.delim("repCounts.txt") #' [Raw Counts by Replicate](repCounts.txt) -#}else{ -# echo("skipping qc section") -#} - ####################################################################################################################### #' ## Extract lists of differentially expressed genes @@ -348,9 +350,9 @@ sigEnrResults %>% do({ #write.table(sigEnrResults, file=concat("sigEnrTerms.txt"), row.names=FALSE, sep="\t") -######################################################################################################################### +#+ include=FALSE +# ######################################################################################################################## ##' ## Enriched KEGG Pathways -##+ echo=FALSE, warning=FALSE # #require(pathview) #require(png) diff --git a/dge_workflow/lsf_rna_seq.sh b/dge_workflow/lsf_rna_seq.sh index afccde09e8de45c41c9ed6f56d105678fe634160..bd9bdedbdf22feea93e222f83317a4d64266f278 100755 --- a/dge_workflow/lsf_rna_seq.sh +++ b/dge_workflow/lsf_rna_seq.sh @@ -68,7 +68,7 @@ for fastqFile in $* ; do done wait4jobs .cajobs -ziprm cutadapt_logs $project__ca__* +ziprm cutadapt_logs ${project}__ca__* ## todo do a small report here about what has been trimmed away and why diff --git a/misc/spin.R b/misc/spin.R index f084d9ac8d256b457b74b313426d1381c45940e3..7ae1eb3198d251ffa832eb622587eecce5357620 100755 --- a/misc/spin.R +++ b/misc/spin.R @@ -15,13 +15,14 @@ suppressMessages(library(docopt)) # retrieve and parse the command-line arguments doc <- ' Use knitr to spin R documents -Usage: spin.R [-cewm] <r_script> [<script_args>...] +Usage: spin.R [options] <r_script> [<script_args>...] Options: -c Cache results -e Show Code -w Show warnings -m Show Messages +--keep Keep generated Rmd and md files ' #docopt(doc, "-w test a b c ")$"-w" @@ -78,6 +79,8 @@ opts_chunk$set( knit2html(basename(rmdScript), header=cssHeader) ## also remove the .md and the .Rmd files -file.remove(basename(rmdScript)) -file.remove(basename(str_replace(r_script, "[.]R$", ".md"))) +if(!spin_opts$keep){ + file.remove(basename(rmdScript)) + file.remove(basename(str_replace(r_script, "[.]R$", ".md"))) +}