diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 8d43570eb0a0fc56a4a9f1aa77087509b1b4279a..1aa79e4687642ddc34d426c327bee47ad95d3341 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -6,6 +6,10 @@ source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/bas ## enable snippet spinning source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/spinr/spin_utils.sh 2>&1 2>/dev/null) +export PATH=/home/brandl/bin/spinr:$PATH + +source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/master/R/rendr/rendr_utils.sh 2>&1 2>/dev/null) +export PATH=/home/brandl/bin/rendr:$PATH ## define common binaries @@ -17,7 +21,6 @@ export PATH=/sw/apps/python/current/bin:$PATH export PATH=/home/brandl/bin/deepTools/bin:$PATH export PATH=/projects/bioinfo/holger/bin/FastQC_0.11.2:$PATH export PATH=/projects/bioinfo/holger/bin/bedtools-2.23.0/bin/:$PATH -export PATH=/home/brandl/bin/spinr:$PATH export PATH=/home/brandl/bin/subread-1.4.6-p3-Linux-x86_64/bin:$PATH #export PATH=/home/brandl/bin/STAR/STAR-STAR_2.4.1d/source:$PATH @@ -75,7 +78,7 @@ done wait4jobs .fastqc_jobs -spin.R ${NGS_TOOLS}/dge_workflow/fastqc_summary.R $outputDir +rend.R ${NGS_TOOLS}/dge_workflow/fastqc_summary.R $outputDir mailme "$project: fastqc done in $(pwd)" diff --git a/dge_workflow/fastqc_summary.R b/dge_workflow/fastqc_summary.R index b7bdebb2573329ae423fee60bda1218907e017be..96011f22ffd524614a568587d3728d391faad87b 100755 --- a/dge_workflow/fastqc_summary.R +++ b/dge_workflow/fastqc_summary.R @@ -4,8 +4,8 @@ ## Note This script is supposed to be knitr::spin'ed -devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.9/R/core_commons.R") -devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.9/R/ggplot_commons.R") +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/core_commons.R") +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.14/R/ggplot_commons.R") ## can we access variables from the parent spin.R process? #echo("rscript is ", r_script) @@ -20,6 +20,7 @@ if(length(argv) != 1){ } baseDir=argv[1] +#baseDir=normalizePath(".") if(is.na(file.info(baseDir)$isdir)){ @@ -50,7 +51,7 @@ readCount <- function(statsFile){ readCounts <- fastqDataFiles %>% ldply(readCount) # %>% print_head() -require.auto(scales) + #+ fig.width=12, fig.height=round(nrow(readCounts)/2) ggplot(readCounts, aes(run, num_reads)) + geom_bar(stat="identity") + coord_flip() + scale_y_continuous(labels=comma) + ggtitle("read counts") @@ -92,7 +93,6 @@ dupLevels <- fastqDataFiles %>% ldply(function(statsFile){ ) }) -require.auto(scales) #+ fig.width=12, fig.height=round(nrow(dupLevels)/2) ggplot(dupLevels, aes(run, dedup_proportion)) + geom_bar(stat="identity") + coord_flip() + scale_y_continuous(labels=percent) + ggtitle("unique_reads/total_reads") + ylim(0,1) @@ -110,7 +110,7 @@ readBaseQualDist <- function(statsFile){ # echo("reading", statsFile) baseStats <- read.delim(pipe( - paste(get_zip_pipe(statsFile, "fastqc_data.txt"), " | grep -A60 -F '>>Per base sequence quality' | grep -B100 -F '>>END_MODULE' | head -n-1 | tail -n+2 | tr '#' ' '") + paste(get_zip_pipe(statsFile, "fastqc_data.txt"), " | grep -A200 -F '>>Per base sequence quality' | grep -B200 -F 'Per tile sequence quality' | head -n-2 | tail -n+2 | tr '#' ' '") )) %>% mutate( run=trim_ext(basename(statsFile), c(".zip")) ) @@ -119,14 +119,17 @@ readBaseQualDist <- function(statsFile){ } baseQualities <- fastqDataFiles %>% ldply(readBaseQualDist) -statsFile="fastqc/L8038_Track-21511_R1.trim_fastqc.zip" +#statsFile="fastqc/L8038_Track-21511_R1.trim_fastqc.zip" #with(baseQualities, as.data.frame(table(run))) #+ fig.widthh=20 -baseQualities %>% ggplot(aes(reorder(Base, base_order), Mean, group=run, color=run)) + geom_line() + scale_y_continuous(limits=c(2, 40)) +seqQualPlot <- baseQualities %>% ggplot(aes(reorder(Base, base_order), Mean, group=run, color=run)) + geom_line() + scale_y_continuous(limits=c(2, 40)) +## just show color legend if not too many samples +if(unlen(baseQualities$run) > 20) seqQualPlot <- seqQualPlot + guides(color=F) +seqQualPlot #' # Qualities per run including variance