Skip to content
Snippets Groups Projects
Commit 9c14c590 authored by Holger Brandl's avatar Holger Brandl
Browse files

improved summary

parent 697b5043
No related branches found
No related tags found
No related merge requests found
#' # Read Summary Report
#' # FastQC Summary for `r getwd()`
##+ echo=FALSE, message=FALSE
## Note This script is supposed to be knitr::spin'ed
......@@ -6,23 +6,29 @@
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
## can we access variables from the parent spin.R process?
#echo("rscript is ", r_script)
argv = commandArgs(TRUE)
#echo("argv is ", argv)
if(str_detect(argv[1], "fastqc_summary")) argv <- argv[-1]
#if(str_detect(argv[1], "fastqc_summary")) argv <- argv[-1]
if(length(argv) != 1){
stop("Usage: fastqc_summmary.R <directory with fastqc results>")
echo
}
baseDir=argv[1]
if(is.na(file.info(baseDir)$isdir)){
stop(paste("movie directory does not exist"))
}
#baseDir="fastqc"
#baseDir="/Volumes/projects/bioinfo/holger/projects/helin/mouse/fastqc"
baseDir=argv[1]
echo("working dir is data in", getwd())
echo("processing data in", baseDir)
fastqDataFiles <- list.files(path=baseDir, pattern="^fastqc_data.txt", full.names=TRUE, recursive=T)
......@@ -67,12 +73,15 @@ qcSummary %>% ggplot(aes(score, run, fill=tolower(flag))) +
scale_fill_manual(values = c(fail="darkred", pass="darkgreen", warn="orange"))
#' # Base Quality Distribution Summary
#' Median qualities per position to compare positional patterns and differences between the runs
readBaseQualDist <- function(statsFile){
# statsFile="./fastqc/mouse_big_cyst_rep2_fastqc/fastqc_data.txt"
# statsFile="./fastqc/mouse_liver_polar_stage3_rep2_fastqc/fastqc_data.txt"
# grep -A30 -F '>>Per base sequence quality' /Volumes/projects/bioinfo/holger/projects/helin/mouse/fastqc/mouse_big_cyst_rep1_fastqc/fastqc_data.txt | grep -B100 -F '>>END_M' | head -n-1 | tail -n+2 | tr '#' ' '
echo("reading", statsFile)
# echo("reading", statsFile)
baseStats <- read.delim(pipe(
paste("grep -A30 -F '>>Per base sequence quality' ", statsFile, "| grep -B100 -F '>>END_M' | head -n-1 | tail -n+2 | tr '#' ' '")
......@@ -91,13 +100,16 @@ baseQualities <- fastqDataFiles %>% ldply(readBaseQualDist)
baseQualities %>% ggplot(aes(reorder(Base, base_order), Mean, group=run, color=run)) + geom_line() + scale_y_continuous(limits=c(2, 40))
#' # Base Quality Distribution Summary
#+ warning=FALSE, fig.width=15
runs <- with(baseQualities, as.data.frame(table(run)))
#' Qualities per run including variance
## http://stackoverflow.com/questions/12518387/can-i-create-an-empty-ggplot2-plot-in-r
baseQualities %>%
head(30) %>%
# head(30) %>%
ggplot() +
geom_blank(mapping=aes(reorder(Base, base_order))) +
geom_rect(aes(xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=20), data=runs, alpha=0.05, fill="red") +
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment