Commit 103c432c authored by Holger Brandl's avatar Holger Brandl

adjusted knitr parameters

parent f7605dc7
#!/usr/bin/env Rscript
#' # First try
#' # Quality Control Summary for `r getwd()`
##+ echo=FALSE, message=FALSE
## Note This script is supposed to be knitr::spin'ed
......@@ -21,21 +21,20 @@ if(length(argv) != 1){
}
baseDir=argv[1]
#baseDir="."
if(is.na(file.info(baseDir)$isdir)){
stop(paste("directory '", baseDir,"'does not exist"))
}
baseDir="/home/mel/MPI-Bioinf/Project1_reads/141126_cutadapt_logs"
# baseDir="/home/mel/MPI-Bioinf/Project1_reads/141126_cutadapt_logs"
logDataFiles <- list.files(path=baseDir, pattern="ca.fastq.gz.ca.log", full.names=TRUE, recursive=T)
#echo("files are", logDataFiles)
#' ## Title
echo("Quality Control Summary")
#' ## General information
info1=readLines(pipe( paste("grep -F 'cutadapt' ", logDataFiles[1]) ))
echo(info1)
......@@ -44,10 +43,11 @@ echo(info2)
info3=readLines(pipe( paste("grep -F 'No. of adapters' ", logDataFiles[1]) ))
echo(info3)
#' ## cutadapt parameters:
#' ## Cutadapt Parameters:
parameters=readLines(pipe( paste("grep -F 'Command line parameters' ", logDataFiles[1]) ))
echo(parameters)
echo("Some explanation:")
if (grepl("-a", parameters) ==TRUE)
echo("-a indicates that the following is a 3' adapter.")
......@@ -71,7 +71,7 @@ if (grepl("-N", parameters) ==TRUE)
echo("For more detailed information on cutadapt go to https://cutadapt.readthedocs.org/en/latest/index.html")
#' ## Table 1: information of each run
#' ## Trimming Overview
genTable1 <- function(logFile){
data.frame(
run=sub("^([^.]*).*", "\\1", basename(logFile)),
......@@ -83,9 +83,15 @@ genTable1 <- function(logFile){
too_short=(paste("grep -F 'Too short reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[3]
)
}
table1 <- logDataFiles %>% ldply(genTable1) %>% print_head()
trimmingStats <- logDataFiles %>% ldply(genTable1)
#+ results = 'asis'
trimmingStats %>% head() %>% kable()
write.delim(trimmingStats, file="cutadapt_summary.trimmingStats.txt")
#' [Trimming Statistics](cutadapt_summary.trimmingStats.txt)
#' ## Table 2: adapter information (of each run)
#' ## Adapter trimming statistics
genTable2 <- function(logFile){
#browser()
data.frame(
......@@ -96,10 +102,18 @@ genTable2 <- function(logFile){
overlapped3=(paste("grep -F 'overlapped the 3' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 2) )[,1] %>% as.numeric()
)
}
table2 <- logDataFiles %>% ldply(genTable2) %>% print_head()
adapterTrimmingStats <- logDataFiles %>% ldply(genTable2)
#+ results = 'asis'
adapterTrimmingStats %>% head() %>% kable()
#+
write.delim(adapterTrimmingStats, file="cutadapt_summary.adapterTrimmingStats.txt")
#' [Adapter Statistics](cutadapt_summary.adapterTrimmingStats.txt)
ggplot(table1, aes(run, num_proReads)) + geom_bar(stat='identity') + coord_flip()
#+ fig.height=10, fig.width=10
ggplot(trimmingStats, aes(run, num_proReads)) + geom_bar(stat='identity') + coord_flip()
ggplot(table2, aes(run, trimmed)) + geom_bar(stat='identity') + facet_wrap(~adapter) + coord_flip()
ggplot(table2, aes(run, overlapped5)) + geom_bar(stat='identity') + facet_wrap(~adapter) + coord_flip()
ggplot(table2, aes(run, overlapped3)) + geom_bar(stat='identity') + facet_wrap(~adapter) + coord_flip()
ggplot(adapterTrimmingStats, aes(run, trimmed)) + geom_bar(stat='identity') + facet_wrap(~adapter) + coord_flip()
ggplot(adapterTrimmingStats, aes(run, overlapped5)) + geom_bar(stat='identity') + facet_wrap(~adapter) + coord_flip()
ggplot(adapterTrimmingStats, aes(run, overlapped3)) + geom_bar(stat='identity') + facet_wrap(~adapter) + coord_flip()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment