Commit 08a3bee1 authored by Melanie Schneider's avatar Melanie Schneider

two tables

parent 522d5257
#!/usr/bin/env Rscript
#' # First try
##+ echo=FALSE, message=FALSE
## Note This script is supposed to be knitr::spin'ed
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
## can we access variables from the parent spin.R process?
#echo("rscript is ", r_script)
argv = commandArgs(TRUE)
#echo("argv is ", argv)
#if(str_detect(argv[1], "fastqc_summary")) argv <- argv[-1]
if(length(argv) != 1){
stop("Usage: First_try.R <directory with cutadapt log files>")
echo
}
baseDir=argv[1]
if(is.na(file.info(baseDir)$isdir)){
stop(paste("directory '", baseDir,"'does not exist"))
}
baseDir="/home/mel/MPI-Bioinf/Project1_reads/141126_cutadapt_logs"
logDataFiles <- list.files(path=baseDir, pattern="ca.fastq.gz.ca.log", full.names=TRUE, recursive=T)
#echo("files are", logDataFiles)
#' ## Title
echo("Quality Control Summary")
#' ## General information
info1=readLines(pipe( paste("grep -F 'cutadapt' ", logDataFiles[1]) ))
echo(info1)
info2=readLines(pipe( paste("grep -F 'Maximum error rate' ", logDataFiles[1]) ))
echo(info2)
info3=readLines(pipe( paste("grep -F 'No. of adapters' ", logDataFiles[1]) ))
echo(info3)
#' ## cutadapt parameters:
parameters=readLines(pipe( paste("grep -F 'Command line parameters' ", logDataFiles[1]) ))
echo(parameters)
echo("Some explanation:")
if (grepl("-a", parameters) ==TRUE)
echo("-a indicates that the following is a 3' adapter.")
if (grepl("-g", parameters) ==TRUE)
echo("-g indicates that the following is a 5' adapter.")
if (grepl("-b", parameters) ==TRUE)
echo("-b indicates that the adapter is 3' or 5' (both possible).")
if (grepl("-m", parameters) ==TRUE)
echo("Reads shorter than -m bases are thrown away.")
if (grepl("-q", parameters) ==TRUE)
echo("Quality trimming is done with a threshold specified after -q.")
if (grepl("-p", parameters) ==TRUE)
echo("option 'paired output' is used.")
if (grepl("-e", parameters) ==TRUE)
echo("-e changes the error tolerance. (The default maximum error rate is 0.1)")
if (grepl("-O", parameters) ==TRUE)
echo("The minimum overlap length is changed using -O.")
if (grepl("-N", parameters) ==TRUE)
echo("Wildcard characters in the adapter are enabled by -N.")
echo("For more detailed information on cutadapt go to https://cutadapt.readthedocs.org/en/latest/index.html")
#' ## Table 1: information of each run
genTable1 <- function(logFile){
data.frame(
run=sub("^([^.]*).*", "\\1", basename(logFile)),
num_proReads=(paste("grep -F 'Processed reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9]+") %>% unlist() %>% as.numeric())[2],
num_proBases=(paste("grep -F 'Processed bases' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9]+") %>% unlist() %>% as.numeric())[2],
trim_reads=(paste("grep -F 'Trimmed reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[3],
qual_trimmed=(paste("grep -F 'Quality-trimmed' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[4],
trim_bases=(paste("grep -F 'Trimmed bases' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[4],
too_short=(paste("grep -F 'Too short reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[3]
)
}
table1 <- logDataFiles %>% ldply(genTable1) %>% print_head()
#' ## Table 2: adapter information (of each run)
genTable2 <- function(logFile){
#browser()
data.frame(
run=sub("^([^.]*).*", "\\1", basename(logFile)),
adapter=(paste("grep -F '=== Adapter ' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed("'", 3))[,2],
trimmed=(paste("grep -F '; Trimmed: ' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 6))[,5] %>% as.numeric(),
overlapped5=(paste("grep -F 'overlapped the 5' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 2) )[,1] %>% as.numeric(),
overlapped3=(paste("grep -F 'overlapped the 3' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 2) )[,1] %>% as.numeric()
)
}
table2 <- logDataFiles %>% ldply(genTable2) %>% print_head()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment