Skip to content
Snippets Groups Projects
Commit 08a3bee1 authored by Melanie Schneider's avatar Melanie Schneider
Browse files

two tables

parent 522d5257
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env Rscript
#' # First try
##+ echo=FALSE, message=FALSE
## Note This script is supposed to be knitr::spin'ed
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
## can we access variables from the parent spin.R process?
#echo("rscript is ", r_script)
argv = commandArgs(TRUE)
#echo("argv is ", argv)
#if(str_detect(argv[1], "fastqc_summary")) argv <- argv[-1]
if(length(argv) != 1){
stop("Usage: First_try.R <directory with cutadapt log files>")
echo
}
baseDir=argv[1]
if(is.na(file.info(baseDir)$isdir)){
stop(paste("directory '", baseDir,"'does not exist"))
}
baseDir="/home/mel/MPI-Bioinf/Project1_reads/141126_cutadapt_logs"
logDataFiles <- list.files(path=baseDir, pattern="ca.fastq.gz.ca.log", full.names=TRUE, recursive=T)
#echo("files are", logDataFiles)
#' ## Title
echo("Quality Control Summary")
#' ## General information
info1=readLines(pipe( paste("grep -F 'cutadapt' ", logDataFiles[1]) ))
echo(info1)
info2=readLines(pipe( paste("grep -F 'Maximum error rate' ", logDataFiles[1]) ))
echo(info2)
info3=readLines(pipe( paste("grep -F 'No. of adapters' ", logDataFiles[1]) ))
echo(info3)
#' ## cutadapt parameters:
parameters=readLines(pipe( paste("grep -F 'Command line parameters' ", logDataFiles[1]) ))
echo(parameters)
echo("Some explanation:")
if (grepl("-a", parameters) ==TRUE)
echo("-a indicates that the following is a 3' adapter.")
if (grepl("-g", parameters) ==TRUE)
echo("-g indicates that the following is a 5' adapter.")
if (grepl("-b", parameters) ==TRUE)
echo("-b indicates that the adapter is 3' or 5' (both possible).")
if (grepl("-m", parameters) ==TRUE)
echo("Reads shorter than -m bases are thrown away.")
if (grepl("-q", parameters) ==TRUE)
echo("Quality trimming is done with a threshold specified after -q.")
if (grepl("-p", parameters) ==TRUE)
echo("option 'paired output' is used.")
if (grepl("-e", parameters) ==TRUE)
echo("-e changes the error tolerance. (The default maximum error rate is 0.1)")
if (grepl("-O", parameters) ==TRUE)
echo("The minimum overlap length is changed using -O.")
if (grepl("-N", parameters) ==TRUE)
echo("Wildcard characters in the adapter are enabled by -N.")
echo("For more detailed information on cutadapt go to https://cutadapt.readthedocs.org/en/latest/index.html")
#' ## Table 1: information of each run
genTable1 <- function(logFile){
data.frame(
run=sub("^([^.]*).*", "\\1", basename(logFile)),
num_proReads=(paste("grep -F 'Processed reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9]+") %>% unlist() %>% as.numeric())[2],
num_proBases=(paste("grep -F 'Processed bases' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9]+") %>% unlist() %>% as.numeric())[2],
trim_reads=(paste("grep -F 'Trimmed reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[3],
qual_trimmed=(paste("grep -F 'Quality-trimmed' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[4],
trim_bases=(paste("grep -F 'Trimmed bases' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[4],
too_short=(paste("grep -F 'Too short reads' ", logFile ) %>% pipe() %>% readLines() %>% strsplit( "[^0-9\\.]+") %>% unlist() %>% as.numeric())[3]
)
}
table1 <- logDataFiles %>% ldply(genTable1) %>% print_head()
#' ## Table 2: adapter information (of each run)
genTable2 <- function(logFile){
#browser()
data.frame(
run=sub("^([^.]*).*", "\\1", basename(logFile)),
adapter=(paste("grep -F '=== Adapter ' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed("'", 3))[,2],
trimmed=(paste("grep -F '; Trimmed: ' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 6))[,5] %>% as.numeric(),
overlapped5=(paste("grep -F 'overlapped the 5' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 2) )[,1] %>% as.numeric(),
overlapped3=(paste("grep -F 'overlapped the 3' ", logFile ) %>% pipe() %>% readLines() %>% str_split_fixed( "[^0-9]+", 2) )[,1] %>% as.numeric()
)
}
table2 <- logDataFiles %>% ldply(genTable2) %>% print_head()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment