Commit 2b5d595c authored by Melanie Schneider

new variable names

parent 8d217177
......@@ -6,6 +6,7 @@
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
require(knitr)
## can we access variables from the parent spin.R process?
#echo("rscript is ", r_script)
......@@ -17,7 +18,6 @@ argv = commandArgs(TRUE)
# Exactly one command-line argument is expected: the directory named in the
# usage message below. Anything else aborts the script.
if (length(argv) != 1) {
  stop("Usage: First_try.R <directory with cutadapt log files>")
}
baseDir <- argv[1]
......@@ -47,14 +47,13 @@ echo(info3)
# Pull the line(s) that record how cutadapt was invoked out of the first log
# file. The file is read in R instead of shelling out to grep, so paths that
# contain spaces or shell metacharacters keep working.
parameters <- grep(
  "Command line parameters",
  readLines(logDataFiles[1], warn = FALSE),
  fixed = TRUE,
  value = TRUE
)
echo(parameters)

#' #### Some explanations:

# any() reduces each check to a single TRUE/FALSE, so the conditions stay valid
# when no line, or more than one line, was found above.
if (any(grepl("-a", parameters))) {
  echo("-a indicates that the following is a 3' end adapter.")
}
if (any(grepl("-g", parameters))) {
  echo("-g indicates that the following is a 5' end adapter.")
}
if (any(grepl("-b", parameters))) {
  echo("-b indicates that the adapter is located at the 3' or 5' end (both possible).")
}
if (any(grepl("-m", parameters))) {
  echo("Reads shorter than -m bases are thrown away.")
}
if (grepl("-q", parameters) ==TRUE)
......@@ -68,19 +67,19 @@ if (grepl("-O", parameters) ==TRUE)
# any() keeps the condition a single TRUE/FALSE whatever the length of
# `parameters`.
if (any(grepl("-N", parameters))) {
  echo("Wildcard characters in the adapter are enabled by -N.")
}

#' #### For more detailed information on cutadapt go to https://cutadapt.readthedocs.org/en/latest/index.html
#' ## Trimming Overview
# Summarise the overall trimming statistics of one cutadapt log file.
#
# logFile: path to a cutadapt log file.
# Returns a one-row data.frame. `Run` is the file name up to its first dot;
# every other column is the number found at a fixed token position on the log
# line that carries the matching label (NA when the log has no such line).
genTable1 <- function(logFile) {
  # Read the log once instead of spawning one `grep` process per statistic;
  # this also keeps paths with spaces or shell metacharacters working.
  logLines <- readLines(logFile, warn = FALSE)

  # Split the line(s) containing `label` on every run of characters matching
  # `separator` and return the token at position `index` as a number.
  numberAt <- function(label, separator, index) {
    matched <- grep(label, logLines, fixed = TRUE, value = TRUE)
    as.numeric(unlist(strsplit(matched, separator)))[index]
  }

  digitsOnly <- "[^0-9]+"       # tokens are plain runs of digits
  withDecimals <- "[^0-9\\.]+"  # tokens may also contain a decimal point

  data.frame(
    Run = sub("^([^.]*).*", "\\1", basename(logFile)),
    No_of_processed_Reads = numberAt("Processed reads", digitsOnly, 2),
    No_of_processed_Bases = numberAt("Processed bases", digitsOnly, 2),
    Trimmed_Reads = numberAt("Trimmed reads", withDecimals, 3),
    # A hyphenated argument name (`Quality-trimmed = ...`) does not parse, so
    # this column uses an underscore like its siblings.
    Quality_trimmed = numberAt("Quality-trimmed", withDecimals, 4),
    Trimmed_Bases = numberAt("Trimmed bases", withDecimals, 4),
    Too_short_Reads = numberAt("Too short reads", withDecimals, 3)
  )
}
trimmingStats <- logDataFiles %>% ldply(genTable1)
......@@ -95,11 +94,11 @@ write.delim(trimmingStats, file="cutadapt_summary.trimmingStats.txt")
# Summarise the per-adapter trimming statistics of one cutadapt log file.
#
# logFile: path to a cutadapt log file.
# Returns a data.frame whose rows follow the matched adapter lines in the log;
# the single `Run` value (file name up to its first dot) is recycled across
# them. data.frame() raises an error if the matched line counts cannot be
# recycled to a common length.
genTable2 <- function(logFile) {
  # Read the log once instead of spawning one `grep` process per column; this
  # also keeps paths with spaces or shell metacharacters working.
  logLines <- readLines(logFile, warn = FALSE)

  # All log lines containing the literal text `label`.
  linesWith <- function(label) {
    grep(label, logLines, fixed = TRUE, value = TRUE)
  }

  data.frame(
    Run = sub("^([^.]*).*", "\\1", basename(logFile)),
    # Text between the first and second single quote of each header line.
    Adapter = str_split_fixed(linesWith("=== Adapter "), "'", 3)[, 2],
    # Fifth piece after splitting on runs of non-digits.
    Trimmed = as.numeric(
      str_split_fixed(linesWith("; Trimmed: "), "[^0-9]+", 6)[, 5]
    ),
    # Leading digits of each "overlapped the 5..." / "overlapped the 3..." line.
    Overlapped_at_5prime = as.numeric(
      str_split_fixed(linesWith("overlapped the 5"), "[^0-9]+", 2)[, 1]
    ),
    Overlapped_at_3prime = as.numeric(
      str_split_fixed(linesWith("overlapped the 3"), "[^0-9]+", 2)[, 1]
    )
  )
}
adapterTrimmingStats <- logDataFiles %>% ldply(genTable2)
......@@ -112,8 +111,13 @@ write.delim(adapterTrimmingStats, file="cutadapt_summary.adapterTrimmingStats.tx
#' [Adapter Statistics](cutadapt_summary.adapterTrimmingStats.txt)
#+ fig.height=10, fig.width=10
# One horizontal bar per run for each overall trimming statistic.
ggplot(trimmingStats, aes(Run, No_of_processed_Reads)) +
  geom_bar(stat = "identity") +
  coord_flip()
ggplot(trimmingStats, aes(Run, No_of_processed_Bases)) +
  geom_bar(stat = "identity") +
  coord_flip()
ggplot(trimmingStats, aes(Run, Trimmed_Reads)) +
  geom_bar(stat = "identity") +
  coord_flip()
# `Quality-trimmed` inside aes() is evaluated as `Quality - trimmed`; this
# expects the column to be named Quality_trimmed.
ggplot(trimmingStats, aes(Run, Quality_trimmed)) +
  geom_bar(stat = "identity") +
  coord_flip()
ggplot(trimmingStats, aes(Run, Trimmed_Bases)) +
  geom_bar(stat = "identity") +
  coord_flip()
ggplot(trimmingStats, aes(Run, Too_short_Reads)) +
  geom_bar(stat = "identity") +
  coord_flip()

# Per-adapter statistics, one panel per adapter.
ggplot(adapterTrimmingStats, aes(Run, Trimmed)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Adapter) +
  coord_flip()
ggplot(adapterTrimmingStats, aes(Run, Overlapped_at_5prime)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Adapter) +
  coord_flip()
ggplot(adapterTrimmingStats, aes(Run, Overlapped_at_3prime)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Adapter) +
  coord_flip()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment