Skip to content
Snippets Groups Projects
Commit 2b5d595c authored by Melanie Schneider
Browse files

new variable names

parent 8d217177
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
require(knitr)
## can we access variables from the parent spin.R process?
#echo("rscript is ", r_script)
......@@ -17,7 +18,6 @@ argv = commandArgs(TRUE)
## Exactly one command line argument is expected: the directory that holds
## the cutadapt log files. Anything else aborts with a usage message.
if (length(argv) != 1) {
  stop("Usage: First_try.R <directory with cutadapt log files>")
}
baseDir <- argv[1]
......@@ -47,14 +47,13 @@ echo(info3)
## Pull the "Command line parameters" line(s) out of the first log file only;
## the other log files are not inspected here.
parameters <- grep("Command line parameters",
                   readLines(logDataFiles[1], warn = FALSE),
                   fixed = TRUE, value = TRUE)
echo(parameters)

#' #### Some explanations:

## TRUE when `flag` occurs anywhere in the logged command line. This is a plain
## substring test, so it also fires when the flag text is part of a longer
## token. any() keeps the condition a single TRUE/FALSE even if zero or several
## lines were found.
usesOption <- function(flag) {
  any(grepl(flag, parameters, fixed = TRUE))
}

if (usesOption("-a")) {
  echo("-a indicates that the following is a 3' end adapter.")
}
if (usesOption("-g")) {
  echo("-g indicates that the following is a 5' end adapter.")
}
if (usesOption("-b")) {
  echo("-b indicates that the adapter is located at the 3' or 5' end (both possible).")
}
if (usesOption("-m")) {
  echo("Reads shorter than -m bases are thrown away.")
}
if (grepl("-q", parameters) ==TRUE)
......@@ -68,19 +67,19 @@ if (grepl("-O", parameters) ==TRUE)
## any() keeps the condition a single TRUE/FALSE even when `parameters` holds
## zero or several lines.
if (any(grepl("-N", parameters, fixed = TRUE))) {
  echo("Wildcard characters in the adapter are enabled by -N.")
}

#' #### For more detailed information on cutadapt go to https://cutadapt.readthedocs.org/en/latest/index.html
#' ## Trimming Overview
## Summarise the overall trimming statistics of a single cutadapt log file.
##
## logFile: path to one cutadapt log file.
##
## Returns a data.frame with the columns Run, No_of_processed_Reads,
## No_of_processed_Bases, Trimmed_Reads, Quality_trimmed, Trimmed_Bases and
## Too_short_Reads. Run is the file's basename up to its first dot. Every
## other column is one numeric token taken from the log line containing the
## corresponding label, or NA when no such line (or token) exists.
##
## The column is called Quality_trimmed because `Quality-trimmed` is not a
## syntactic R name (it would be read as a subtraction); code that consumes
## this table has to use the same spelling.
genTable1 <- function(logFile) {
  # Read the log once instead of launching a shell grep for every statistic.
  logLines <- readLines(logFile, warn = FALSE)

  # Split the line(s) containing `label` on `splitPattern` and return the
  # token at position `index` as a number. Because a matching line starts
  # with non-numeric text, token 1 is the empty string and the first number
  # on the line is token 2.
  extractStat <- function(label, splitPattern, index) {
    hits <- grep(label, logLines, fixed = TRUE, value = TRUE)
    as.numeric(unlist(strsplit(hits, splitPattern)))[index]
  }

  integerTokens <- "[^0-9]+"      # tokens are runs of digits
  decimalTokens <- "[^0-9\\.]+"   # tokens may also contain a decimal point

  # NOTE(review): which figure each index selects (count, Mbp, percentage)
  # depends on the layout of the cutadapt report - confirm for the cutadapt
  # version in use.
  data.frame(
    Run = sub("^([^.]*).*", "\\1", basename(logFile)),
    No_of_processed_Reads = extractStat("Processed reads", integerTokens, 2),
    No_of_processed_Bases = extractStat("Processed bases", integerTokens, 2),
    Trimmed_Reads = extractStat("Trimmed reads", decimalTokens, 3),
    Quality_trimmed = extractStat("Quality-trimmed", decimalTokens, 4),
    Trimmed_Bases = extractStat("Trimmed bases", decimalTokens, 4),
    Too_short_Reads = extractStat("Too short reads", decimalTokens, 3)
  )
}
## Apply genTable1 to every log file and collect the results in trimmingStats.
## NOTE(review): ldply is presumably plyr's (stacks the per-file data frames
## row-wise) and is not loaded in the visible part of the script - confirm.
trimmingStats <- logDataFiles %>% ldply(genTable1)
......@@ -95,11 +94,11 @@ write.delim(trimmingStats, file="cutadapt_summary.trimmingStats.txt")
## Summarise the per-adapter statistics of a single cutadapt log file.
##
## logFile: path to one cutadapt log file.
##
## Returns a data.frame with the columns Run, Adapter, Trimmed,
## Overlapped_at_5prime and Overlapped_at_3prime. Run is the file's basename
## up to its first dot (recycled over all rows). Adapter is the text between
## the first pair of single quotes on each "=== Adapter " line. The numeric
## columns are taken from the lines containing "; Trimmed: ",
## "overlapped the 5" and "overlapped the 3" respectively.
##
## NOTE(review): data.frame() raises an error when the four kinds of lines do
## not occur in compatible numbers (e.g. adapter headers without any
## "overlapped the 5" lines) - confirm that every log follows that layout.
genTable2 <- function(logFile) {
  #browser()
  # Read the log once instead of launching a shell grep for every column.
  logLines <- readLines(logFile, warn = FALSE)

  # All log lines that contain `label` literally.
  linesWith <- function(label) {
    grep(label, logLines, fixed = TRUE, value = TRUE)
  }

  # For each line: split on `pattern` and return piece number `k`, or "" when
  # the line has fewer than `k` pieces.
  pieceAt <- function(lines, pattern, k) {
    vapply(
      strsplit(lines, pattern),
      function(pieces) if (length(pieces) >= k) pieces[k] else "",
      character(1),
      USE.NAMES = FALSE
    )
  }

  nonDigits <- "[^0-9]+"

  data.frame(
    Run = sub("^([^.]*).*", "\\1", basename(logFile)),
    Adapter = pieceAt(linesWith("=== Adapter "), "'", 2),
    # Piece 5 is the fourth run of digits on the line (piece 1 is the empty
    # string in front of the leading text).
    Trimmed = as.numeric(pieceAt(linesWith("; Trimmed: "), nonDigits, 5)),
    # Piece 1 is whatever digits stand at the very start of the line.
    Overlapped_at_5prime = as.numeric(pieceAt(linesWith("overlapped the 5"), nonDigits, 1)),
    Overlapped_at_3prime = as.numeric(pieceAt(linesWith("overlapped the 3"), nonDigits, 1))
  )
}
## Apply genTable2 to every log file and collect the results in
## adapterTrimmingStats.
## NOTE(review): ldply is presumably plyr's (stacks the per-file data frames
## row-wise) and is not loaded in the visible part of the script - confirm.
adapterTrimmingStats <- logDataFiles %>% ldply(genTable2)
......@@ -112,8 +111,13 @@ write.delim(adapterTrimmingStats, file="cutadapt_summary.adapterTrimmingStats.tx
#' [Adapter Statistics](cutadapt_summary.adapterTrimmingStats.txt)
#+ fig.height=10, fig.width=10
## One horizontal bar chart per overall trimming statistic, one bar per run.
ggplot(trimmingStats, aes(Run, No_of_processed_Reads)) + geom_bar(stat = 'identity') + coord_flip()
ggplot(trimmingStats, aes(Run, No_of_processed_Bases)) + geom_bar(stat = 'identity') + coord_flip()
ggplot(trimmingStats, aes(Run, Trimmed_Reads)) + geom_bar(stat = 'identity') + coord_flip()
## `Quality-trimmed` inside aes() would be evaluated as Quality minus trimmed,
## so the column is referenced by the syntactic name Quality_trimmed.
## NOTE(review): trimmingStats must carry the column under exactly this name -
## confirm against genTable1.
ggplot(trimmingStats, aes(Run, Quality_trimmed)) + geom_bar(stat = 'identity') + coord_flip()
ggplot(trimmingStats, aes(Run, Trimmed_Bases)) + geom_bar(stat = 'identity') + coord_flip()
ggplot(trimmingStats, aes(Run, Too_short_Reads)) + geom_bar(stat = 'identity') + coord_flip()

## Per-adapter statistics: the same chart layout, one facet per adapter.
ggplot(adapterTrimmingStats, aes(Run, Trimmed)) + geom_bar(stat = 'identity') + facet_wrap(~Adapter) + coord_flip()
ggplot(adapterTrimmingStats, aes(Run, Overlapped_at_5prime)) + geom_bar(stat = 'identity') + facet_wrap(~Adapter) + coord_flip()
ggplot(adapterTrimmingStats, aes(Run, Overlapped_at_3prime)) + geom_bar(stat = 'identity') + facet_wrap(~Adapter) + coord_flip()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment