...
 
Commits (3)
......@@ -119,7 +119,7 @@ qcSummary %>% ggplot(aes(score, run, fill=tolower(flag))) +
# Build one row per FastQC result archive in `fastqDataFiles`:
#   run              - archive base name with the "_fastqc.zip" suffix trimmed
#   dedup_proportion - value of the 'Total Deduplicated Percentage' line of the
#                      archive's fastqc_data.txt, divided by 100
dupLevels = map_df(fastqDataFiles, function(statsFile){
    # Shell pipeline: get_zip_pipe() presumably emits the command that streams
    # fastqc_data.txt out of the archive (verify against its definition); grep
    # keeps the deduplication line and cut takes its second field. cut splits
    # on tab by default, so no explicit -d option is required.
    dedupCmd = paste(get_zip_pipe(statsFile, "fastqc_data.txt"), "| grep -F 'Total Deduplicated Percentage' | cut -f2")

    # readLines() opens and closes an unopened connection for the duration of
    # the call but does not destroy it; close it explicitly so that a
    # connection is not left behind for every processed file.
    dedupCon = pipe(dedupCmd)
    on.exit(close(dedupCon), add = TRUE)

    data.frame(
        run=trim_ext(basename(statsFile), c("_fastqc.zip")),
        dedup_proportion=as.numeric(readLines(dedupCon))/100
    )
})
......@@ -145,7 +145,7 @@ readBaseQualDist = function(statsFile){
baseStats = read.delim(pipe(
#http://stackoverflow.com/questions/1946363/how-do-i-display-data-from-the-beginning-of-a-file-until-the-first-occurence-of/1947950#1947950
paste(get_zip_pipe(statsFile, "fastqc_data.txt"), " | grep -A200 -F '>>Per base sequence quality' | perl -pe 'last if />>END_MODULE/' | head -n-2 | tail -n+2 | tr '#' ' '")
paste(get_zip_pipe(statsFile, "fastqc_data.txt"), " | grep -A200 -F '>>Per base sequence quality' 2>/dev/null | perl -pe 'last if />>END_MODULE/' | head -n-2 | tail -n+2 | tr '#' ' '")
)) %>% mutate(
run=trim_ext(basename(statsFile), ".zip")
)
......@@ -194,7 +194,7 @@ baseQualities %>%
geom_rect(aes(xmin=-Inf, xmax=Inf, ymin=20, ymax=28), data=runs, alpha=0.05, fill=colors()[654]) +
geom_rect(aes(xmin=-Inf, xmax=Inf, ymin=28, ymax=Inf), data=runs, alpha=0.05, fill="green") +
geom_boxplot(
mapping=aes(x=first_base, ymin = X10th.Percentile, lower = Lower.Quartile , middle = Median, upper = Upper.Quartile , ymax = X90th.Percentile),
mapping=aes(x=first_base, ymin = X10th.Percentile, lower = Lower.Quartile , middle = Median, upper = Upper.Quartile , ymax = X90th.Percentile, group = first_base),
stat = "identity"
) +
facet_wrap(~run, ncol=3) +
......