#!/usr/bin/env Rscript #+ echo=FALSE, message=FALSE suppressMessages(library(docopt)) # retrieve and parse the command-line arguments doc <- ' Create a small summary alignment summary files created when running tophat2. Usage: tophat_qc.R <base_directory> Options: -c Peform correlation analysis ' #print(paste("args are:", commandArgs(TRUE))) opts <- docopt(doc, commandArgs(TRUE)) #opts <- docopt(doc, ". .") if(!exists("opts")){ stop(doc); return } #print("doc opts are") #print(opts) baseDir <- normalizePath(opts$base_directory) if(is.na(file.info(baseDir)$isdir)){ stop(paste("base directory", baseDir, "does not exist")) } devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.9/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.9/R/ggplot_commons.R") ######################################################################################################################## #' # Mapping Summary for: `r baseDir` parseAlgnSummary_T2_0_11 <- function(alignSummary){ #alignSummary="/projects/bioinfo/holger/projects/marta_rnaseq/human_leipzig/mapping/S5382_aRG_1b_rep1/align_summary.txt" algnData <- readLines(alignSummary) data.frame( condition=basename(dirname(alignSummary)), num_reads=as.numeric(str_match(algnData[2], " ([0-9]*$)")[,2]), mapped_reads=as.numeric(str_match(algnData[3], ":[ ]*([0-9]*) ")[,2][1]) ) %>% transform(mapping_efficiency=100*mapped_reads/num_reads) } algnSummary <- ldply(list.files(".", "align_summary.txt", full.names=TRUE, recursive=T), parseAlgnSummary_T2_0_11) write.delim(algnSummary, file="tophat_mapping_stats.txt") # algnSummary <- read.delim("algnSummary.txt") #' [Tophat Mapping Statistics](tophat_mapping_stats.txt) scale_fill_discrete <- function (...){ scale_color_brewer(..., type = "seq", palette="Set1", "fill", na.value = "grey50") } ggplot(algnSummary, aes(condition, mapping_efficiency)) + geom_bar(stat="identity") + coord_flip() + ylim(0,100) + ggtitle("mapping efficiency") ggplot(algnSummary, aes(condition, num_reads)) + geom_bar(stat="identity") + coord_flip() + ggtitle("read counts") +scale_y_continuous(labels=comma) ggplot(algnSummary, aes(condition, mapped_reads)) + geom_bar(stat="identity") + coord_flip() + ggtitle("alignments counts") + scale_y_continuous(labels=comma) #ggplot(melt(algnSummary), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0)) #ggsave2(w=10, h=10, p="mapstats") ##todo multimapper counting ######################################################################################################################## #' ## Alignment Correlation #' Without using any transcriptome as reference, the genome can be binned and alignment counts per bin can be used to perform a correlation analysis. #' Created using [deepTools](https://github.com/fidelram/deepTools) ## http://stackoverflow.com/questions/291813/recommended-way-to-embed-pdf-in-html #+ results="asis" cat("<embed src=\"./bc.pdf\" width=\"1100px\" height=\"1000px\">")