Newer
Older
#!/usr/bin/env Rscript
#+ echo=FALSE, message=FALSE
suppressMessages(library(docopt))
# retrieve and parse the command-line arguments
doc <- '
Create a small summary alignment summary files created when running tophat2.
Options:
-c Peform correlation analysis
'
#print(paste("args are:", commandArgs(TRUE)))
opts <- docopt(doc, commandArgs(TRUE))
#opts <- docopt(doc, ". .")
if(!exists("opts")){ stop(doc); return }
#print("doc opts are")
#print(opts)
baseDir <- normalizePath(opts$base_directory)
if(is.na(file.info(baseDir)$isdir)){
stop(paste("base directory", baseDir, "does not exist"))
}
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.9/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.9/R/ggplot_commons.R")
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
########################################################################################################################
#' # Mapping Summary for: `r baseDir`
parseAlgnSummary_T2_0_11 <- function(alignSummary){
#alignSummary="/projects/bioinfo/holger/projects/marta_rnaseq/human_leipzig/mapping/S5382_aRG_1b_rep1/align_summary.txt"
algnData <- readLines(alignSummary)
data.frame(
condition=basename(dirname(alignSummary)),
num_reads=as.numeric(str_match(algnData[2], " ([0-9]*$)")[,2]),
mapped_reads=as.numeric(str_match(algnData[3], ":[ ]*([0-9]*) ")[,2][1])
) %>% transform(mapping_efficiency=100*mapped_reads/num_reads)
}
algnSummary <- ldply(list.files(".", "align_summary.txt", full.names=TRUE, recursive=T), parseAlgnSummary_T2_0_11)
write.delim(algnSummary, file="tophat_mapping_stats.txt")
# algnSummary <- read.delim("algnSummary.txt")
#' [Tophat Mapping Statistics](tophat_mapping_stats.txt)
scale_fill_discrete <- function (...){ scale_color_brewer(..., type = "seq", palette="Set1", "fill", na.value = "grey50") }
ggplot(algnSummary, aes(condition, mapping_efficiency)) +
geom_bar(stat="identity") +
coord_flip() +
ylim(0,100) +
ggtitle("mapping efficiency")
ggplot(algnSummary, aes(condition, num_reads)) +
geom_bar(stat="identity") +
coord_flip() +
ggtitle("read counts") +scale_y_continuous(labels=comma)
ggplot(algnSummary, aes(condition, mapped_reads)) +
geom_bar(stat="identity") + coord_flip() +
ggtitle("alignments counts") +
scale_y_continuous(labels=comma)
#ggplot(melt(algnSummary), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0))
#ggsave2(w=10, h=10, p="mapstats")
########################################################################################################################
#' ## Alignment Correlation
#' Without using any transcriptome as reference, the genome can be binned and alignment counts per bin can be used to perform a correlation analysis.
#' Used tool [deepTools](https://github.com/fidelram/deepTools)
## http://stackoverflow.com/questions/291813/recommended-way-to-embed-pdf-in-html
#+ results="asis"
cat("<embed src=\"./bc.pdf\" width=\"1100px\" height=\"1000px\">")