Something went wrong on our end
-
Holger Brandl authoredHolger Brandl authored
bowtie2_qc.R 2.71 KiB
#!/usr/bin/env Rscript
#+ echo=FALSE, message=FALSE
suppressMessages(library(docopt))
# retrieve and parse the command-line arguments
doc <- '
Create a small summary for bam-files
Usage: bams_qc.R <base_directory>
Options:
-c Peform correlation analysis
'
opts <- docopt(doc, commandArgs(TRUE))
#opts <- docopt(doc, ".")
if(!exists("opts")){ stop(doc); return }
#print(opts)
baseDir <- normalizePath(opts$base_directory)
if(is.na(file.info(baseDir)$isdir)){
stop(paste("base directory", baseDir, "does not exist"))
}
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R")
########################################################################################################################
#' # Mapping Summary for: `r baseDir`
logSuffix=".bowtie.log"
parseAlgnSummary <- function(alignSummary){
#alignSummary="/lustre/projects/bioinfo/holger/projects/khan_chipseq_h1/alignments/H1L_Shield.bowtie.log"
algnData <- readLines(alignSummary)
data.frame(
sample=trimEnd(basename(alignSummary), logSuffix),
num_reads=as.numeric(str_split_fixed(algnData[1], " ", 2)[1]),
mapping_efficiency=as.numeric(str_replace(str_split_fixed(algnData[6], " ", 2)[1], "%", "")),
unique_mappers=as.numeric(str_split_fixed(str_trim(algnData[4]), " ", 2)[1]),
unique_mapper_prop=str_match(algnData[4], "[(]([0-9.]*)[%)]*") %>% subset(select=2) %>% as.numeric(),
multi_mappers=as.numeric(str_split_fixed(str_trim(algnData[5]), " ", 2)[1]),
multi_mappers_prop=str_match(algnData[5], "[(]([0-9.]*)[%)]*") %>% subset(select=2) %>% as.numeric()
)
}
algnSummary <- list.files(baseDir, logSuffix, full.names=TRUE, recursive=T) %>% ldply(parseAlgnSummary)
write.delim(algnSummary, file="bowtie2_qc.txt")
scale_fill_discrete <- function (...){ scale_color_brewer(..., type = "seq", palette="Set1", "fill", na.value = "grey50") }
#+ fig.height=nrow(algnSummary)/3
ggplot(algnSummary, aes(sample, mapping_efficiency)) +
geom_bar(stat="identity") +
coord_flip() +
ylim(0,100) +
ggtitle("mapping efficiency")
ggplot(algnSummary, aes(sample, num_reads)) +
geom_bar(stat="identity") +
coord_flip() +
ggtitle("read counts") +scale_y_continuous(labels=comma)
ggplot(algnSummary, aes(sample, unique_mapper_prop)) +
geom_bar(stat="identity") + coord_flip() +
ggtitle("unique-mapper proportions") +
scale_y_continuous(labels=comma)
ggplot(algnSummary, aes(sample, multi_mappers_prop)) +
geom_bar(stat="identity") + coord_flip() +
ggtitle("multi-mapper proportions") +
scale_y_continuous(labels=comma)