Commit 006d591f authored by domingue's avatar domingue

New features added

- added min_count argument to avoid overplotting
- added correlation plots
parent 23061039
......@@ -56,6 +56,14 @@ p <- add_argument(
default = "gene"
)
p <- add_argument(
p,
"--min_count",
help = "Option to filter out genes from scatter plot and avoid overplotting. Only genes with at least this number of genic counts will be plotted.",
type = "integer",
default = 0
)
p <- add_argument(
p,
"--attribute",
......@@ -97,6 +105,7 @@ strand_specific <- argv$strand_specific
paired <- argv$paired
feature <- argv$feature
attribute <- argv$attribute
min_count <- argv$min_count
clean_names <- argv$clean_names
prefix <- argv$prefix
n_threads <- argv$n_threads
......@@ -112,6 +121,7 @@ n_threads <- argv$n_threads
# paired <- FALSE
# feature <- "gene"
# attribute <- "gene_id"
# min_count <- 0
# strand_specific <- 0
# prefix <- "test"
# n_threads <- 1
......@@ -139,6 +149,17 @@ load_pack("Rsubread")
dir.create("figure", showWarnings = FALSE)
#' Run configuration was
config <- vec_as_df(unlist(argv)) %>%
filter(!str_detect(name, "^[<-]")) %>%
rbind(c("input_file_num", length(bam_files))) %>%
filter(!name %in% c("opts", "help", ""))
kable(config)
write_tsv(config, ".run_configuration.txt")
# ==========================================================================
# Prepare gene annotations
# ==========================================================================
......@@ -287,6 +308,36 @@ ggplot(final_counts, aes(x = nonexonic_ratio)) +
ggsave(paste0("figure/", prefix, ".non_exonic_distribution.pdf"))
# ==========================================================================
#' ## Correlation of genic and exonic counts
# ==========================================================================
#' Assuming a perfect polyA selection and stranding during library preparation, there should be a near perfect correlation between genic vs exonic counts. Some exceptions will be genes which arbour non-annotated exons of small non-coding RNAs. This plot will help visualize those genes which might be worth further exploration.
counts_filtered <- final_counts %>%
filter(genic_counts > min_count)
#' Here we found a total of `r n_distinct(counts_filtered$ensembl_gene_id)` with more genic than exonic reads. The number of genes per library
## test, there should be no genes with 0 exonic counts
counts_filtered %>%
filter(exonic_counts != 0) %>%
group_by(sample) %>%
slice(10)
ggplot(counts_filtered, aes(x = genic_counts + 1, y = exonic_counts + 1)) +
geom_point(aes(alpha = nonexonic_ratio)) +
facet_wrap(sample ~ .) +
scale_x_log10(label = comma) +
scale_y_log10(label = comma) +
labs(
x = "Genic read counts",
y = "Exonic read counts"
) +
theme_light()
ggsave(paste0("figure/", prefix, ".genic_vs_exonic_counts.pdf"))
# ==========================================================================
#' ## Library level stats
# ==========================================================================
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment