Commit 9ee19050 authored by domingue's avatar domingue
Browse files

Fixed pending issues with genic features

- moved to section 1.7

- Added text explaining the goal

- dot plot now shows alpha using difference of non-exonic counts

Features for #51
parent bf6b1a13
......@@ -894,12 +894,7 @@ deAnnot = deResults %>% left_join(geneInfo)
#merge(.,., suffixes=c("_1", "_2"), by=NULL) %>%
#inner_join(baseMeanPerLvl, id='ensembl_gene_id') %>%
# ==========================================================================
# Added non-exonic ratio if present
# ==========================================================================
#' ### Non-exonic reads
#' Due to either sample/library preparation or incomplete gene annotation, it is possible that some genes will have a disproportionate amount of non-genic reads which are not counted (only reads mapping to exons are taken in consideration for Differential gene expression analysis). If a file with non-exonic counts was provided, differentially expressed genes will be plotted and the non-exonic counts in the conditions for each contrast compared.
#+ results="asis"
# Add non-exonic ratio if present
if (!is.null(opts$genic_counts)) {
non_genic_counts <- read_tsv(genic_file)
......@@ -922,33 +917,6 @@ if (!is.null(opts$genic_counts)) {
inner_join(., ., by = "ensembl_gene_id", suffix = c("_1", "_2"))
deAnnot <- inner_join(deAnnot, non_genic_counts_wide)
## plot non-genic counts of diffex genes per contrast
htmltools::tagList(plyr::alply(contrasts, 1, function(cont){
# cond_1 <- "wt_Kyoto"
# cond_2 <- "mut_9010"
cond_1 <- cont[,1]
cond_2 <- cont[,2]
hits_tab <- deAnnot %>%
select(ensembl_gene_id, is_hit, condition_1, condition_2, contains("nonexonic_ratio")) %>%
filter(condition_1 == cond_1 & condition_2 == cond_2) %>%
filter(is_hit)
if(nrow(hits_tab >= 1)){
p <- ggplot(hits_tab, aes(x = nonexonic_ratio_1, y = nonexonic_ratio_2, text = paste("Gene:", ensembl_gene_id))) +
geom_point() +
labs(
title = "Ratio of non-exonic reads",
x = cond_1,
y = cond_2
) +
theme_light()
ggplotly(p)
}
}))
} else {
cat("This information was not provided.\n")
}
write_tsv(deAnnot, path = paste0(resultsBase, "de_results.txt"))
......@@ -1191,6 +1159,49 @@ mat <- mat - rowMeans(mat)
pheatmap(mat, annotation_col = column2rownames(exDesign, "replicate") %>% select(- col_index))
# ==========================================================================
# Add non-exonic ratio if present
# ==========================================================================
#' ## Non-exonic reads
#'
#' If a file with non-exonic counts was provided, differentially expressed genes will be plotted and the non-exonic counts in the conditions for each contrast compared.
#'
#' Due to either sample/library preparation or incomplete gene annotation, it is possible that some genes will have a disproportionate amount of non-genic reads which are not counted (only reads mapping to exons are taken in consideration for Differential gene expression analysis). This will affect relatively a small number of genes, but is is a useful diagnostic in case it affects the gene of interest in a particular project.
#'
#' Below we plot the proportion of non-exonic reads in each of the conditions. To help visualize the genes which might be false-positives, the color intensity reflects the difference of this proportion between conditions: the darker the dot, larger the number of non-exonic reads in a condition (and not the other) and thus more likely the gene is to be a false positive. This metric is only intended for diagnostics purposes, and we recommend visual inspection of read coverage at these genes (with IGV). It is entirely possible that a gene gene is accurately called as differentially expressed despite a large difference in non-genic counts.
#+ results="asis"
if (!is.null(opts$genic_counts)) {
## plot non-genic counts of diffex genes per contrast
htmltools::tagList(plyr::alply(contrasts, 1, function(cont){
# cond_1 <- "wt_Kyoto"
# cond_2 <- "mut_9010"
cond_1 <- cont[,1]
cond_2 <- cont[,2]
hits_tab <- deAnnot %>%
select(ensembl_gene_id, is_hit, condition_1, condition_2, contains("nonexonic_ratio"), external_gene_name) %>%
filter(condition_1 == cond_1 & condition_2 == cond_2) %>%
filter(is_hit) %>%
mutate(non_exonic_diff = abs(nonexonic_ratio_1 - nonexonic_ratio_2))
if(nrow(hits_tab >= 1)){
p <- ggplot(hits_tab, aes(x = nonexonic_ratio_1, y = nonexonic_ratio_2, alpha = non_exonic_diff, label = external_gene_name, text = paste("Gene:", ensembl_gene_id))) +
geom_point() +
labs(
title = "Ratio of non-exonic reads",
x = cond_1,
y = cond_2
) +
theme_light()
ggplotly(p, height = 600, width=600)
}
}))
} else {
cat("This information was not provided.\n")
}
########################################################################################################################
#' ## Exported Data
#'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment