Commit 09548d3a authored by Lena Hersemann's avatar Lena Hersemann

added expression_explorer as bash script

parent 196c84d0
#!/usr/bin/env bash
export SCRIPT_DIRECTORY="$(dirname "$0")/"
/usr/local/bin/Rscript -<<"EOF" ${SCRIPT_DIRECTORY}
args = commandArgs(trailingOnly = TRUE)
# LOAD packages --------------------------------------------------------------------------------------------------------
if(!"devtools" %in% installed.packages()[,"Package"]) {
install.packages("devtools")
}
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/core_commons.R")
load_pack(shiny)
load_pack(shinyFiles)
load_pack(DT)
load_pack(plotly)
load_pack(shinyjqui)
# Determine data directory from execution context ---------------------------------------------------------------------
dataPath= args[1] #%>% dirname()%>% dirname()%>% dirname()
## https://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script
#initial.options <- commandArgs(trailingOnly = FALSE)
#file.arg.name <- "--file="
#script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
## script.name="/Volumes/cerebral-organoids/RNA-Seq/data/hsap/dge_analysis/expression_explorer.app/Contents/MacOS/expression_explorer"
#dataPath= script.name %>% dirname() %>% dirname()%>% dirname() %>% dirname()
# FUNCTIONS ------------------------------------------------------------------------------------------------------------
calc_ci = function(df, variable, ci_interval=0.95){
variable <- enquo(variable)
# http://dplyr.tidyverse.org/articles/programming.html
mean_name <- paste0( quo_name(variable), "_mean")
ci_name <- paste0(quo_name(variable), "_ci")
# echo(glue::glue("varname is {ci_name}"))
df %>% summarize(
mean = mean(!!variable),
sd = sd(!!variable),
N = n(),
se = sd/sqrt(N),
!!ci_name := qt(ci_interval/2+0.5, N-1)*se,
!!mean_name := mean
) %>% select(-c(mean, sd, N, se, mean))
}
# LOAD data ------------------------------------------------------------------------------------------------------------
de_res <- read_tsv(file.path(dataPath, "de_results.txt")) %>%
transmute(external_gene_name, ensembl_gene_id, condition_1, condition_2, logfc = c1_over_c2_logfc, pvalue, padj, is_hit, c1_overex)
de_res %<>% mutate_at(vars(padj, logfc, pvalue), round,2)
design <- read_tsv(file.path(dataPath, "basic_design.txt")) %>% select(condition, replicate)
fpkms <- read_tsv(file.path(dataPath, "fpkms_by_replicate.txt")) %>%
mutate(method = "fpkm") %>%
select(-gene_description) %>%
gather(replicate, rep_values, -c(ensembl_gene_id, gene_name, method)) %>%
left_join(design, by = "replicate")
tpms <- read_tsv(file.path(dataPath, "tpms_by_replicate.txt")) %>%
mutate(method = "tpm") %>%
select(-gene_description) %>%
gather(replicate, rep_values, -c(ensembl_gene_id, gene_name, method)) %>%
left_join(design, by = "replicate")
norm_counts <- read_tsv(file.path(dataPath, "sizefac_normalized_counts_by_replicate.txt")) %>%
gather(replicate, rep_values, -ensembl_gene_id) %>%
left_join(design, by = "replicate") %>%
left_join(de_res %>% distinct(ensembl_gene_id, external_gene_name), by = "ensembl_gene_id") %>%
transmute(gene_name = external_gene_name, ensembl_gene_id, method = "norm_counts", replicate, rep_values, condition)
all <- rbind(fpkms, tpms, norm_counts)
#calculate confidence intervals:
ci <- all %>% group_by(gene_name, condition, method) %>% calc_ci(., rep_values)
all %<>% left_join(ci)
#extract gene_names
all <- rbind(all, all %>% mutate(gene_name = ensembl_gene_id))
gene_names <- sort(unique(all$gene_name))
first_entry <- gene_names[1]
sample_names <- unique(design$condition)
app <- shinyApp(
# USER INTERFACE -------------------------------------------------------------------------------------------------------
ui <- fixedPage(
headerPanel("Expression Explorer"),
fixedPage("",
fixedRow(
column(12, style = "height:20px")
),
fixedRow(
column(4, style = "height:100px",
selectInput(inputId = "gene_name", label = "Gene name", choices = gene_names, multiple = TRUE, selected = first_entry, selectize=TRUE)
),
column(4, style = "height:100px",
radioButtons("method", label = "Normalization method", choiceNames = c("FPKM", "TPM", "Normalized Counts"), choiceValues = c("fpkm", "tpm", "norm_counts"))
),
column(4, style = "height:100px",
checkboxGroupInput("options", label = "Optional", choiceNames = c("show confidence interval", "show line"), choiceValues = c("plot_ci", "plot_line"))
)
),
fixedRow(
column(12,
orderInput("sample", label = "Change sample order:", items = sample_names)#,
#verbatimTextOutput('order')
)
),
hr(),
# fixedRow(div(style = "margin_top:50px;"),
fixedRow(
column(6, HTML(paste('<br/>', '<br/>')),
# plotlyOutput(outputId = "points"),
plotOutput(outputId = "points")
),
column(6, HTML(paste('<br/>', '<br/>')),
tabPanel("table", DT::dataTableOutput("table"), style = "font-size: 75%; width: 75%")
)
)
)
),
# shinyApp(ui = ui, server = server)
# SERVER FUNCTIONS -----------------------------------------------------------------------------------------------------
server <- function(input, output, session) {
# observe({
# query <- parseQueryString(session$clientData$url_search)
#
# if (!is.null(query[['gene_id']])) {
# # http://127.0.0.1:5241/?gene_id=ENSG00000000457
# # http://127.0.0.1:5241/?gene_id=ENSG00000000457,ENSG00000001167,ENSG00000001460
#
# # map ids to symbols if needed
# #separate multiple ids
# url_provided_ids = str_split(query[['gene_id']], ",") %>% unlist
# # browser()
#
# if(any(str_detect(url_provided_ids, "E.*[0-9]{10,}"))){
# url_provided_ids = all %>% filter(ensembl_gene_id %in% url_provided_ids) %$% unique(gene_name)
# }
#
# ## see https://shiny.rstudio.com/reference/shiny/0.14/updateSelectInput.html
# updateSelectInput(session, "gene_name", selected=url_provided_ids)
# }
#
# if (!is.null(query[['gene_name']])) {
# # http://127.0.0.1:5241/?gene_name=TNMD
#
# url_provided_ids = str_split(query[['gene_name']], ",") %>% unlist
# # updateTextInput(session, "gene_name", value = url_provided_ids)
# updateSelectInput(session, "gene_name", selected=url_provided_ids)
# }
# })
# prepare output data
table_data <- reactive({
selected_gene <- all[which(all$gene_name %in% input$gene_name),]$ensembl_gene_id
de_res %>% filter(ensembl_gene_id %in% selected_gene) %>% select(-is_hit, -c1_overex)
# de_res %>% filter(external_gene_name %in% input$gene_name) %>% select(-is_hit, -c1_overex)
})
# text_data <- reactive({
# lapply(as.list(input$gene_name), function(x){data <- all %>% filter(gene_name == x) %>% select(gene_name, gene_description, ensembl_gene_id) %>% filter(!duplicated(gene_name))})
# })
# plot_data <- reactive({
# lapply(as.list(input$gene_name), function(x){data <- all %>% filter(gene_name == x & grepl(input$method, method) & grepl(input$sample, sample))})
# })
# table_data <- reactive({
# lapply(as.list(input$gene_name), function(x){data <- de_res %>% filter(external_gene_name == x) %>% select(-external_gene_name)})
# })
# output$points <- renderPlotly({
output$points <- renderPlot({
pd <- position_dodge(0.3)
plot_data <- all %>% filter(gene_name %in% input$gene_name & grepl(input$method, method))
# plot_data <- all %>% filter(gene_name %in% c("Gnai3", "Cdc45") & grepl("fpkm", method))
plot_data$condition <- factor(plot_data$condition, levels = input$sample_order)
plot <- ggplot(plot_data, aes(condition, rep_values, color = gene_name)) +
geom_point(position=pd) +
theme(axis.text.x = element_text(angle = 50, hjust = 1)) +
ylab(toupper(input$method)) +
xlab("condition")
if (is.element("plot_line", input$options)) {
plot <- plot + geom_line(aes(condition, rep_values_mean, group = gene_name), data = plot_data, position=pd)
}
if (is.element("plot_ci", input$options)) {
plot <- plot + geom_errorbar(aes(ymin=rep_values_mean-rep_values_ci, ymax=rep_values_mean+rep_values_ci), size=0.2, width = 0.3, position=pd)
}
if (is.element("plot_line", input$options) || is.element("plot_ci", input$options)) {
} else {
plot <- plot + stat_summary(fun.y="mean", geom="point", pch = "_", size=8, position = pd)
}
# stat_summary(fun.data = "mean_cl_boot", color = "black", size = 1,
# geom = "errorbar", width = 1, fun.args = list(conf.int = 0.9), aes(group = gene_name),
# position=position_dodge(width=0.4))
plot(plot)
# ggplotly(plot) %>% layout(margin = list(b = 95), legend = list(orientation = "h", y = 10))
})
output$table <- DT::renderDataTable({ DT::datatable(table_data()) })
}
)
#runApp(app)
runApp(app, launch.browser=TRUE, port=5241)
#shinyApp(ui = ui, server = server)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment