From 09548d3ade35282d4764290ffb9a329c4c7c4d7c Mon Sep 17 00:00:00 2001 From: Lena Hersemann Date: Tue, 11 Jun 2019 11:19:22 +0200 Subject: [PATCH] added expression_explorer as bash script --- .../expression_explorer/expression_explorer | 245 ++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 dge_workflow/expression_explorer/expression_explorer diff --git a/dge_workflow/expression_explorer/expression_explorer b/dge_workflow/expression_explorer/expression_explorer new file mode 100644 index 0000000..09ed9c2 --- /dev/null +++ b/dge_workflow/expression_explorer/expression_explorer @@ -0,0 +1,245 @@ +#!/usr/bin/env bash + +export SCRIPT_DIRECTORY="$(dirname "$0")/" + +/usr/local/bin/Rscript -<<"EOF" ${SCRIPT_DIRECTORY} + +args = commandArgs(trailingOnly = TRUE) + +# LOAD packages -------------------------------------------------------------------------------------------------------- +if(!"devtools" %in% installed.packages()[,"Package"]) { + install.packages("devtools") +} +devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/core_commons.R") +load_pack(shiny) +load_pack(shinyFiles) +load_pack(DT) +load_pack(plotly) +load_pack(shinyjqui) + + + +# Determine data directory from execution context --------------------------------------------------------------------- + + dataPath= args[1] #%>% dirname()%>% dirname()%>% dirname() + +## https://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script +#initial.options <- commandArgs(trailingOnly = FALSE) +#file.arg.name <- "--file=" +#script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) +## script.name="/Volumes/cerebral-organoids/RNA-Seq/data/hsap/dge_analysis/expression_explorer.app/Contents/MacOS/expression_explorer" +#dataPath= script.name %>% dirname() %>% dirname()%>% dirname() %>% dirname() + + +# FUNCTIONS ------------------------------------------------------------------------------------------------------------ + +calc_ci = function(df, variable, ci_interval=0.95){ + variable <- enquo(variable) + + # http://dplyr.tidyverse.org/articles/programming.html + mean_name <- paste0( quo_name(variable), "_mean") + ci_name <- paste0(quo_name(variable), "_ci") + # echo(glue::glue("varname is {ci_name}")) + + df %>% summarize( + mean = mean(!!variable), + sd = sd(!!variable), + N = n(), + se = sd/sqrt(N), + !!ci_name := qt(ci_interval/2+0.5, N-1)*se, + !!mean_name := mean + ) %>% select(-c(mean, sd, N, se, mean)) +} + + +# LOAD data ------------------------------------------------------------------------------------------------------------ + +de_res <- read_tsv(file.path(dataPath, "de_results.txt")) %>% + transmute(external_gene_name, ensembl_gene_id, condition_1, condition_2, logfc = c1_over_c2_logfc, pvalue, padj, is_hit, c1_overex) + +de_res %<>% mutate_at(vars(padj, logfc, pvalue), round,2) + +design <- read_tsv(file.path(dataPath, "basic_design.txt")) %>% select(condition, replicate) + +fpkms <- read_tsv(file.path(dataPath, "fpkms_by_replicate.txt")) %>% + mutate(method = "fpkm") %>% + select(-gene_description) %>% + gather(replicate, rep_values, -c(ensembl_gene_id, gene_name, method)) %>% + left_join(design, by = "replicate") + +tpms <- read_tsv(file.path(dataPath, "tpms_by_replicate.txt")) %>% + mutate(method = "tpm") %>% + select(-gene_description) %>% + gather(replicate, rep_values, -c(ensembl_gene_id, gene_name, method)) %>% + left_join(design, by = "replicate") + +norm_counts <- read_tsv(file.path(dataPath, "sizefac_normalized_counts_by_replicate.txt")) %>% + gather(replicate, rep_values, -ensembl_gene_id) %>% + left_join(design, by = "replicate") %>% + left_join(de_res %>% distinct(ensembl_gene_id, external_gene_name), by = "ensembl_gene_id") %>% + transmute(gene_name = external_gene_name, ensembl_gene_id, method = "norm_counts", replicate, rep_values, condition) + +all <- rbind(fpkms, tpms, norm_counts) + +#calculate confidence intervals: +ci <- all %>% group_by(gene_name, condition, method) %>% calc_ci(., rep_values) +all %<>% left_join(ci) + + +#extract gene_names +all <- rbind(all, all %>% mutate(gene_name = ensembl_gene_id)) + +gene_names <- sort(unique(all$gene_name)) +first_entry <- gene_names[1] +sample_names <- unique(design$condition) + + + +app <- shinyApp( + + # USER INTERFACE ------------------------------------------------------------------------------------------------------- + + ui <- fixedPage( + + headerPanel("Expression Explorer"), + + fixedPage("", + fixedRow( + column(12, style = "height:20px") + ), + + fixedRow( + column(4, style = "height:100px", + selectInput(inputId = "gene_name", label = "Gene name", choices = gene_names, multiple = TRUE, selected = first_entry, selectize=TRUE) + ), + column(4, style = "height:100px", + radioButtons("method", label = "Normalization method", choiceNames = c("FPKM", "TPM", "Normalized Counts"), choiceValues = c("fpkm", "tpm", "norm_counts")) + ), + column(4, style = "height:100px", + checkboxGroupInput("options", label = "Optional", choiceNames = c("show confidence interval", "show line"), choiceValues = c("plot_ci", "plot_line")) + ) + + ), + + fixedRow( + column(12, + orderInput("sample", label = "Change sample order:", items = sample_names)#, + #verbatimTextOutput('order') + ) + ), + + hr(), + + # fixedRow(div(style = "margin_top:50px;"), + fixedRow( + column(6, HTML(paste('
', '
')), + # plotlyOutput(outputId = "points"), + plotOutput(outputId = "points") + ), + + column(6, HTML(paste('
', '
')), + tabPanel("table", DT::dataTableOutput("table"), style = "font-size: 75%; width: 75%") + ) + ) + ) + ), + + + # shinyApp(ui = ui, server = server) + + + # SERVER FUNCTIONS ----------------------------------------------------------------------------------------------------- + + server <- function(input, output, session) { + +# observe({ +# query <- parseQueryString(session$clientData$url_search) +# +# if (!is.null(query[['gene_id']])) { +# # http://127.0.0.1:5241/?gene_id=ENSG00000000457 +# # http://127.0.0.1:5241/?gene_id=ENSG00000000457,ENSG00000001167,ENSG00000001460 +# +# # map ids to symbols if needed +# #separate multiple ids +# url_provided_ids = str_split(query[['gene_id']], ",") %>% unlist +# # browser() +# +# if(any(str_detect(url_provided_ids, "E.*[0-9]{10,}"))){ +# url_provided_ids = all %>% filter(ensembl_gene_id %in% url_provided_ids) %$% unique(gene_name) +# } +# +# ## see https://shiny.rstudio.com/reference/shiny/0.14/updateSelectInput.html +# updateSelectInput(session, "gene_name", selected=url_provided_ids) +# } +# +# if (!is.null(query[['gene_name']])) { +# # http://127.0.0.1:5241/?gene_name=TNMD +# +# url_provided_ids = str_split(query[['gene_name']], ",") %>% unlist +# # updateTextInput(session, "gene_name", value = url_provided_ids) +# updateSelectInput(session, "gene_name", selected=url_provided_ids) +# } +# }) + + # prepare output data + table_data <- reactive({ + selected_gene <- all[which(all$gene_name %in% input$gene_name),]$ensembl_gene_id + de_res %>% filter(ensembl_gene_id %in% selected_gene) %>% select(-is_hit, -c1_overex) +# de_res %>% filter(external_gene_name %in% input$gene_name) %>% select(-is_hit, -c1_overex) + }) + + # text_data <- reactive({ + # lapply(as.list(input$gene_name), function(x){data <- all %>% filter(gene_name == x) %>% select(gene_name, gene_description, ensembl_gene_id) %>% filter(!duplicated(gene_name))}) + # }) + # plot_data <- reactive({ + # lapply(as.list(input$gene_name), function(x){data <- all %>% filter(gene_name == x & grepl(input$method, method) & grepl(input$sample, sample))}) + # }) + # table_data <- reactive({ + # lapply(as.list(input$gene_name), function(x){data <- de_res %>% filter(external_gene_name == x) %>% select(-external_gene_name)}) + # }) + + + # output$points <- renderPlotly({ + output$points <- renderPlot({ + pd <- position_dodge(0.3) + + plot_data <- all %>% filter(gene_name %in% input$gene_name & grepl(input$method, method)) + # plot_data <- all %>% filter(gene_name %in% c("Gnai3", "Cdc45") & grepl("fpkm", method)) + plot_data$condition <- factor(plot_data$condition, levels = input$sample_order) + + plot <- ggplot(plot_data, aes(condition, rep_values, color = gene_name)) + + geom_point(position=pd) + + theme(axis.text.x = element_text(angle = 50, hjust = 1)) + + ylab(toupper(input$method)) + + xlab("condition") + + if (is.element("plot_line", input$options)) { + plot <- plot + geom_line(aes(condition, rep_values_mean, group = gene_name), data = plot_data, position=pd) + } + + if (is.element("plot_ci", input$options)) { + plot <- plot + geom_errorbar(aes(ymin=rep_values_mean-rep_values_ci, ymax=rep_values_mean+rep_values_ci), size=0.2, width = 0.3, position=pd) + } + + if (is.element("plot_line", input$options) || is.element("plot_ci", input$options)) { + } else { + plot <- plot + stat_summary(fun.y="mean", geom="point", pch = "_", size=8, position = pd) + } + # stat_summary(fun.data = "mean_cl_boot", color = "black", size = 1, + # geom = "errorbar", width = 1, fun.args = list(conf.int = 0.9), aes(group = gene_name), + # position=position_dodge(width=0.4)) + + plot(plot) + # ggplotly(plot) %>% layout(margin = list(b = 95), legend = list(orientation = "h", y = 10)) + }) + + output$table <- DT::renderDataTable({ DT::datatable(table_data()) }) + + } +) + + + +#runApp(app) +runApp(app, launch.browser=TRUE, port=5241) +#shinyApp(ui = ui, server = server) \ No newline at end of file -- GitLab