expression_explorer 9.72 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
#!/usr/bin/env bash

export SCRIPT_DIRECTORY="$(dirname "$0")/"

/usr/local/bin/Rscript -<<"EOF" ${SCRIPT_DIRECTORY}

args = commandArgs(trailingOnly = TRUE)

# LOAD packages --------------------------------------------------------------------------------------------------------
if(!"devtools" %in% installed.packages()[,"Package"]) {
    install.packages("devtools")
}
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.44/R/core_commons.R")
load_pack(shiny)
load_pack(shinyFiles)
load_pack(DT)
load_pack(plotly)
load_pack(shinyjqui)



# Determine data directory from execution context  ---------------------------------------------------------------------

 dataPath= args[1] #%>% dirname()%>% dirname()%>% dirname()

## https://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script
#initial.options <- commandArgs(trailingOnly = FALSE)
#file.arg.name <- "--file="
#script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
## script.name="/Volumes/cerebral-organoids/RNA-Seq/data/hsap/dge_analysis/expression_explorer.app/Contents/MacOS/expression_explorer"
#dataPath= script.name %>% dirname() %>% dirname()%>% dirname() %>% dirname()


# FUNCTIONS ------------------------------------------------------------------------------------------------------------

calc_ci = function(df, variable, ci_interval=0.95){
    variable <- enquo(variable)

    # http://dplyr.tidyverse.org/articles/programming.html
    mean_name <- paste0( quo_name(variable), "_mean")
    ci_name <- paste0(quo_name(variable), "_ci")
    # echo(glue::glue("varname is {ci_name}"))

    df %>% summarize(
    mean = mean(!!variable),
    sd = sd(!!variable),
    N = n(),
    se = sd/sqrt(N),
    !!ci_name := qt(ci_interval/2+0.5, N-1)*se,
    !!mean_name := mean
    ) %>% select(-c(mean, sd, N, se, mean))
}


# LOAD data ------------------------------------------------------------------------------------------------------------

de_res <- read_tsv(file.path(dataPath, "de_results.txt")) %>%
    transmute(external_gene_name, ensembl_gene_id, condition_1, condition_2, logfc = c1_over_c2_logfc, pvalue, padj, is_hit, c1_overex)

de_res %<>% mutate_at(vars(padj, logfc, pvalue), round,2)

design <- read_tsv(file.path(dataPath, "basic_design.txt")) %>% select(condition, replicate)

fpkms <- read_tsv(file.path(dataPath, "fpkms_by_replicate.txt")) %>%
    mutate(method = "fpkm") %>%
    select(-gene_description) %>%
    gather(replicate, rep_values, -c(ensembl_gene_id, gene_name, method)) %>%
    left_join(design, by = "replicate")

tpms <- read_tsv(file.path(dataPath, "tpms_by_replicate.txt")) %>%
    mutate(method = "tpm") %>%
    select(-gene_description) %>%
    gather(replicate, rep_values, -c(ensembl_gene_id, gene_name, method)) %>%
    left_join(design, by = "replicate")

norm_counts <- read_tsv(file.path(dataPath, "sizefac_normalized_counts_by_replicate.txt")) %>%
    gather(replicate, rep_values, -ensembl_gene_id) %>%
    left_join(design, by = "replicate") %>%
    left_join(de_res %>% distinct(ensembl_gene_id, external_gene_name), by = "ensembl_gene_id") %>%
    transmute(gene_name = external_gene_name, ensembl_gene_id, method = "norm_counts", replicate, rep_values, condition)

all <- rbind(fpkms, tpms, norm_counts)

#calculate confidence intervals:
ci <- all %>% group_by(gene_name, condition, method) %>% calc_ci(., rep_values)
all %<>% left_join(ci)


#extract gene_names
all <- rbind(all, all %>% mutate(gene_name = ensembl_gene_id))

gene_names <- sort(unique(all$gene_name))
first_entry <- gene_names[1]
sample_names <- unique(design$condition)



app <- shinyApp(

    # USER INTERFACE -------------------------------------------------------------------------------------------------------

    ui <- fixedPage(

        headerPanel("Expression Explorer"),

        fixedPage("",
            fixedRow(
                column(12, style = "height:20px")
            ),

            fixedRow(
                column(4, style = "height:100px",
                    selectInput(inputId = "gene_name", label = "Gene name", choices = gene_names, multiple = TRUE, selected = first_entry, selectize=TRUE)
                ),
                column(4, style = "height:100px",
                    radioButtons("method", label = "Normalization method", choiceNames = c("FPKM", "TPM", "Normalized Counts"), choiceValues = c("fpkm", "tpm", "norm_counts"))
                ),
                column(4, style = "height:100px",
                    checkboxGroupInput("options", label = "Optional", choiceNames = c("show confidence interval", "show line"), choiceValues = c("plot_ci", "plot_line"))
                )

            ),

            fixedRow(
                column(12,
                    orderInput("sample", label = "Change sample order:", items = sample_names)#,
                    #verbatimTextOutput('order')
                )
            ),

            hr(),

           # fixedRow(div(style = "margin_top:50px;"),
            fixedRow(
                column(6, HTML(paste('<br/>', '<br/>')),
                    # plotlyOutput(outputId = "points"),
                    plotOutput(outputId = "points")
                ),

                column(6, HTML(paste('<br/>', '<br/>')),
                    tabPanel("table", DT::dataTableOutput("table"), style = "font-size: 75%; width: 75%")
                )
            )
        )
    ),


    # shinyApp(ui = ui, server = server)


    # SERVER FUNCTIONS -----------------------------------------------------------------------------------------------------

    server <- function(input, output, session) {

#        observe({
#            query <- parseQueryString(session$clientData$url_search)
#
#            if (!is.null(query[['gene_id']])) {
#                # http://127.0.0.1:5241/?gene_id=ENSG00000000457
#                # http://127.0.0.1:5241/?gene_id=ENSG00000000457,ENSG00000001167,ENSG00000001460
#
#                # map ids to symbols if needed
#                #separate multiple ids
#                url_provided_ids = str_split(query[['gene_id']], ",") %>% unlist
#                # browser()
#
#                if(any(str_detect(url_provided_ids, "E.*[0-9]{10,}"))){
#                    url_provided_ids = all %>% filter(ensembl_gene_id %in% url_provided_ids) %$% unique(gene_name)
#                }
#
#                ## see https://shiny.rstudio.com/reference/shiny/0.14/updateSelectInput.html
#                updateSelectInput(session, "gene_name", selected=url_provided_ids)
#            }
#
#            if (!is.null(query[['gene_name']])) {
#                # http://127.0.0.1:5241/?gene_name=TNMD
#
#                url_provided_ids = str_split(query[['gene_name']], ",") %>% unlist
#                # updateTextInput(session, "gene_name", value = url_provided_ids)
#                updateSelectInput(session, "gene_name", selected=url_provided_ids)
#            }
#        })

        # prepare output data
        table_data <- reactive({
                 selected_gene <- all[which(all$gene_name %in% input$gene_name),]$ensembl_gene_id
                 de_res %>% filter(ensembl_gene_id %in% selected_gene) %>% select(-is_hit, -c1_overex)
#                de_res %>% filter(external_gene_name %in% input$gene_name) %>% select(-is_hit, -c1_overex)
        })

            # text_data <- reactive({
            #     lapply(as.list(input$gene_name), function(x){data <- all %>% filter(gene_name == x) %>% select(gene_name, gene_description, ensembl_gene_id) %>% filter(!duplicated(gene_name))})
            # })
            # plot_data <- reactive({
            #     lapply(as.list(input$gene_name), function(x){data <- all %>% filter(gene_name == x & grepl(input$method, method) & grepl(input$sample, sample))})
            # })
            # table_data <- reactive({
            #     lapply(as.list(input$gene_name), function(x){data <- de_res %>% filter(external_gene_name == x) %>% select(-external_gene_name)})
            # })


        # output$points <- renderPlotly({
        output$points <- renderPlot({
            pd <- position_dodge(0.3)

            plot_data <- all %>% filter(gene_name %in% input$gene_name & grepl(input$method, method))
            # plot_data <- all %>% filter(gene_name %in% c("Gnai3", "Cdc45") & grepl("fpkm", method))
            plot_data$condition <- factor(plot_data$condition, levels = input$sample_order)

            plot <- ggplot(plot_data, aes(condition, rep_values, color = gene_name)) +
                geom_point(position=pd) +
                theme(axis.text.x = element_text(angle = 50, hjust = 1)) +
                ylab(toupper(input$method)) +
                xlab("condition")

            if (is.element("plot_line", input$options)) {
                    plot <- plot + geom_line(aes(condition, rep_values_mean, group = gene_name), data = plot_data, position=pd)
                }

            if (is.element("plot_ci", input$options)) {
                plot <- plot + geom_errorbar(aes(ymin=rep_values_mean-rep_values_ci, ymax=rep_values_mean+rep_values_ci), size=0.2, width = 0.3, position=pd)
            }

            if (is.element("plot_line", input$options) || is.element("plot_ci", input$options)) {
            } else {
                plot <- plot + stat_summary(fun.y="mean", geom="point", pch = "_", size=8, position = pd)
            }
            # stat_summary(fun.data = "mean_cl_boot", color = "black", size = 1,
            # geom = "errorbar", width = 1, fun.args = list(conf.int = 0.9), aes(group = gene_name),
            # position=position_dodge(width=0.4))

            plot(plot)
            # ggplotly(plot) %>% layout(margin = list(b = 95), legend = list(orientation = "h", y = 10))
        })

        output$table <- DT::renderDataTable({ DT::datatable(table_data()) })

    }
)



#runApp(app)
runApp(app, launch.browser=TRUE, port=5241)
#shinyApp(ui = ui, server = server)