Commit bcf2169c authored by Lena Hersemann's avatar Lena Hersemann

added subsetting of countMatrix to ncol = 2000 if the input data contains more...

added subsetting of countMatrix to ncol = 2000 if the input data contains more than 2000 cells; removed plotting of diffusion map and pseudo time; added PCA and its data export as R object
parent 2c82440b
#+ include=FALSE
#***********************************************************************************************************************
#' # Diffusion Map and Pseudo Diffusion Time
#' Calculation of both diffusion map and pseudo time is done using the `destiny` package [https://bioconductor.org/packages/release/bioc/html/destiny.html](https://bioconductor.org/packages/release/bioc/html/destiny.html)
#'
#' Created by: `r system("whoami", intern=T)`
#'
#' Created at: `r format(Sys.Date(), format="%B %d %Y")`
# # Diffusion Map and Pseudo Diffusion Time
# Calculation of both diffusion map and pseudo time is done using the `destiny` package [https://bioconductor.org/packages/release/bioc/html/destiny.html](https://bioconductor.org/packages/release/bioc/html/destiny.html)
#***********************************************************************************************************************
#+ include=FALSE
......@@ -14,14 +11,11 @@ suppressMessages(require(docopt))
doc = '
Diffusion map calculation. Please specify count data.
Usage: calc_dm_dpt.R [options] <count_data>
Options:
--results_prefix <results_prefix> result prefix which will be added to each output file [default: ]
Usage: calc_dm_dpt.R <count_data>
'
# commandArgs <- function(x) c("../segerstolpe_counts.txt")
# commandArgs <- function(x) c("count_data.txt")
opts = docopt(doc, commandArgs(TRUE))
......@@ -35,6 +29,13 @@ load_pack(plotly)
# handling input data --------------------------------------------------------------------------------------------------
subset_data <- opts$subset
if(!is.null(subset_data)){
gl <- read_tsv(opts$subset)
if (ncol(gl) > 1) { stop("Please provide a gene list table with one column only") }
colnames(gl) <- "gene_id"
}
results_prefix <- opts$results_prefix
countData <- fread(opts$count_data)
......@@ -43,6 +44,13 @@ countMatrix <- countData %>%
as.matrix()
countMatrix <- countMatrix[rowSums(countMatrix)>0, ]
# select 2000 cells only if more than 2000 cells were provided as input
if (ncol(countMatrix) > 2000) {
countMatrix <- countMatrix[, sample(ncol(countMatrix), 2000)]
}
countMatrix <- countMatrix[rowSums(countMatrix) > 0, ]
# Seurat-based normalization (https://rdrr.io/cran/Seurat/src/R/preprocessing.R) by using its LogNormalize function:
normCountMatrix <- Seurat::LogNormalize(countMatrix, scale.factor = 10000, display.progress = T) %>% as.matrix()
......@@ -50,31 +58,25 @@ normCountMatrix <- Seurat::LogNormalize(countMatrix, scale.factor = 10000, displ
rm(countData)
rm(countMatrix)
# calculate and save diffusion map and pseudo diffusion time plot
es <- ExpressionSet(normCountMatrix)
dm <- DiffusionMap(es)
# saveRDS(dm, "dm.rds")
# dm <- readRDS("dm.rds")
dpt <- DPT(dm)
# saveRDS(dpt, "dpt.rds")
# dpt <- readRDS("dpt.rds")
try(dpt <- DPT(dm))
saveRDS(dm, "dm.rds")
# readRDS("dm.rds")
saveRDS(dpt, "dpt.rds")
if(exists("dpt")) { saveRDS(dpt, "dpt.rds") }
# readRDS("dpt.rds")
#' ## Diffusion Map
dm_data <- as.data.frame(dm)
plot_ly(dm_data, x = ~DC1, y = ~DC2, z = ~DC3, size = I(5), type = "scatter3d")
#additionally calculate PCA
data.pca = prcomp(t(normCountMatrix), retx = TRUE, center = TRUE, scale. = TRUE)
data.percent <- round((((data.pca$sdev) ^ 2 / sum(data.pca$sdev ^ 2)) * 100)[1 : 2])
#' ## Pseudo Diffusion Time
plot(dpt)
pca.data <- data.pca$x %>% as.data.frame()
saveRDS(pca.data, "pca.rds")
#-----------------------------------------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment