Commit 094e386f authored by Lena Hersemann's avatar Lena Hersemann

added export of various count type data (raw, normalized, scaled, regressed);...

added export of various count type data (raw, normalized, scaled, regressed); fixed clustering loop for the case when multiple PCs are selected
parent fc4f8fc9
......@@ -202,6 +202,9 @@ seo <- FindVariableGenes(object = seo, mean.function = ExpMean, dispersion.funct
regress_out <- str_split(sop[which(sop$parameter == "regress_out"),]$settings, "[+]") %>% unlist()
seo <- ScaleData(object = seo)
# save scaled counts
scaled_counts <- seo@scale.data
## check cell cycle information
# mouse cell cycle information can be downloaded here: https://www.dropbox.com/s/3dby3bjsaf5arrw/cell_cycle_vignette_files.zip?dl=1
......@@ -362,42 +365,34 @@ data.frame(standard_deviation = sdev, variance = sdev^2) %>% mutate(pc = 1:n())
dir.create("cluster_comp_data", showWarnings = FALSE)
pc_num <- str_split(as.numeric(sop[which(sop$parameter == "calculate_pcs"),]$settings), "[+]") %>% unlist()
pc_num <- str_split(sop[which(sop$parameter == "calculate_pcs"),]$settings, "[+]") %>% unlist()
n_cores <- ifelse(length(pc_num) >= 11, 10, length(pc_num))
all_cluster_infos <- mclapply(pc_num, function(x){
## find clusters
seo <- FindClusters(object = seo, reduction.type = "pca", dims.use = 1:pc, k.param = 30, n.start = 100, algorithm = 1, n.iter = 10, modularity.fxn = 1, prune.SNN = 1/15, resolution = 0.8, print.output = FALSE, save.SNN = TRUE)
seo <- FindClusters(object = seo, reduction.type = "pca", dims.use = 1:x, k.param = 30, n.start = 100, algorithm = 1, n.iter = 10, modularity.fxn = 1, prune.SNN = 1/15, resolution = 0.8, print.output = FALSE, save.SNN = TRUE)
#' **Cells per cluster:**
print(paste0("Cells per cluster for ", x, " PCs:"))
table(seo@ident) %>% t() %>% kable()
# export cluster information
cluster_info <- seo@ident %>%
as.data.frame() %>%
setNames(paste0("cluster_pc", pc)) %>%
setNames(paste0("cluster_pc", x)) %>%
mutate(cell_id = rownames(.)) %>%
push_left("cell_id")
cluster_info %>% setNames(c("cell_id", "cluster")) %>% readr::write_tsv(paste0("cluster_comp_data/cluster_info_", x, "pc.txt"))
# tsne
seo <- RunTSNE(object = seo, dims.use = 1:x, perplexity=50)
if (length(pc_num == 1)) {TSNEPlot(object = seo, do.label = TRUE) + ggtitle(paste0(x, " PCs"))}
# ggsave(paste0("plots/tsne_", x, ".png"))
# export tsne data for later plotting
tsne_data <- as.data.frame(GetCellEmbeddings(
as.data.frame(GetCellEmbeddings(
object = seo,
reduction.type = "tsne",
dims.use = c(1, 2),
cells.use = colnames(seo@data)
)) %>%
mutate(cell_id = rownames(.))
tsne_data %>% write_tsv(paste0("cluster_comp_data/tsne_info_", x, "pc.txt"))
mutate(cell_id = rownames(.)) %>% write_tsv(paste0("cluster_comp_data/tsne_info_", x, "pc.txt"))
# find markers of clusters
......@@ -407,30 +402,15 @@ all_cluster_infos <- mclapply(pc_num, function(x){
write_tsv(paste0("cluster_comp_data/cluster_marker_genes_", x, "pc.txt"))
cluster_info
}, mc.cores = n_cores) %>% reduce(full_join, by = "cell_id")
}, mc.cores = n_cores)
all_cluster_infos %<>% reduce(full_join, by = "cell_id")
write_tsv(all_cluster_infos, "all_cluster_infos.txt")
seo@meta.data %>% left_join(all_cluster_infos, by = "cell_id") %>% write_tsv("cell_infos.txt")
scaled_data <- seo@scale.data %>%
as.data.frame() %>%
mutate(ensembl_gene_id = rownames(.)) %>%
left_join(gene_info, by = "ensembl_gene_id")
scaled_data %>%
write_tsv("cluster_comp_data/scaled_counts.txt")
seo@meta.data %>% left_join(all_cluster_infos, by = "cell_id") %>% write_tsv("cell_infos.txt")
# combine information for set select_pc results and save it in the working directory
selp <- paste0(sop[which(sop$parameter == "select_pc"), ]$settings, "pc")
list(cluster = read_tsv(paste0("cluster_comp_data/cluster_info_", selp, ".txt")),
marker = read_tsv(paste0("cluster_comp_data/cluster_marker_genes_", selp, ".txt")),
tsne = read_tsv(paste0("cluster_comp_data/tsne_info_", selp, ".txt")),
scaled_data = scaled_data
) %>% saveRDS(paste0(selp, "_infos.rds"))
# save counts
list(raw = seo@raw.data, normalized = seo@data, scaled = scaled_counts, regressed = seo@scale.data, genes = gene_info) %>% saveRDS("cluster_comp_data/counts.rds")
list(seo = seo, settings = sop) %>% saveRDS("seurat_object_data.rds")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment