Commit a28a1252 authored by Lena Hersemann's avatar Lena Hersemann

removed filtering step

parent 68335f0c
......@@ -2,7 +2,7 @@
#+ include=FALSE
#**************************************************************
#' # Quality control and filtering of single-cell RNASeq data
#' # Quality check of single-cell RNASeq data
#**************************************************************
# https://www.bioconductor.org/help/workflows/simpleSingleCell/
......@@ -183,25 +183,25 @@ ifelse(length(spike_plots) > 0 ,grid.arrange(grobs = spike_plots, ncol = length(
# ifelse(length(mito_plots) > 0 , grid.arrange(grobs = spike_plots, ncol = length(sce), heights=unit(0.5, "npc")), print(no_mitos))
#' <br><br>
#' ## Filtering by library size and number of expressed genes
#' Cells are filtered by library size and by number of expressed genes: a cell is dropped when either value lies more than 3 Median Absolute Deviations (MADs) below the median on the log scale (http://www.statisticshowto.com/median-absolute-deviation/)
# Filter every sample in `sce` by library size and number of detected
# features, write the filtered count matrix for samples that lost cells,
# and summarise how many cells were dropped / kept per sample.
#
# Reads : `sce` (named list of per-sample objects exposing `total_counts`
#         and `total_features` via `$`, column subsetting and `counts()`).
# Writes: `sce[[i]]` is replaced by its filtered version;
#         "<sample>_counts_matrix_filtered.txt" is written for every sample
#         in which at least one cell was flagged;
#         `drop_table` holds one summary row per sample.

# Empty, typed seed so the summary keeps its columns even if `sce` is empty.
drop_table <- data.frame(
  sample = character(0),
  "by_lib_size" = numeric(0),
  "by_feature" = numeric(0),
  "remaining_cells" = numeric(0)
)

# Collect one summary row per sample and bind once after the loop instead of
# growing the data frame with rbind() on every iteration.
drop_rows <- vector("list", length(sce))
names(drop_rows) <- names(sce)

for (i in names(sce)) {
  # Flag cells lying more than 3 MADs below the median (log scale).
  libsize.drop <- scater::isOutlier(sce[[i]]$total_counts, nmads = 3, type = "lower", log = TRUE)
  feature.drop <- scater::isOutlier(sce[[i]]$total_features, nmads = 3, type = "lower", log = TRUE)

  # Keep only cells that pass both criteria.
  filtered <- sce[[i]][, !(libsize.drop | feature.drop)]

  # `remaining_cells` must be counted on the filtered object; counting on
  # `sce[[i]]` here would report the number of cells *before* filtering.
  drop_rows[[i]] <- data.frame(
    sample = i,
    "by_lib_size" = sum(libsize.drop),
    "by_feature" = sum(feature.drop),
    "remaining_cells" = ncol(filtered)
  )

  sce[[i]] <- filtered

  # Only export a filtered matrix when filtering actually removed something.
  if (any(libsize.drop) || any(feature.drop)) {
    counts(filtered) %>%
      as.data.frame() %>%
      mutate(ensembl_gene_id = rownames(.)) %>%
      select(ensembl_gene_id, everything()) %>%
      write_tsv(paste0(i, "_counts_matrix_filtered.txt"))
  }
}

drop_table <- do.call(rbind, c(list(drop_table), unname(drop_rows)))

drop_table %>% kable()
# <br><br>
# ## Filtering by library size and number of expressed genes
# Filtering of cells is done based on the library size and number of expressed genes per cell using the Median Absolute Deviation (MAD) (http://www.statisticshowto.com/median-absolute-deviation/)
# drop_table <- data.frame(sample = character(0), "by_lib_size" = numeric(0), "by_feature" = numeric(0), "remaining_cells" = numeric(0))
# for (i in names(sce)) {
# libsize.drop <- scater::isOutlier(sce[[i]]$total_counts, nmads=3, type="lower", log=TRUE)
# feature.drop <- scater::isOutlier(sce[[i]]$total_features, nmads=3, type="lower", log=TRUE)
# filtered <- sce[[i]][,!(libsize.drop | feature.drop)]
# drop_table <- rbind(drop_table, data.frame(sample = i, "by_lib_size" = sum(libsize.drop), "by_feature" = sum(feature.drop), "remaining_cells" = ncol(sce[[i]])))
# sce[[i]] <- filtered
# if (any(libsize.drop) || any(feature.drop)) {
# counts(filtered) %>%
# as.data.frame() %>%
# mutate(ensembl_gene_id = rownames(.)) %>%
# select(ensembl_gene_id, everything()) %>%
# write_tsv(paste(i, "_counts_matrix_filtered.txt", sep = ""))
# }
# }
# drop_table %>% kable()
#' <br><br>
#' ## Check average counts per feature
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment