Commit d47a0970 authored by Lena Hersemann's avatar Lena Hersemann

added code to source diffex_commons.R; added option to download gene...

added code to source diffex_commons.R; added option to download gene information based on the count matrix rownames; cosmetics
parent 3ac57df7
......@@ -15,15 +15,16 @@ suppressMessages(require(docopt))
doc = '
Quality control and filtering of scRNAseq data
Usage: sc_quality_check.R [options] <star_counts_matrix> <design>
Usage: sc_quality_check.R [options] <count_data> <cell_infos>
Options:
--file_prefix <sample_name> add prefix to output files if storing output of multiple analysis in one folder [default: ]
--gene_info <gene_info> export gene information [default: FALSE]
'
# commandArgs <- function(x) c("../segerstolpe_counts.txt", "../segerstolpe_infos.txt")
# commandArgs <- function(x) c("--gene_info", "TRUE", "counts_matrix.txt", "cell_infos.txt")
opts = docopt(doc, commandArgs(TRUE))
......@@ -32,6 +33,7 @@ opts = docopt(doc, commandArgs(TRUE))
#https://www.bioconductor.org/help/workflows/simpleSingleCell/
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.40/R/core_commons.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.45/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/bio/diffex_commons.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/ngs_tools/raw/v6/common/cp_utils.R")
load_pack(SingleCellExperiment)
load_pack(knitr)
......@@ -61,9 +63,10 @@ get.cellcycl.phase <- function(x, annot.db){
# HANDLE input data -------------------------------------------------------------------------------
counts_file <- opts$star_counts_matrix
design_file <- opts$design
counts_file <- opts$count_data
cell_infos_file <- opts$cell_infos
prefix <- opts$file_prefix
gene_info <- opts$gene_info
countData <- fread(counts_file)
......@@ -73,13 +76,13 @@ countMatrix <- countData %>%
countMatrix <- countMatrix[rowSums(countMatrix)>0, ]
design <- fread(design_file) %>% as.data.frame()
cell_infos <- fread(cell_infos_file) %>% as.data.frame()
if (ncol(countMatrix) != nrow(design)) {print("Count matrix and design have different sample numbers")}
if (ncol(countMatrix) != nrow(cell_infos)) {print("Count matrix and cell_infos have different sample numbers")}
# create SingleCellExperiment object:
sce <- SingleCellExperiment(assays = list(counts=countMatrix), colData = design)
sce <- SingleCellExperiment(assays = list(counts=countMatrix), colData = cell_infos)
rm(countData)
......@@ -250,7 +253,7 @@ summary(sce$outlier)
# log-normalization
sce <- normalize(sce)
scater::plotExplanatoryVariables(sce, variables = c(colnames(design)))
scater::plotExplanatoryVariables(sce, variables = c(colnames(cell_infos)))
#' <br><br>
......@@ -288,11 +291,11 @@ if (exists("cc.pairs")) {
# export cell cycle data with its actual data...
data %>% write_tsv(paste(prefix, "cell_cycle_phases.txt", sep = ""))
# ...and only the phase as additional batch effect in the design file
design %>%
# ...and only the phase as additional batch effect in the cell_infos file
cell_infos %>%
left_join(data, by = "cell_id") %>%
select(-G1, -S, -G2M) %>%
write_tsv(paste(prefix, "basic_design_incl_ccp.txt", sep = ""))
write_tsv(paste(prefix, "basic_cell_infos_incl_ccp.txt", sep = ""))
# plot data
cc_plot <- ggplot(data, aes(G1, G2M, color = phase)) +
......@@ -307,14 +310,15 @@ if (exists("cc.pairs")) {
# if ensembl_gene_ids were provided, information on gene name, description and coordinates will be downloaded from biomaRt
if (nchar(db) > 0) {
if (gene_info == TRUE & nchar(db) > 0) {
mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = guess_mart(rownames(counts(sce))[1]), host = paste0("oct2018.archive.ensembl.org"), path = "/biomart/martservice", archive = FALSE)
gene_info = biomaRt::getBM(mart = mart, c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") ) %>% tbl_df
} else {
gene_info %>% write_tsv("gene_information.txt")
} else if (gene_info == TRUE & nchar(db) == 0){
gene_info = data.frame(gene_id = rownames(rowData(sce)))
gene_info %>% write_tsv("gene_information.txt")
}
gene_info %>% write_tsv("gene_information.txt")
colData(sce) %>% as.data.frame() %>% write_tsv("scater_quality_metrics.txt")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment