Commit 0e7aabb4 authored by Lena Hersemann's avatar Lena Hersemann

added additional gene information of top50 most abundant genes; gene...

added additional gene information of top50 most abundant genes; gene information are exported if requested
parent 2d05c6c0
......@@ -64,7 +64,7 @@ get.cellcycl.phase <- function(x, annot.db){
counts_file <- opts$count_data
cell_infos_file <- opts$cell_infos
prefix <- opts$file_prefix
gene_info <- opts$gene_info
export_gene_info <- opts$gene_info
countData <- fread(counts_file)
......@@ -87,6 +87,25 @@ rm(countData)
rm(countMatrix)
# get gene information if requested
if(any(str_detect(rownames(counts(sce)), "^FBgn|^ENSCAFG|^ENSMUSG|^ENSDARG|^ENSPTRG|^ENSG|^ENSCGRG"))) {
db <- guess_anno_db(rownames(counts(sce))[1])
} else {
db <- ""
}
# if ensembl_gene_ids were provided, information on gene name, description and coordinates will be downloaded from biomaRt
if (nchar(db) > 0) {
mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = guess_mart(rownames(counts(sce))[1]), host = paste0("oct2018.archive.ensembl.org"), path = "/biomart/martservice", archive = FALSE)
gene_info = biomaRt::getBM(mart = mart, c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") ) %>% tbl_df
} else if (nchar(db) == 0){
gene_info = data.frame(gene_id = rownames(rowData(sce)))
}
if (export_gene_info == TRUE) { gene_info %>% write_tsv("gene_information.txt") }
# Quality control and filtering --------------------------------------------------------------
#' Due to low quantities of RNA in single cells, scRNAseq data are much noiser than bulk RNAseq data. Here we perform
......@@ -222,6 +241,9 @@ scater::calcAverage(sce) %>%
scater::plotQC(sce, type = "highest-expression", exprs_values = "counts")
top50 <- rowData(sce) %>% as.data.frame() %>% transmute(gene_id = rownames(.), total_counts) %>% arrange(desc(total_counts)) %>% slice(1:50)
gene_info %>% right_join(top50, by = c("ensembl_gene_id" = "gene_id")) %>% arrange(desc(total_counts)) %>% transmute(gene_id = ensembl_gene_id, gene_name = external_gene_name, chr = chromosome_name, start = start_position, stop = end_position, description) %>% table_browser()
#' <br><br>
#'
......@@ -263,12 +285,6 @@ scater::plotExplanatoryVariables(sce, variables = c(colnames(cell_infos)))
#https://www.bioconductor.org/packages/devel/bioc/vignettes/AnnotationDbi/inst/doc/IntroToAnnotationPackages.pdf
if(any(str_detect(rownames(counts(sce)), "^FBgn|^ENSCAFG|^ENSMUSG|^ENSDARG|^ENSPTRG|^ENSG|^ENSCGRG"))) {
db <- guess_anno_db(rownames(counts(sce))[1])
} else {
db <- ""
}
if (db == "org.Mm.eg.db"){
cc.pairs <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package="scran"))
} else if (db == "org.Hs.eg.db") {
......@@ -306,16 +322,6 @@ if (exists("cc.pairs")) {
}
# if ensembl_gene_ids were provided, information on gene name, description and coordinates will be downloaded from biomaRt
if (gene_info == TRUE & nchar(db) > 0) {
mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = guess_mart(rownames(counts(sce))[1]), host = paste0("oct2018.archive.ensembl.org"), path = "/biomart/martservice", archive = FALSE)
gene_info = biomaRt::getBM(mart = mart, c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") ) %>% tbl_df
gene_info %>% write_tsv("gene_information.txt")
} else if (gene_info == TRUE & nchar(db) == 0){
gene_info = data.frame(gene_id = rownames(rowData(sce)))
gene_info %>% write_tsv("gene_information.txt")
}
colData(sce) %>% as.data.frame() %>% write_tsv("scater_quality_metrics.txt")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment