Commit 0e1a29d8 authored by Lena Hersemann's avatar Lena Hersemann

added option to provide custom gene list for PCA and clustering

parent 05ee5f64
......@@ -18,12 +18,14 @@ Usage: seurat_analysis.R [options] <count_matrix> <annot_data> <settings>
Options:
--remove_outliers <boolean> whether outlier cells should be removed [default: TRUE]
--cc_markers <file> list of cell cycle markers if you would like to regress out cell cycle phase
--gene_list <file> file with a column called gene_ids with ensembl IDs which shall be used for clustering
'
# commandArgs = function(x) c("../metrices/SC2_star_counts_matrix.txt", "../quality_check/SC2_basic_design_incl_ccp.txt", "settings.txt")
# commandArgs = function(x) c("../seurat_analysis_without_blood/count_matrix_without_blood.txt", "../seurat_analysis_without_blood/basic_design_without_blood.txt", "settings.txt")
# commandArgs = function(x) c("../segerstolpe_counts.txt, ../segerstolpe_infos.txt")
# commandArgs = function(x) c("--cc_markers", "../../regev_lab_cell_cycle_genes.txt", "../count_matrix_without_blood.txt", "../annot_data.txt", "settings.txt")
# commandArgs = function(x) c("--gene_list", "go_transcription_factors.txt", "count_matrix.txt", "scater_quality_metrices.txt", "settings.txt")
opts = docopt(doc, commandArgs(TRUE))
......@@ -53,6 +55,7 @@ annot_data_file = opts$annot_data
settings_file = opts$settings
remove_outliers = opts$remove_outliers
cc_markers = opts$cc_markers
gene_list = opts$gene_list
vec_as_df(unlist(opts)) %>%
filter(! str_detect(name, "^[<-]")) %>%
......@@ -205,10 +208,18 @@ seo <- ScaleData(object = seo)
# save scaled counts
scaled_counts <- seo@scale.data
if (!is.null(gene_list) & file.exists(gene_list)) {
selected_genes = read_tsv(gene_list) %$% gene_id
print(paste0("genes from the ", gene_list, " will be used for PCA"))
} else {
selected_genes = seo@var.genes
print("highly variable genes will be used for PCA")
}
## check cell cycle information
# mouse cell cycle information can be downloaded here: https://www.dropbox.com/s/3dby3bjsaf5arrw/cell_cycle_vignette_files.zip?dl=1
if (any(str_detect(regress_out, "phase")) & file.exists(cc_markers)) {
if (all(str_detect(regress_out, "phase") | !is.null(cc_markers))) {
print("cell cycle information found and used to regress out cell cycle phase")
......@@ -219,7 +230,7 @@ if (any(str_detect(regress_out, "phase")) & file.exists(cc_markers)) {
g2m.genes <- cc.genes[44:97,]$ensembl_gene_id
# Run PCA and check the loading for cell cycle related genes
seo <- RunPCA(object = seo, pc.genes = seo@var.genes, do.print = F)
seo <- RunPCA(object = seo, pc.genes = selected_genes, do.print = F)
gene.loadings <- GetDimReduction(
object = seo,
......@@ -261,7 +272,7 @@ print(paste0(c("Variables to regress out: ", vars_to_regress), collapse = " "))
## run PCA -------------------------------------------------------------------------------------------------------------
# pc = 50
pc = as.numeric(sop[which(sop$parameter == "select_pc"),]$settings)
seo <- RunPCA(object = seo, pc.genes = seo@var.genes, do.print = TRUE, pcs.compute = pc, pcs.print = 0, genes.print = 0)
seo <- RunPCA(object = seo, pc.genes = selected_genes, do.print = TRUE, pcs.compute = pc, pcs.print = 0, genes.print = 0)
#'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment