From 797c616b59308d3e5af629a7be1448355ba6390c Mon Sep 17 00:00:00 2001 From: domingue Date: Wed, 6 Nov 2019 11:46:00 +0100 Subject: [PATCH] removed file added by mistake --- ms_workflow/03-filtering_aneuploid.R | 98 ---------------------------- 1 file changed, 98 deletions(-) delete mode 100644 ms_workflow/03-filtering_aneuploid.R diff --git a/ms_workflow/03-filtering_aneuploid.R b/ms_workflow/03-filtering_aneuploid.R deleted file mode 100644 index c69f7cf..0000000 --- a/ms_workflow/03-filtering_aneuploid.R +++ /dev/null @@ -1,98 +0,0 @@ -# ========================================================================== -# Setup -# ========================================================================== -library("corescf") -library("data.table") -library("dplyr") -library("ComplexHeatmap") -library("stringr") -library("ggplot2") -library("ggridges") -library("heatmaply") -library("RColorBrewer") - -# ========================================================================== -# Data -# ========================================================================== -dir.create(results_dir, showWarnings = FALSE, recursive = TRUE) -data_dir <- paste0(Sys.getenv("PRJ_DATA"), "/data") -results_dir <- paste0(Sys.getenv("PRJ_DATA"), "/aneuploid_cells") - -dloup_file <- paste0( - data_dir, - "/all_C13_mother-group_876-region_chr1_1-5120000_chrY_56320001-57227415-heatmap_copy_number.csv" -) - -dloup <- fread(dloup_file) %>% - mutate(node_id = as.character(node_id)) -# colnames(dloup)[1:10] - -# -# metadata -# -------------------------------------------------------------------------- -metadata <- dloup %>% - select(node_id, barcodes, num_cells, num_noisy) - -cells <- dloup %>% - select(-c("barcodes", "num_cells", "num_noisy")) - -# ========================================================================== -# Filtering -# ========================================================================== -cells_long <- cells %>% - gather("segment", "copy_number", -node_id) %>% - mutate(chromosome = gsub("(.*):.*", "\\1", segment)) %>% - filter(chromosome != "chrY") - -# head(cells_long) - -# -# mean ploidy -# -------------------------------------------------------------------------- -aneuploid_cells <- cells_long %>% - group_by(node_id, chromosome) %>% - arrange(node_id, chromosome) %>% - mutate( - is_segment_missing = ifelse(copy_number < 0.8, "yes", "no") - ) %>% - summarize( - mean_ploidy = mean(copy_number, na.rm = TRUE), - Frac_intact_chromosome = sum(is_segment_missing == "no", na.rm = TRUE)/n(), - is_chromosome_missing = ifelse(Frac_intact_chromosome < 0.1, "yes", "no") - ) %>% filter(is_chromosome_missing == "yes") - ungroup() %>% - group_by(node_id) %>% - mutate( - mean_ploidy = round(mean_ploidy, 2), - is_abnormal = ifelse(mean_ploidy <= 1.75 | mean_ploidy >= 4, "yes", "no"), - N_Complete_Karyotype = sum(is_abnormal == "no"), - Frac_Complete_Karyotype = sum(is_abnormal == "no") / n() - ) %>% - filter(is_abnormal == "yes") - -aneuploid_id <- unique(aneuploid_cells$node_id) - -# -# Visualization -# -------------------------------------------------------------------------- -mat <- cells %>% - as.data.frame() %>% - column2rownames("node_id") %>% - as.matrix() - -mat_filt <- mat[aneuploid_id, ] - -Heatmap( - mat_filt, - cluster_columns = FALSE, - # col = binary_cols, - show_column_names = FALSE - # column_split = noisy_lab, - # row_split = noisy_lab -) - -# ========================================================================== -# Reproducibility -# ========================================================================== -session::save.session(".03-filtering_aneuploid.dat") -devtools::session_info() \ No newline at end of file -- GitLab