Commit 94c80e1e authored by Lena Hersemann

fixed bugs: now only unique CON, REV and STANDARD entries are removed from the...

Fixed bugs: only unique CON, REV and STANDARD entries are now removed from the intensities matrix; old_protein_ids is now included in the right_join when summarizing sample information; the file name of the exported sample information is now corrected.
parent ced15313
......@@ -231,7 +231,7 @@ sample_info <- perBatch %>% map_df(~ select(.x, -starts_with("identification_typ
if (any(map(perBatch, ~colnames(.x) %>% str_detect(., "identification_type")) %>% unlist())) {
sample_info <- perBatch %>% map_df(~ select(.x, -starts_with("lfq_intensity")), .id = 'GROUP') %>% gather(sample, identification_type, starts_with("identification_type")) %>% mutate(sample = str_replace_all(sample, "identification_type_", "")) %>% mutate(oldName = sample) %>% rename(file_name = GROUP) %>%
# right_join(sample_info, by = c("sample", "file_name", "oldName", "protein_ids", "protein_acc", "scrap", "fasta_headers", "init_groups"))
right_join(sample_info, by = c("sample", "file_name", "oldName", "protein_ids", "scrap", "fasta_headers"))
right_join(sample_info, by = c("sample", "file_name", "oldName", "protein_ids", "old_protein_ids", "scrap", "fasta_headers"))
ident_types <- TRUE
print("Identification type data were provided")
} else {
......@@ -240,7 +240,7 @@ if (any(map(perBatch, ~colnames(.x) %>% str_detect(., "identification_type")) %>
# TODO: find faster solution for renaming
sample_info$sample %<>% str_replace_all(., oldNames, newNames)
write_tsv(sample_info, paste0(results_prefix, "feature_sample_information.txt"))
write_tsv(sample_info, paste0(results_prefix, ".feature_sample_information.txt"))
#'
......@@ -329,12 +329,14 @@ write_tsv(msData, path=add_prefix("lfq_incl_ctrls.txt"))
#'
#' ### Control removal
#' In this step, unique CON__, REV__ and STANDARD entries are removed. So far, biased entries that contain both CON__ and protein accessions are kept.
msData %>% filter(is_control(protein_ids)) %>% select(protein_ids) %>% DT::datatable(caption="controls removed from data")
msData %<>% mutate(is_scrap = is_control(protein_ids) & !str_detect(protein_ids, ";"))
msData %>% filter(is_scrap) %>% select(protein_ids) %>% DT::datatable(caption="controls removed from data")
tribble(~intial_data, ~filtered_data, ~removed_rows,
nrow(msData), nrow(filter(msData, !is_control(protein_ids))), nrow(filter(msData, is_control(protein_ids)))) %>% kable()
nrow(msData), nrow(filter(msData, !is_scrap)), nrow(filter(msData, is_scrap))) %>% kable()
msData %<>% filter(!is_control(protein_ids))
msData %<>% filter(!is_scrap) %>% select(-is_scrap)
stopifnot(nrow(filter(msData, str_length(protein_ids)==0)) ==0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment