Commit 4f286ff2 authored by Lena Hersemann

renaming of samples is now optional

parent 322b0c92
......@@ -14,10 +14,16 @@ suppressMessages(require(docopt))
# Command-line interface specification (docopt syntax; presumably handed to
# docopt() further down, where `opts` is built).
# The renaming scheme is an optional flag, so only ONE usage pattern is kept:
# the earlier pattern with a positional <renaming_scheme> would make the
# argument mandatory again and leave the spec with two competing "Usage:" lines.
doc <- '
Prepare mass spec data for differential analysis
Usage: ms_data_prep.R [options] <ms_data_folder> <design_file>
Options:
--renaming_scheme <file> Table with two columns with the "old" and "new" names
'
#commandArgs <- function(x) c("../provided/20181122-133344-Barbara-scaffoldDB-all-txt", "../provided/renaming_scheme.xlsx", "exp_design.txt")
#commandArgs <- function(x) c("--renaming_scheme", "../provided/renaming_scheme.xlsx", "../provided/20181122-133344-Barbara-scaffoldDB-all-txt", "exp_design.txt")
#commandArgs <- function(x) c("../provided/20181122-133344-Barbara-scaffoldDB-all-txt", "exp_design.txt")
#commandArgs <- function(x) c("../provided/20181102-Bar-MQ-Scaffold_Holger", "../provided/renaming_scheme.xlsx")
#commandArgs <- function(x) c("proteinGroups", "renaming_scheme.txt", "exp_design.txt")
#commandArgs <- function(x) c(".", "renaming_scheme.xlsx")
......@@ -57,14 +63,31 @@ renaming_scheme <- opts$renaming_scheme
design_file <- opts$design_file

## MaxQuant protein-level output ("proteinGroups.txt"): find the files in the
## input folder, load each one and keep only the columns used downstream.
mqTxtFiles <- list.files(ms_data_folder, pattern = "proteinGroups.txt", full.names = TRUE)
# mqTxtFiles = list.files(interp_from_env("${PRJ_DATA}/provided/20181122-133344-Barbara-scaffoldDB-all-txt"), "proteinGroups.txt", full=TRUE)
# mqTxtFiles = list.files(interp_from_env("${PRJ_DATA}/provided/20181102-Bar-MQ-Scaffold_Holger"), "proteinGroups.txt", full=TRUE)

# One table per input file; list elements are named after the file they came from.
perBatch <- map(mqTxtFiles, function(batch_path) {
  batch_tbl <- pretty_columns(read_tsv(batch_path))
  select(
    batch_tbl,
    protein_ids,
    fasta_headers,
    matches("^lfq_intensity"),
    matches("^identification_type"),
    matches("^intensity")
  )
})
perBatch <- setNames(perBatch, basename(mqTxtFiles))
## load renaming scheme
# The renaming scheme is optional. The NULL check must come first: any string
# test on a missing scheme yields a zero-length condition and aborts the script.
if (is.null(renaming_scheme)) {
  # No scheme supplied: build an identity mapping (old == new) from the sample
  # suffixes of the LFQ intensity columns found in the loaded batches.
  orig_names <- perBatch %>%
    map(~ select(.x, starts_with("lfq_intensity")) %>% colnames()) %>%
    unlist(use.names = FALSE) %>%
    str_replace("lfq_intensity_", "")
  # stringsAsFactors = FALSE keeps the names as character on R < 4.0 as well.
  renamingScheme <- data.frame(old = orig_names, new = orig_names, stringsAsFactors = FALSE)
} else {
  # Pick the reader from the actual file extension. The former unanchored
  # regex ".xls" also matched paths that merely contained e.g. "_xls" somewhere.
  scheme_ext <- tolower(tools::file_ext(renaming_scheme))
  if (grepl("^xls", scheme_ext)) {
    renamingScheme <- read_xlsx(renaming_scheme)
  } else {
    renamingScheme <- read_tsv(renaming_scheme)
  }
}

# Alternation pattern over all original sample names ("a|b|c").
oldNames <- str_c(renamingScheme$old, collapse = "|")
## load design_file
......@@ -72,16 +95,7 @@ expDesign <- read_tsv(design_file)
# Abort when any replicate named in the design file is missing from the (new)
# sample names. The former `all(a != b)` test compared position by position and
# only fired when EVERY position differed, so partial mismatches went unnoticed.
if (!all(expDesign$replicate %in% renamingScheme$new)) {
  stop("ATTENTION: replicates of contrast and design file do not match")
}
## read in MaxQuant output files (for analysis on protein level: "proteinGroups.txt") and reformat the data
# NOTE(review): the same read-in statements also appear earlier in this listing,
# ahead of the renaming-scheme handling. This copy looks like the pre-commit
# position shown by the diff view -- confirm before keeping both.
# List the files in ms_data_folder matching "proteinGroups.txt", with their paths.
mqTxtFiles = list.files(ms_data_folder, "proteinGroups.txt", full=TRUE)
# mqTxtFiles = list.files(interp_from_env("${PRJ_DATA}/provided/20181122-133344-Barbara-scaffoldDB-all-txt"), "proteinGroups.txt", full=TRUE)
# mqTxtFiles = list.files(interp_from_env("${PRJ_DATA}/provided/20181102-Bar-MQ-Scaffold_Holger"), "proteinGroups.txt", full=TRUE)
# Read every file, pass it through pretty_columns(), keep the id, header and
# intensity-related columns, and name each list element after its source file.
perBatch = mqTxtFiles %>%
map(~ read_tsv(.x) %>% pretty_columns() %>%
select(protein_ids, fasta_headers, matches("^lfq_intensity"), matches("^identification_type"), matches("^intensity"))) %>%
setNames(basename(mqTxtFiles))
# list input arguments
vec_as_df(unlist(opts)) %>%
filter(! str_detect(name, "^[<-]")) %>%
rbind(c("input_file_num", length(mqTxtFiles))) %>%
......@@ -92,6 +106,7 @@ vec_as_df(unlist(opts)) %>%
#'
#' renaming scheme:
# Point out in the report when the original sample names were kept, then show
# the old -> new mapping as a table.
if (is.null(renaming_scheme)) {
  print("Original naming kept; renaming scheme was not provided")
}
kable(setNames(renamingScheme, c("old names", "new names")))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment