Commit 608e4f90 authored by Lena Hersemann's avatar Lena Hersemann

changed plotting of NA values; changed pattern detection for the names of proteinGroups files

parent ecdd68b1
......@@ -61,7 +61,7 @@ design <- opts$design
## read in MaxQuant output files (for analysis on protein level: "proteinGroups.txt") and reformat the data
## list.files() treats its second argument as a regular expression and returns full paths here
## (`full` partially matches the `full.names` argument).
## NOTE(review): both assignments below are visible in this view; if both are present in the file,
## the second one overwrites the first. Its broader pattern "proteinGroups" also matches file names
## that carry a prefix/suffix or lack the ".txt" extension -- confirm that is intended.
mqTxtFiles = list.files(ms_data_folder, "proteinGroups.txt", full=TRUE)
mqTxtFiles = list.files(ms_data_folder, "proteinGroups", full=TRUE)
# mqTxtFiles = list.files(interp_from_env("${PRJ_DATA}/provided/20181122-133344-Barbara-scaffoldDB-all-txt"), "proteinGroups.txt", full=TRUE)
# mqTxtFiles = list.files(interp_from_env("${PRJ_DATA}/provided/20181102-Bar-MQ-Scaffold_Holger"), "proteinGroups.txt", full=TRUE)
......@@ -382,18 +382,8 @@ names(msData) %<>% str_replace_all(., oldNames, newNames)
#' ### Missing values per sample
## plot NA proportion based on the DataExplorer function profile_missing()
load_pack(DataExplorer)
#plot_missing(msData) +
# TODO: adjust percentages
## profile missing values for every column of msData except "protein_ids"
missing_value <- profile_missing(msData[, which(colnames(msData) != "protein_ids")])
## horizontal bar chart: one bar per feature showing num_missing, labelled with
## pct_missing as a percentage rounded to two decimals, filled by the "group" column
## NOTE(review): this relies on profile_missing() returning a `group` column with the
## levels Good/OK/Bad/Remove -- confirm against the installed DataExplorer version
ggplot(missing_value, aes_string(x = "feature", y = "num_missing", fill = "group")) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(round(100 * pct_missing, 2), "%"))) +
scale_fill_manual("Group", values = c("Good" = "cadetblue", "OK" = "cadetblue3", "Bad" = "coral1", "Remove" = "coral3"), breaks = c("Good", "OK", "Bad", "Remove")) +
theme(legend.title = element_text(size=14, face="bold")) +
coord_flip() +
xlab("Features") + ylab("Missing Rows")
# TODO: adjust percentages
## DataExplorer's built-in missing-value plot on the same column subset (without "protein_ids")
plot_missing(msData[, which(colnames(msData) != "protein_ids")])
## overall fraction of NA intensities: reshape all non-protein_ids columns to long format
## (sample, intensity), then sum is.na(intensity) / length(intensity) over all values
na_prop <- msData %>% gather(sample, intensity, -protein_ids) %$% { sum(is.na(intensity)/length(intensity))}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment