Commit 3220ec8d authored by domingue's avatar domingue

New additions

- updated readme
- fixed tests
-
- added new functions:
  + trim_outliers
  + round_any

- Fixed due to dependency issues:
  + plot_ci
parent 1b4ec740
Pipeline #2502 failed with stage
in 13 minutes and 3 seconds
......@@ -94,6 +94,7 @@ export(rmSomeElements)
export(rmerge)
export(rotXlab)
export(rot_x_lab)
export(round_any)
export(rownames2column)
export(s1_de_s2)
export(s1_eq_s2)
......@@ -101,6 +102,7 @@ export(s1_gt_s2)
export(s1_lt_s2)
export(safe_ifelse)
export(scale_fill_redgreed)
export(se)
export(search_df)
export(select)
export(set_names)
......@@ -110,6 +112,7 @@ export(tab20b)
export(tab20c)
export(table_browser)
export(trim_ext)
export(trim_outliers)
export(two_way_interaction)
export(union_all)
export(unique_rows)
......
......@@ -40,11 +40,84 @@ calc_ci <- function(df, variable, ci_interval = 0.95) {
dplyr::select(-c(mean, sd, N, se, mean))
}
#' outlier handling
#'
#' Remove outliers
#'
#' @param values a numerical vector
#' @param probs DEfine the limits of the probability at which outliers are refined. Default = c(0.05, 0.95)
#'
#' @export
#' @return a vector without outliers
#'
#' @examples
#' \dontrun{
#' pval <- c(0.001, 0.01, 0.05, 0.1, 0.5, 0.6, 0.8, 0.95, 1)
#' trim_outliers(pval)
#' }
trim_outliers <- function(values, probs = c(0.05, 0.95)) {
# values = deResults$pvalue
stopifnot(length(probs) == 2)
quantiles <- quantile(values, probs, na.rm = TRUE)
pmax(quantiles[1], pmin(quantiles[2], values))
}
#' Standard error of the mean
#'
#'
#' @param x a numerical vector
#'
#' @export
#' @return The conventional standard error of the mean
#'
#' @examples
#' \dontrun{
#' x <- rnorm(1000)
#' se(x)
#' }
se <- function(x){
sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))
}
#' Round
#'
#' Round any value. Implementation of [plyr::round_any()]. See also:
#' https://github.com/hadley/plyr/blob/34188a04f0e33c4115304cbcf40e5b1c7b85fedf/R/round-any.r#L28-L30
#' https://stackoverflow.com/questions/43627679/round-any-equivalent-for-dplyr/46489816#46489816
#'
#' @param x numeric or date-time (POSIXct) vector to round
#' @param accuracy number to round to; for POSIXct objects, a number of seconds
#' @param f rounding function: \code{\link{floor}}, \code{\link{ceiling}} or
#' \code{\link{round}}
#'
#' @export
#' @return
#' @examples
#' round_any(135, 10)
#' round_any(135, 100)
#' round_any(135, 25)
#' round_any(135, 10, floor)
#' round_any(135, 100, floor)
#' round_any(135, 25, floor)
#' round_any(135, 10, ceiling)
#' round_any(135, 100, ceiling)
#' round_any(135, 25, ceiling)
#'
#' round_any(Sys.time() + 1:10, 5)
#' round_any(Sys.time() + 1:10, 5, floor)
#' round_any(Sys.time(), 3600)
round_any <- function(x, accuracy, f = round) {
f(x / accuracy) * accuracy
}
#' Plot confidence interval
#'
#' Calculate interval of Confidence for a given variable and plot it
#' @note this function was modified due to the deprecation of [!! rlang::get_expr]. See this [note](https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885)
#' @note this function was modified due to the deprecation of [ rlang::get_expr()]. See this [note](https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885)
#' @seealso [calc_ci()]
#' @param grpData Input data.frame
#' @param variable Variable to calculate CI
......@@ -105,7 +178,7 @@ plot_ci <- function(grpData, variable, ci_interval = 0.95) {
#'
#' Calculate interval of Confidence for a given variable and plot it
#' @seealso [calc_ci()] and @seealso [plot_ci()]
#' @note this function was modified due to the deprecation of [!! rlang::get_expr]. See this [note](https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885)
#' @note this function was modified due to the deprecation of [ rlang::get_expr()]. See this [note](https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885)
#' TODO: support model formula instead of group data
#'
#' @param grpData Input data.frame
......
......@@ -87,7 +87,7 @@ vec2df <- function(namedVec) {
#' @examples
#' \dontrun{
#' x <- c(w = 1, c = 2)
#' vec_as_df(1:3)
#' vec_as_df(x)
#' }
vec_as_df <- function(namedVec, row_name = "name", value_name = "value") {
tibble::tibble(name = names(namedVec), value = namedVec) %>%
......
......@@ -2,6 +2,25 @@
[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental)
<!-- badges: end -->
# Description
- R package with bioinfo core functions
- The goal is create an R package to collect disparate functions so that
+ we have them in one place
+ dependencies are taken care of during build
+ there is documentation and
+ testing
- Functions were taken from:
+ core_commons.R
+ diffex_helpers.R
+ datatable_commons.R
+ ci_commons.R
+ cp_utils.R
+ ggplot_commons.R
It was tested with the dge and ms_tools workflows, but not with sc_tools. Not in production.
# Installation
## From gitlab
......
......@@ -20,7 +20,7 @@ a tibble with the name of the variable prefixing the mean and CI. See @examples
Calculate interval of Confidence for a given variable and plot it
}
\note{
this function was modified due to the deprecation of \link[!! rlang:get_expr]{!! rlang::get_expr}. See this \href{https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885}{note}
this function was modified due to the deprecation of \code{\link[ rlang:get_expr]{ rlang::get_expr()}}. See this \href{https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885}{note}
TODO: support model formula instead of group data
}
\examples{
......
......@@ -20,7 +20,7 @@ a tibble with the name of the variable prefixing the mean and CI. See @examples
Calculate interval of Confidence for a given variable and plot it
}
\note{
this function was modified due to the deprecation of \link[!! rlang:get_expr]{!! rlang::get_expr}. See this \href{https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885}{note}
this function was modified due to the deprecation of \code{\link[ rlang:get_expr]{ rlang::get_expr()}}. See this \href{https://github.com/r-lib/rlang/pull/315/commits/dfa986bd5a0b7b9e8c3e041bb67d6a18477f9885}{note}
}
\examples{
\dontrun{
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ci_helpers.R
\name{round_any}
\alias{round_any}
\title{Round}
\usage{
round_any(x, accuracy, f = round)
}
\arguments{
\item{x}{numeric or date-time (POSIXct) vector to round}
\item{accuracy}{number to round to; for POSIXct objects, a number of seconds}
\item{f}{rounding function: \code{\link{floor}}, \code{\link{ceiling}} or
\code{\link{round}}}
}
\value{
}
\description{
Round any value. Implementation of \code{\link[plyr:round_any]{plyr::round_any()}}. See also:
https://github.com/hadley/plyr/blob/34188a04f0e33c4115304cbcf40e5b1c7b85fedf/R/round-any.r#L28-L30
https://stackoverflow.com/questions/43627679/round-any-equivalent-for-dplyr/46489816#46489816
}
\examples{
round_any(135, 10)
round_any(135, 100)
round_any(135, 25)
round_any(135, 10, floor)
round_any(135, 100, floor)
round_any(135, 25, floor)
round_any(135, 10, ceiling)
round_any(135, 100, ceiling)
round_any(135, 25, ceiling)
round_any(Sys.time() + 1:10, 5)
round_any(Sys.time() + 1:10, 5, floor)
round_any(Sys.time(), 3600)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ci_helpers.R
\name{se}
\alias{se}
\title{Standard error of the mean}
\usage{
se(x)
}
\arguments{
\item{x}{a numerical vector}
}
\value{
The conventional standard error of the mean
}
\description{
Standard error of the mean
}
\examples{
\dontrun{
x <- rnorm(1000)
se(x)
}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ci_helpers.R
\name{trim_outliers}
\alias{trim_outliers}
\title{outlier handling}
\usage{
trim_outliers(values, probs = c(0.05, 0.95))
}
\arguments{
\item{values}{a numerical vector}
\item{probs}{DEfine the limits of the probability at which outliers are refined. Default = c(0.05, 0.95)}
}
\value{
a vector without outliers
}
\description{
Remove outliers
}
\examples{
\dontrun{
pval <- c(0.001, 0.01, 0.05, 0.1, 0.5, 0.6, 0.8, 0.95, 1)
trim_outliers(pval)
}
}
......@@ -22,6 +22,6 @@ Convert a named vector to a data frame
\examples{
\dontrun{
x <- c(w = 1, c = 2)
vec_as_df(1:3)
vec_as_df(x)
}
}
context("Statistics helper functions")
library("corescf")
test_that("P-values were trimmed.", {
pval <- c(0.001, 0.01, 0.05, 0.1, 0.5, 0.6, 0.8, 0.95, 1)
......
......@@ -40,22 +40,23 @@ enr <- structure(list(ID = c("mmu05012", "mmu04141", "GO:0005791", "GO:0030867")
## added due to changes in DOSE
enr2 <- structure(list(ID = c("mmu05012", "mmu04141"), Description = c(
"Parkinson disease",
"Protein processing in endoplasmic reticulum"
), GeneRatio = c(
"3/13",
"3/13"
), BgRatio = c("142/8732", "164/8732"), pvalue = c(
0.00106856304283198,
0.00161978763540018
), p.adjust = c(0.0251067083487027, 0.0251067083487027), qvalue = c(0.02387055462695, 0.02387055462695), geneID = c(
"22335/595136/22195",
"20832/27061/105245"
), Count = c(3L, 3L), ontology = c(
"kegg",
"kegg"
)), row.names = c(NA, -2L), class = "data.frame")
enr2 <- structure(list(ID = c("mmu05012", "mmu05022", "mmu05020", "mmu05016",
"mmu05010", "mmu04141"), Description = c("Parkinson disease",
"Pathways of neurodegeneration - multiple diseases", "Prion disease",
"Huntington disease", "Alzheimer disease", "Protein processing in endoplasmic reticulum"
), GeneRatio = c("5/13", "5/13", "4/13", "4/13", "4/13", "3/13"
), BgRatio = c("248/8913", "473/8913", "268/8913", "304/8913",
"369/8913", "172/8913"), pvalue = c(1.71734251351175e-05, 0.000371697551253212,
0.000461162882387207, 0.000743223332386019, 0.00153454841678613,
0.0017516128327889), p.adjust = c(0.000618243304864229, 0.00553395458864649,
0.00553395458864649, 0.00668900999147417, 0.0105096769967334,
0.0105096769967334), qvalue = c(0.000506164109245568, 0.00453072305503221,
0.00453072305503221, 0.00547638244916014, 0.00860441391545427,
0.00860441391545427), geneID = c("22335/66997/22145/595136/22195",
"22335/66997/22145/595136/22195", "22335/66997/22145/595136",
"22335/66997/22145/595136", "22335/66997/22145/595136", "20832/27061/105245"
), Count = c(5L, 5L, 4L, 4L, 4L, 3L), ontology = c("kegg", "kegg",
"kegg", "kegg", "kegg", "kegg")), row.names = c(NA, -6L), class = "data.frame")
enr_cp <- structure(list(ID = c("mmu05012", "mmu04141", "GO:0005791", "GO:0030867"), Description = c(
......@@ -150,6 +151,6 @@ test_that("Enriched terms are found", {
test_that("Enriched CP enrichment is correct", {
res_cp <- cp_test(entrez, annoDb = "org.Mm.eg.db", cp_species = "mouse")
expect_equal(res_cp$ID, enr_cp2$ID)
res_cp_test <- cp_test(entrez, annoDb = "org.Mm.eg.db", cp_species = "mouse")
expect_equal(res_cp_test$ID, enr2$ID)
})
context("DF helper functions")
library("corescf")
library("dplyr")
## mock data
set.set.seed(456)
set.seed(456)
## a data.frame
df <- data.frame(
......@@ -17,6 +18,9 @@ x <- c(w = 1, c = 2)
## factor
fac <- as.factor(df$A)
df_cont <- structure(list(A = as.factor(c("a", "b")), num_occ = c(5L, 5L)), row.names = c(NA,
-2L), class = "data.frame")
test_that("Resulting data.frame contains the same data but is not the same.", {
rand <- shuffle(df)
expect_false(isTRUE(all.equal(df, rand)))
......@@ -36,7 +40,7 @@ test_that("A data frame is created", {
expect_s3_class(dfy, "data.frame")
expect_error(vec_as_df(1:3))
# expect_type(vec_as_df(1:3)$name, "NULL")
})
......@@ -170,22 +174,16 @@ test_that("Row names are set properly.", {
})
test_that("Contingency table has expected values.", {
df_cont <- tribble(
~num_occ, ~n,
5L, 2L
)
# test_that("Contingency table has expected values.", {
# df_cont <- tibble::tribble(
# ~num_occ, ~n,
# 5L, 2L
# )
expect_equal(dcount(df, A), df_cont)
})
# expect_equal(dcount(df, A), df_cont)
# })
test_that("Contingency table has expected values.", {
df_cont <- tribble(
~A, ~num_occ,
"a", 5L,
"b", 5L
)
df_cont$A <- as.factor(df_cont$A)
expect_equal(count_as(df, "num_occ", A), df_cont)
})
......@@ -213,7 +211,7 @@ test_that("Returns a vector.", {
slice(1) %>%
as.data.frame()
expect_vector(get_col(df, 1))
expect_vector(dplyr::pull(df, 1))
})
test_that("Returns a modified vector.", {
......
context("System helper functions")
library("corescf")
test_that("Are elements unique.", {
lett <- letters[1:5]
lett_del <- c("b", "c", "e")
......@@ -9,9 +13,10 @@ test_that("Are elements unique.", {
expect_true(all_unique(c(1:10, NA)))
})
test_that("Has prefix been added.", {
results_prefix <- "a_prefix"
test_that("A prefix been added.", {
results_prefix <<- "a_prefix"
message("variable results_prefix:", results_prefix)
expect_true(grepl("a_prefix", add_prefix("my_file.txt")))
expect_equal(add_prefix("my_file.txt"), "./a_prefix.my_file.txt")
})
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment