From 305be2b24043442fd3988a32a73bab7c23ef18fa Mon Sep 17 00:00:00 2001 From: Holger Brandl <brandl@mpi-cbg.de> Date: Thu, 14 Aug 2014 10:29:14 +0200 Subject: [PATCH] cont. new r utils --- R/core_commons.R | 103 +++++++++++++++++++++++++++++++++++----- R/datatable_utils.R | 2 +- R/df_utils.R | 77 ------------------------------ R/ggplot/ggplot_utils.R | 1 + 4 files changed, 92 insertions(+), 91 deletions(-) delete mode 100644 R/df_utils.R diff --git a/R/core_commons.R b/R/core_commons.R index fa8937b..0e95ac1 100644 --- a/R/core_commons.R +++ b/R/core_commons.R @@ -20,35 +20,37 @@ require.auto <- function(x){ } } +#qlibrary <- function(libname) {library(as.character(substitute(libname)), quietly=T, warn.conflicts=F, character.only=T )} + +## set a default cran r mirror +#cat(".Rprofile: Setting Goettingen repository\n") +r = getOption("repos") # hard code the UK repo for CRAN
 +r["CRAN"] = "http://ftp5.gwdg.de/pub/misc/cran/" +options(repos = r) +rm(r) + + +## load core packages require.auto(plyr) require.auto(stringr) require.auto(reshape2) #require.auto(reshape2, quietly=T, warn.conflicts=F) -require.auto(scales) +## load on purpose after plyr require.auto(dplyr) - #require.auto(data.table) +## user browser for help options(help_type="html") + +## plot more characters per line options(width=150) # for sqldf to avoid the use of tckl options(gsubfn.engine = "R") -##### set the r mirror -#cat(".Rprofile: Setting Goettingen repository\n") -r = getOption("repos") # hard code the UK repo for CRAN
 -r["CRAN"] = "http://ftp5.gwdg.de/pub/misc/cran/" -options(repos = r) -rm(r) - - - - -#qlibrary <- function(libname) {library(as.character(substitute(libname)), quietly=T, warn.conflicts=F, character.only=T )} @@ -77,6 +79,81 @@ unlen <- function(x) length(unique(x)) pp <- function(dat) page(dat, method = "print") +######################################################################################################################## +#### data.frame manipulation + +subsample <- function(df, sampleSize, ...){ + df[sample(1:nrow(df), min(sampleSize, nrow(df)), ...),] +} + +shuffle <- function(df) df[sample(nrow(df)),] + +first <- function(x, n=1) head(x,n) + +as.df <- function(dt) as.data.frame(dt) + + + +rownames2column <- function(df, colname){ + df <- as.df(df) + df$tttt <- rownames(df); + rownames(df) <- NULL; + rename(df, c(tttt=colname)) +} + + +column2rownames<- function(df, colname){ +#browser() + rownames(df) <- ac(df[,colname]) + df[colname] <- NULL + return(df) +} + +## pushing some columns to the right end of a data.frame +push_right <- function(df, pushColNames){ + df[,c(setdiff(names(df),pushColNames), pushColNames)] +} + + +push_left <- function(df, pushColNames){ + df[,c(pushColNames, setdiff(names(df),pushColNames))] +} + + +set_names <- function(df, newnames){ + df<- as.df(df) + names(df) <- newnames; + return(df) +} + + +print_head <- function(df, desc=NULL){ + print(head(df)) + return(df) +} + + +fac2char <- function(mydata, convert=names(mydata)[sapply(mydata, is.factor)]){ + if(length(convert)==0) + return(mydata) + + inputColOrder <- names(mydata) + + convertData <- subset(mydata, select= names(mydata)%in%convert) + convertData <- as.data.frame(lapply(convertData, as.character), stringsAsFactors = FALSE) + + keepData <- subset(mydata, select=!(names(mydata)%in%convert)) + newdata <- cbind(convertData, keepData) + newdata <- newdata[,inputColOrder] + + return(newdata) +} + + +## workaround for biomart +## Deprecated: load dplyr after biomart to avoid this problem +#dselect <- function(...) dplyr::select(...) + ######################################################################################################################## #### File System diff --git a/R/datatable_utils.R b/R/datatable_utils.R index 3a7733d..7b9214d 100644 --- a/R/datatable_utils.R +++ b/R/datatable_utils.R @@ -1,4 +1,4 @@ - require.auto(data.table) +require.auto(data.table) dt.merge <- function(dfA, dfB, by=intersect(names(dfA), names(dfB)) , ...) { diff --git a/R/df_utils.R b/R/df_utils.R deleted file mode 100644 index 1bf549a..0000000 --- a/R/df_utils.R +++ /dev/null @@ -1,77 +0,0 @@ - - -subsample <- function(df, sampleSize, ...){ - df[sample(1:nrow(df), min(sampleSize, nrow(df)), ...),] -} - -shuffle <- function(df) df[sample(nrow(df)),] - -first <- function(x, n=1) head(x,n) - -as.df <- function(dt) as.data.frame(dt) - - -## small wrappers - - - - -rownames2column <- function(df, colname){ - df <- as.df(df) - df$tttt <- rownames(df); - rownames(df) <- NULL; - rename(df, c(tttt=colname)) -} - -column2rownames<- function(df, colname){ -#browser() - rownames(df) <- ac(df[,colname]) - df[colname] <- NULL - return(df) -} - -## pushing some columns to the right end of a data.frame -push_right <- function(df, pushColNames){ - df[,c(setdiff(names(df),pushColNames), pushColNames)] -} - -push_left <- function(df, pushColNames){ - df[,c(pushColNames, setdiff(names(df),pushColNames))] -} - -set_names <- function(df, newnames){ - df<- as.df(df) - names(df) <- newnames; - return(df) -} - - -## dplyr utilities - -print_head <- function(df, desc=NULL){ - print(head(df)) - return(df) -} - - -fac2char <- function(mydata, convert=names(mydata)[sapply(mydata, is.factor)]){ - if(length(convert)==0) - return(mydata) - - inputColOrder <- names(mydata) - - convertData <- subset(mydata, select= names(mydata)%in%convert) - convertData <- as.data.frame(lapply(convertData, as.character), stringsAsFactors = FALSE) - - keepData <- subset(mydata, select=!(names(mydata)%in%convert)) - newdata <- cbind(convertData, keepData) - newdata <- newdata[,inputColOrder] - - return(newdata) -} - - -## workaround for biomart -dselect <- function(...) dplyr::select(...) - - diff --git a/R/ggplot/ggplot_utils.R b/R/ggplot/ggplot_utils.R index b77ae14..e613b24 100644 --- a/R/ggplot/ggplot_utils.R +++ b/R/ggplot/ggplot_utils.R @@ -1,5 +1,6 @@ require.auto(ggplot2) +require.auto(scales) -- GitLab