split up R utils

31d82898 · Holger Brandl · 93b4c0c9 · 31d82898 · 31d82898 · 31d82898
Commit 31d82898 authored 10 years ago by Holger Brandl
--- a/R/core_commons.R
+++ b/R/core_commons.R
+
+## automatic package installation
+require.auto <-  function(x){
+    x <- as.character(substitute(x))
+
+    if(isTRUE(x %in% .packages(all.available=TRUE))) {
+        eval(parse(text=paste("require(", x, ",  quietly=T)", sep="")))
+    } else {
+    #        update.packages(ask=F) # update dependencies, if any.
+        eval(parse(text=paste("install.packages('", x, "')", sep="")))
+    }
+
+    if(isTRUE(x %in% .packages(all.available=TRUE))) {
+        eval(parse(text=paste("require(", x, ",  quietly=T)", sep="")))
+    } else {
+        source("http://bioconductor.org/biocLite.R")
+        biocLite(character(), ask=FALSE) # update dependencies, if any.
+        eval(parse(text=paste("biocLite('", x, "')", sep="")))
+        eval(parse(text=paste("require(", x, ",  quietly=T)", sep="")))
+    }
+}
+
+require.auto(plyr)
+require.auto(stringr)
+require.auto(reshape2)
+#require.auto(reshape2, quietly=T, warn.conflicts=F)
+require.auto(scales)
+
+require.auto(dplyr)
+
+
+#require.auto(data.table)
+
+options(help_type="html")
+options(width=150)
+
+# for sqldf to avoid the use of tckl
+options(gsubfn.engine = "R")
+
+
+##### set the r mirror
+#cat(".Rprofile: Setting Goettingen repository\n")
+r = getOption("repos") # hard code the UK repo for CRAN 
+r["CRAN"] = "http://ftp5.gwdg.de/pub/misc/cran/"
+options(repos = r)
+rm(r)
+
+
+
+
+#qlibrary <- function(libname) {library(as.character(substitute(libname)), quietly=T, warn.conflicts=F, character.only=T )}
+
+
+
+
+########################################################################################################################
+#### Small aliases
+
+
+#praste <- function(...) print(paste(...))
+echo <- function(...) print(paste(...))
+
+
+ac <- function(...) as.character(...)
+
+qns <- function() quit(save="no")
+
+
+# string concatenation without space gaps (could/should use paste0 instead)
+concat <- function(...) paste(..., sep="")
+
+
+se<-function(x)	sd(x, na.rm=TRUE) / sqrt(sum(!is.na(x)))
+
+unlen <- function(x) length(unique(x))
+
+pp <- function(dat) page(dat, method = "print")
+
+
+
+########################################################################################################################
+#### File System
+
+is.directory <- function(dirname) !is.na(file.info(dirname)$isdir)
+
+
+mcdir <- function(dirname){
+    if(!file.exists(dirname)){
+        dir.create(dirname)
+    }
+
+    setwd(dirname)
+}
+
+locload <- function(fileName) local(get(load(fileName)))
+
+
+rmerge <- function(LDF, by, ...){
+    DF <- LDF[[1]]
+    for (i in 2:length(LDF)) {
+        DF <- merge(DF, LDF[[i]], by=by)
+    }
+    DF
+}
+
+
+
+
+trimEnd <-function(fileNames, exts=c()){
+    for(fileExt in exts){
+        fileNames <- str_replace(fileNames, paste(fileExt, "$",sep=""), "")
+    }
+
+    fileNames
+}
+
+chopFileExt <-function(fileNames, exts=c()){
+    warning("this method is deprecated. use trimEnd instead")
+    for(fileExt in exts){
+        fileNames <- str_replace(fileNames, paste(".", fileExt, "$",sep=""), "")
+    }
+
+    fileNames
+}
+
+write.delim <- function(df, header=TRUE,...){
+    write.table(df, row.names=FALSE, col.names=header, sep="\t", ...)
+}
+
+## writes a table in bed format expecting columns being ordered according to bed spec already
+write.bed <- function(bedData, file){
+    write.table(bedData, file=file, quote=FALSE, sep ="\t", na="NA", row.names=FALSE, col.names=FALSE)
+}
+#options(scipen=100) ## necessary to disable scientific number formats for long integers
+
+
+
+
+rmSomeElements <- function(vec, toDel) vec[!(vec %in% toDel)]
+
+rmLastElement <- function(vec) vec[-length(vec)]
+
+
+########################################################################################################################
+#### File System
+
+## Memory management
+
+# improved list of objects
+lsos <- function (pos = 1, pattern, order.by,
+                        decreasing=FALSE, head=FALSE, n=5) {
+    napply <- function(names, fn) sapply(names, function(x)
+                                         fn(get(x, pos = pos)))
+    names <- ls(pos = pos, pattern = pattern)
+    obj.class <- napply(names, function(x) as.character(class(x))[1])
+    obj.mode <- napply(names, mode)
+    obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class)
+    obj.size <- napply(names, object.size)/1000000
+    obj.dim <- t(napply(names, function(x)
+                        as.numeric(dim(x))[1:2]))
+    vec <- is.na(obj.dim)[, 1] & (obj.type != "function")
+    obj.dim[vec, 1] <- napply(names, length)[vec]
+    out <- data.frame(obj.type, obj.size, obj.dim)
+    names(out) <- c("Type", "Size", "Rows", "Columns")
+    if (!missing(order.by))
+        out <- out[order(out[[order.by]], decreasing=decreasing), ]
+    if (head)
+        out <- head(out, n)
+#        out
+    out <- transform(out, var_name=rownames(out))
+    rownames(out) <- NULL
+    arrange(out, Size)
+}
+
+# shorthand
+lsosh <- function(..., n=10) {
+    lsos(..., order.by="Size", decreasing=TRUE, head=TRUE, n=n)
+}
+
--- a/R/plyr_utils.R
+++ b/R/plyr_utils.R
+    require.auto(data.table)
+
+
+dt.merge <- function(dfA, dfB, by=intersect(names(dfA), names(dfB)) , ...)  {
+#    require(data.table)
+    as.df(merge(data.table(dfA, key=by), data.table(dfB, key=by), ...))
+#    unloadNameSpace(data.table)
+}
+

-require(data.table)

 #   http://stackoverflow.com/questions/11792527/filtering-out-duplicated-non-unique-rows-in-data-table
 unique_rows <- function(df, columns){
+
    unique(setkeyv(data.table(df), columns)) %>% as.df()
 }
--- a/R/df_utils.R
+++ b/R/df_utils.R
+
+
+subsample <- function(df, sampleSize, ...){
+    df[sample(1:nrow(df), min(sampleSize, nrow(df)), ...),]
+}
+
+shuffle <- function(df) df[sample(nrow(df)),]
+
+first <- function(x, n=1) head(x,n)
+
+as.df <- function(dt) as.data.frame(dt)
+
+
+## small wrappers
+
+
+
+
+rownames2column <- function(df, colname){
+    df <- as.df(df)
+    df$tttt <- rownames(df);
+    rownames(df) <- NULL;
+    rename(df, c(tttt=colname))
+}
+
+column2rownames<- function(df, colname){
+#browser()
+    rownames(df) <- ac(df[,colname])
+    df[colname] <- NULL
+    return(df)
+}
+
+## pushing some columns to the right end of a data.frame
+push_right <- function(df, pushColNames){
+    df[,c(setdiff(names(df),pushColNames), pushColNames)]
+}
+
+push_left <- function(df, pushColNames){
+    df[,c(pushColNames, setdiff(names(df),pushColNames))]
+}
+
+set_names <- function(df, newnames){
+    df<- as.df(df)
+    names(df) <- newnames;
+    return(df)
+}
+
+
+## dplyr utilities
+
+print_head <- function(df, desc=NULL){
+    print(head(df))
+    return(df)
+}
+
+
+fac2char <- function(mydata, convert=names(mydata)[sapply(mydata, is.factor)]){
+    if(length(convert)==0)
+        return(mydata)
+
+    inputColOrder <- names(mydata)
+
+    convertData <- subset(mydata, select= names(mydata)%in%convert)
+    convertData <- as.data.frame(lapply(convertData, as.character), stringsAsFactors = FALSE)
+
+    keepData <-  subset(mydata, select=!(names(mydata)%in%convert))
+    newdata <- cbind(convertData, keepData)
+    newdata <- newdata[,inputColOrder]
+
+    return(newdata)
+}
+
+
+## workaround for biomart
+dselect <- function(...) dplyr::select(...)
+
+
--- a/R/ggheatmap.R
+++ b/R/ggheatmap.R
--- a/R/ggplot_utils.R
+++ b/R/ggplot_utils.R

+require.auto(ggplot2)
+
+
+
+plotPDF <- function(filename, expr){ pdf(paste0(filename, ".pdf")); expr; dev.off(); }
+#plotPDF("test", plot(1:10))
+
+
+# Multiple plot function
+#
+# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
+# - cols:   Number of columns in layout
+# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
+#
+# If the layout is something like matrix(c(1,2,3,3), nrow=2, by.row=TRUE),
+# then plot 1 will go in the upper left, 2 will go in the upper right, and
+# 3 will go all the way across the bottom.
+#
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+  require(grid)
+
+  # Make a list from the ... arguments and plotlist
+  plots <- c(list(...), plotlist)
+
+  numPlots = length(plots)
+
+  # If layout is NULL, then use 'cols' to determine layout
+  if (is.null(layout)) {
+    # Make the panel
+    # ncol: Number of columns of plots
+    # nrow: Number of rows needed, calculated from # of cols
+    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+                    ncol = cols, nrow = ceiling(numPlots/cols))
+  }
+
+ if (numPlots==1) {
+    print(plots[[1]])
+
+  } else {
+    # Set up the page
+    grid.newpage()
+    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+
+    # Make each plot, in the correct location
+    for (i in 1:numPlots) {
+      # Get the i,j matrix positions of the regions that contain this subplot
+      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+
+      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+                                      layout.pos.col = matchidx$col))
+    }
+  }
+}
+
+rotXlab <- function() theme(axis.text.x = element_text(angle = 90, hjust = 1))
+
+
+gg2Format="png"
+
+ggsave2 <- function(gplot=last_plot(), width=8, height=6, prefix="", saveData=FALSE, outputFormat=gg2Format, ...){
+    title <- try(gplot$labels[["title"]])
+
+    if(is.null(title)){
+        varMapping <- gplot$labels
+        varMapping <- varMapping[names(varMapping) %in% c("x", "y")]
+#        if(varMapping==NULL){
+#            varMapping <- gplot$mapping;
+#        }
+
+        if(length(varMapping) == 1){
+            title= paste("distribution of", varMapping)
+        }else{
+            title = try(paste(varMapping, collapse=" vs "))
+        # stop("last plot had no title. Use ggsave() and give it a manual title")
+        }
+
+        rawFacetDesc <- format(gplot$facet)
+        if(rawFacetDesc!="facet_null()"){
+            title <- paste(title, "by", str_replace_all(str_match(rawFacetDesc, "facet_.*[(](.*))")[,2], "~", "and"))
+        }
+    }
+
+
+    fileBaseName <- ifelse(nchar(prefix)>0, concat(prefix, " - ", title), title)
+
+    ## clean up weired characters
+    fileBaseName <- str_replace_all(fileBaseName, "[$%/?]", "_")
+
+    fileName = concat(fileBaseName, concat(".", outputFormat))
+
+    ## remove line-breaks and trim spaces
+    fileName = str_replace_all(str_replace_all(fileName, "\\n", ""), "[ ]{2,}", " ")
+#    print(paste("saving plot to ", fileName))
+    ggsave(fileName, width=width, height=height, ...)
+
+    if(saveData){
+        write.delim(gplot$data, file= concat(fileBaseName, ".txt"))
+    }
+
+
+    ##todo review
+    LAST_GG2_OUTPUT <<- fileName
+
+    return(fileName)
+}
+
+### alternate over all open devices when running ggplot
+## disabled because it's causing problems if X11 device is not available
+#ggplot <- function(...){
+#    dev.set(dev.next())
+#    ggplot2::ggplot(...)
+#}
+
+
+scale_fill_redgreed <- function() scale_fill_manual(values = c("red","darkgreen"))


 ########################################################################################################################

--- a/R/holger_utils.R
+++ b/R/holger_utils.R
+
+xtab<- function(mydata){
+    tFile<-tempfile(fileext=".csv",tmpdir="~/tmp")
+    write.table(mydata, file=tFile, row.names=F, sep=",")
+
+    if(str_detect(R.version$platform, "linux")){
+        system(paste("ssh bioinfo-mac-6-wifi 'open -a /Applications/XTabulator.app/ /Volumes/bioinfo/tmp/", basename(tFile), "'", sep=""))
+    }else{
+        system(paste("open -a /Applications/XTabulator.app/ ", tFile))
+    }
+}
+
+xtabh <- function(mydata, rows=100) xtab(head(mydata, rows))
+
+
+## todo remove this
+TAB <- "\t"
+
+xls <- function(data, remoteDesktop="bioinfo-mac-6.mpi-cbg.de"){
+
+#    isRemote=str_detect(R.version$platform, "linux")
+
+
+    if(str_detect(R.version$platform, "linux")){
+        tFile<-tempfile(fileext=".tsv",tmpdir="~/tmp")
+
+        ## http://linux.icydog.net/ssh/piping.php
+        write.table(data, file=pipe(concat("ssh brandl@",remoteDesktop," 'cat - >",tFile,"'")), row.names=F, sep=TAB, quote=F)
+#        browser()
+
+#        system(paste("ssh ",remoteDesktop," 'open -a \"/Applications/_MPI_Applications/Microsoft Office 2011/Microsoft Excel.app\" ", tFile, "'", sep=""))
+        system(format(paste("ssh ",remoteDesktop," 'open -a \"/Applications/_MPI_Applications/Microsoft Office 2011/Microsoft Excel.app\" ", tFile, "'", sep="")))
+
+    }else{
+        tFile<-tempfile(fileext=".csv")
+        write.table(data, file=tFile, row.names=F, sep=",")
+        system(paste("open -a '/Applications/Microsoft Excel' ", tFile))
+    }
+}
+
+xlsh <- function(mydata, rows=100) xls(head(mydata, rows))
+
+