Commit 31d82898 authored by Holger Brandl's avatar Holger Brandl

split up R utils

parent 93b4c0c9
## automatic package installation
require.auto <- function(x){
x <- as.character(substitute(x))
if(isTRUE(x %in% .packages(all.available=TRUE))) {
eval(parse(text=paste("require(", x, ", quietly=T)", sep="")))
} else {
# update.packages(ask=F) # update dependencies, if any.
eval(parse(text=paste("install.packages('", x, "')", sep="")))
}
if(isTRUE(x %in% .packages(all.available=TRUE))) {
eval(parse(text=paste("require(", x, ", quietly=T)", sep="")))
} else {
source("http://bioconductor.org/biocLite.R")
biocLite(character(), ask=FALSE) # update dependencies, if any.
eval(parse(text=paste("biocLite('", x, "')", sep="")))
eval(parse(text=paste("require(", x, ", quietly=T)", sep="")))
}
}
require.auto(plyr)
require.auto(stringr)
require.auto(reshape2)
#require.auto(reshape2, quietly=T, warn.conflicts=F)
require.auto(scales)
require.auto(dplyr)
#require.auto(data.table)
options(help_type="html")
options(width=150)
# for sqldf to avoid the use of tckl
options(gsubfn.engine = "R")
##### set the r mirror
#cat(".Rprofile: Setting Goettingen repository\n")
r = getOption("repos") # hard code the UK repo for CRAN

r["CRAN"] = "http://ftp5.gwdg.de/pub/misc/cran/"
options(repos = r)
rm(r)
#qlibrary <- function(libname) {library(as.character(substitute(libname)), quietly=T, warn.conflicts=F, character.only=T )}
########################################################################################################################
#### Small aliases
#praste <- function(...) print(paste(...))
echo <- function(...) print(paste(...))
ac <- function(...) as.character(...)
qns <- function() quit(save="no")
# string concatenation without space gaps (could/should use paste0 instead)
concat <- function(...) paste(..., sep="")
se<-function(x) sd(x, na.rm=TRUE) / sqrt(sum(!is.na(x)))
unlen <- function(x) length(unique(x))
pp <- function(dat) page(dat, method = "print")
########################################################################################################################
#### File System
is.directory <- function(dirname) !is.na(file.info(dirname)$isdir)
mcdir <- function(dirname){
if(!file.exists(dirname)){
dir.create(dirname)
}
setwd(dirname)
}
locload <- function(fileName) local(get(load(fileName)))
rmerge <- function(LDF, by, ...){
DF <- LDF[[1]]
for (i in 2:length(LDF)) {
DF <- merge(DF, LDF[[i]], by=by)
}
DF
}
trimEnd <-function(fileNames, exts=c()){
for(fileExt in exts){
fileNames <- str_replace(fileNames, paste(fileExt, "$",sep=""), "")
}
fileNames
}
chopFileExt <-function(fileNames, exts=c()){
warning("this method is deprecated. use trimEnd instead")
for(fileExt in exts){
fileNames <- str_replace(fileNames, paste(".", fileExt, "$",sep=""), "")
}
fileNames
}
write.delim <- function(df, header=TRUE,...){
write.table(df, row.names=FALSE, col.names=header, sep="\t", ...)
}
## writes a table in bed format expecting columns being ordered according to bed spec already
write.bed <- function(bedData, file){
write.table(bedData, file=file, quote=FALSE, sep ="\t", na="NA", row.names=FALSE, col.names=FALSE)
}
#options(scipen=100) ## necessary to disable scientific number formats for long integers
rmSomeElements <- function(vec, toDel) vec[!(vec %in% toDel)]
rmLastElement <- function(vec) vec[-length(vec)]
########################################################################################################################
#### File System
## Memory management
# improved list of objects
lsos <- function (pos = 1, pattern, order.by,
decreasing=FALSE, head=FALSE, n=5) {
napply <- function(names, fn) sapply(names, function(x)
fn(get(x, pos = pos)))
names <- ls(pos = pos, pattern = pattern)
obj.class <- napply(names, function(x) as.character(class(x))[1])
obj.mode <- napply(names, mode)
obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class)
obj.size <- napply(names, object.size)/1000000
obj.dim <- t(napply(names, function(x)
as.numeric(dim(x))[1:2]))
vec <- is.na(obj.dim)[, 1] & (obj.type != "function")
obj.dim[vec, 1] <- napply(names, length)[vec]
out <- data.frame(obj.type, obj.size, obj.dim)
names(out) <- c("Type", "Size", "Rows", "Columns")
if (!missing(order.by))
out <- out[order(out[[order.by]], decreasing=decreasing), ]
if (head)
out <- head(out, n)
# out
out <- transform(out, var_name=rownames(out))
rownames(out) <- NULL
arrange(out, Size)
}
# shorthand
lsosh <- function(..., n=10) {
lsos(..., order.by="Size", decreasing=TRUE, head=TRUE, n=n)
}
require.auto(data.table)
dt.merge <- function(dfA, dfB, by=intersect(names(dfA), names(dfB)) , ...) {
# require(data.table)
as.df(merge(data.table(dfA, key=by), data.table(dfB, key=by), ...))
# unloadNameSpace(data.table)
}
require(data.table)
# http://stackoverflow.com/questions/11792527/filtering-out-duplicated-non-unique-rows-in-data-table
unique_rows <- function(df, columns){
unique(setkeyv(data.table(df), columns)) %>% as.df()
}
subsample <- function(df, sampleSize, ...){
df[sample(1:nrow(df), min(sampleSize, nrow(df)), ...),]
}
shuffle <- function(df) df[sample(nrow(df)),]
first <- function(x, n=1) head(x,n)
as.df <- function(dt) as.data.frame(dt)
## small wrappers
rownames2column <- function(df, colname){
df <- as.df(df)
df$tttt <- rownames(df);
rownames(df) <- NULL;
rename(df, c(tttt=colname))
}
column2rownames<- function(df, colname){
#browser()
rownames(df) <- ac(df[,colname])
df[colname] <- NULL
return(df)
}
## pushing some columns to the right end of a data.frame
push_right <- function(df, pushColNames){
df[,c(setdiff(names(df),pushColNames), pushColNames)]
}
push_left <- function(df, pushColNames){
df[,c(pushColNames, setdiff(names(df),pushColNames))]
}
set_names <- function(df, newnames){
df<- as.df(df)
names(df) <- newnames;
return(df)
}
## dplyr utilities
print_head <- function(df, desc=NULL){
print(head(df))
return(df)
}
fac2char <- function(mydata, convert=names(mydata)[sapply(mydata, is.factor)]){
if(length(convert)==0)
return(mydata)
inputColOrder <- names(mydata)
convertData <- subset(mydata, select= names(mydata)%in%convert)
convertData <- as.data.frame(lapply(convertData, as.character), stringsAsFactors = FALSE)
keepData <- subset(mydata, select=!(names(mydata)%in%convert))
newdata <- cbind(convertData, keepData)
newdata <- newdata[,inputColOrder]
return(newdata)
}
## workaround for biomart
dselect <- function(...) dplyr::select(...)
require.auto(ggplot2)
plotPDF <- function(filename, expr){ pdf(paste0(filename, ".pdf")); expr; dev.off(); }
#plotPDF("test", plot(1:10))
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, by.row=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
require(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
rotXlab <- function() theme(axis.text.x = element_text(angle = 90, hjust = 1))
gg2Format="png"
ggsave2 <- function(gplot=last_plot(), width=8, height=6, prefix="", saveData=FALSE, outputFormat=gg2Format, ...){
title <- try(gplot$labels[["title"]])
if(is.null(title)){
varMapping <- gplot$labels
varMapping <- varMapping[names(varMapping) %in% c("x", "y")]
# if(varMapping==NULL){
# varMapping <- gplot$mapping;
# }
if(length(varMapping) == 1){
title= paste("distribution of", varMapping)
}else{
title = try(paste(varMapping, collapse=" vs "))
# stop("last plot had no title. Use ggsave() and give it a manual title")
}
rawFacetDesc <- format(gplot$facet)
if(rawFacetDesc!="facet_null()"){
title <- paste(title, "by", str_replace_all(str_match(rawFacetDesc, "facet_.*[(](.*))")[,2], "~", "and"))
}
}
fileBaseName <- ifelse(nchar(prefix)>0, concat(prefix, " - ", title), title)
## clean up weired characters
fileBaseName <- str_replace_all(fileBaseName, "[$%/?]", "_")
fileName = concat(fileBaseName, concat(".", outputFormat))
## remove line-breaks and trim spaces
fileName = str_replace_all(str_replace_all(fileName, "\\n", ""), "[ ]{2,}", " ")
# print(paste("saving plot to ", fileName))
ggsave(fileName, width=width, height=height, ...)
if(saveData){
write.delim(gplot$data, file= concat(fileBaseName, ".txt"))
}
##todo review
LAST_GG2_OUTPUT <<- fileName
return(fileName)
}
### alternate over all open devices when running ggplot
## disabled because it's causing problems if X11 device is not available
#ggplot <- function(...){
# dev.set(dev.next())
# ggplot2::ggplot(...)
#}
scale_fill_redgreed <- function() scale_fill_manual(values = c("red","darkgreen"))
########################################################################################################################
......
xtab<- function(mydata){
tFile<-tempfile(fileext=".csv",tmpdir="~/tmp")
write.table(mydata, file=tFile, row.names=F, sep=",")
if(str_detect(R.version$platform, "linux")){
system(paste("ssh bioinfo-mac-6-wifi 'open -a /Applications/XTabulator.app/ /Volumes/bioinfo/tmp/", basename(tFile), "'", sep=""))
}else{
system(paste("open -a /Applications/XTabulator.app/ ", tFile))
}
}
xtabh <- function(mydata, rows=100) xtab(head(mydata, rows))
## todo remove this
TAB <- "\t"
xls <- function(data, remoteDesktop="bioinfo-mac-6.mpi-cbg.de"){
# isRemote=str_detect(R.version$platform, "linux")
if(str_detect(R.version$platform, "linux")){
tFile<-tempfile(fileext=".tsv",tmpdir="~/tmp")
## http://linux.icydog.net/ssh/piping.php
write.table(data, file=pipe(concat("ssh brandl@",remoteDesktop," 'cat - >",tFile,"'")), row.names=F, sep=TAB, quote=F)
# browser()
# system(paste("ssh ",remoteDesktop," 'open -a \"/Applications/_MPI_Applications/Microsoft Office 2011/Microsoft Excel.app\" ", tFile, "'", sep=""))
system(format(paste("ssh ",remoteDesktop," 'open -a \"/Applications/_MPI_Applications/Microsoft Office 2011/Microsoft Excel.app\" ", tFile, "'", sep="")))
}else{
tFile<-tempfile(fileext=".csv")
write.table(data, file=tFile, row.names=F, sep=",")
system(paste("open -a '/Applications/Microsoft Excel' ", tFile))
}
}
xlsh <- function(mydata, rows=100) xls(head(mydata, rows))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment