core_commons.R 18.6 KB
Newer Older
Holger Brandl's avatar
Holger Brandl committed
1 2 3 4
########################################################################################################################
## set a default cran r mirror  and customize environment

#cat(".Rprofile: Setting Goettingen repository\n")
Holger Brandl's avatar
Holger Brandl committed
5 6
#todo consider to use chooseCRANmirror(graphics=FALSE, ind=10) instead

Holger Brandl's avatar
Holger Brandl committed
7 8 9 10 11 12 13
r = getOption("repos") # hard code the UK repo for CRAN

r["CRAN"] = "http://ftp5.gwdg.de/pub/misc/cran/"
options(repos = r)
rm(r)


## user browser for help
Holger Brandl's avatar
Holger Brandl committed
14
options(help_type = "html")
Holger Brandl's avatar
Holger Brandl committed
15 16

## plot more characters per line
Holger Brandl's avatar
Holger Brandl committed
17
options(width = 150)
Holger Brandl's avatar
Holger Brandl committed
18

19 20 21
## adjust dplyr printing settings
## http://stackoverflow.com/questions/22471256/overriding-variables-not-shown-in-dplyr-to-display-all-columns-from-df
## http://stackoverflow.com/questions/29396111/dplyrtbl-df-fill-whole-screen
22
# options(dplyr.print_min = 20) ## num rows
23
options(dplyr.width = 130) ## width
24
#options(dplyr.width = 250); options(width=250) ## width
25

Holger Brandl's avatar
Holger Brandl committed
26 27 28
# for sqldf to avoid the use of tckl
options(gsubfn.engine = "R")

Holger Brandl's avatar
Holger Brandl committed
29

Holger Brandl's avatar
Holger Brandl committed
30
########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
31
## automatic package installation
Holger Brandl's avatar
Holger Brandl committed
32

Holger Brandl's avatar
Holger Brandl committed
33

34
## externalized installer to also allow for installation without loading
35
install_package <- function(x){
Holger Brandl's avatar
Holger Brandl committed
36
    if (! isTRUE(x %in% .packages(all.available = TRUE)) && any(available.packages()[, 1] == x)) {
37
        # update.packages(ask=F) # update dependencies, if any.
Holger Brandl's avatar
Holger Brandl committed
38
        eval(parse(text = paste("install.packages('", x, "')", sep = "")))
39 40 41
    }

    ## if it's still missing check if it's on bioconductor
Holger Brandl's avatar
Holger Brandl committed
42
    if (! isTRUE(x %in% .packages(all.available = TRUE))) {
43 44
        bcPackages <- as.vector(read.dcf(url("https://bioconductor.org/packages/3.3/bioc/src/contrib/PACKAGES"), "Package"))

Holger Brandl's avatar
Holger Brandl committed
45
        if (any(bcPackages == x)) {
46
            source("http://bioconductor.org/biocLite.R")
Holger Brandl's avatar
Holger Brandl committed
47
            eval(parse(text = paste("biocLite('", x, "', ask=FALSE)", sep = "")))
48 49
        }
    }
50 51
}

Holger Brandl's avatar
Holger Brandl committed
52
load_pack <- function(x, warn_conflicts=T){
Holger Brandl's avatar
Holger Brandl committed
53 54
    x <- as.character(substitute(x));

Holger Brandl's avatar
Holger Brandl committed
55
    install_package(x)
Holger Brandl's avatar
Holger Brandl committed
56

57
    ## load it using a library function so that load_pack errors if package is still not ins
Holger Brandl's avatar
Holger Brandl committed
58
    eval(parse(text = paste("base::library(", x, ",  quietly=T, warn.conflicts=", warn_conflicts, ")", sep = "")))
Holger Brandl's avatar
Holger Brandl committed
59 60
}

61

62 63
check_version = function(pkg_name, min_version) {
    cur_version = packageVersion(pkg_name)
Holger Brandl's avatar
Holger Brandl committed
64
    if (cur_version < min_version) stop(sprintf("Package %s needs a newer version,
65 66 67
               found %s, need at least %s", pkg_name, cur_version, min_version))
}
#check_version("dplyr", "0.4-1")
Holger Brandl's avatar
Holger Brandl committed
68

Holger Brandl's avatar
Holger Brandl committed
69
########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
70
## load core packages
Holger Brandl's avatar
Holger Brandl committed
71

Holger Brandl's avatar
Holger Brandl committed
72 73 74 75 76
#if(!any(.packages(all.available=TRUE)=="biomaRt")){
#    source("http://bioconductor.org/biocLite.R")
#    biocLite("biomaRt", ask=FALSE)
#}

Holger Brandl's avatar
Holger Brandl committed
77 78
#load_pack(plyr)
#load_pack(reshape2)
79
#load_pack(reshape2, quietly=T, warn_conflicts=F)
Holger Brandl's avatar
Holger Brandl committed
80

Holger Brandl's avatar
Holger Brandl committed
81
## load on purpose after plyr
Holger Brandl's avatar
Holger Brandl committed
82
load_pack(purrr)
Holger Brandl's avatar
Holger Brandl committed
83
load_pack(tibble)
Holger Brandl's avatar
Holger Brandl committed
84 85 86
load_pack(dplyr, warn_conflicts = F)
load_pack(magrittr, warn_conflicts = F)
load_pack(tidyr, warn_conflicts = F)
Holger Brandl's avatar
Holger Brandl committed
87 88
load_pack(stringr)
load_pack(readr)
Holger Brandl's avatar
Holger Brandl committed
89
load_pack(forcats)
Holger Brandl's avatar
Holger Brandl committed
90
load_pack(readxl) ## supress differring build number
91

Holger Brandl's avatar
Holger Brandl committed
92
## needed for caching
93
load_pack(digest)
Holger Brandl's avatar
Holger Brandl committed
94 95 96

#suppressWarnings(load_pack(readxl)) ## supress differring build number

97
#load_pack(readxl) ## supress differring build number
98 99 100


## common plotting requirements since they are omnipresent
101
load_pack(ggplot2)
Holger Brandl's avatar
Holger Brandl committed
102
load_pack(scales, warn_conflicts = F)
103
load_pack(grid)
104

Holger Brandl's avatar
Holger Brandl committed
105
## for table exploration without using Rstudio
106
load_pack(DT)
Holger Brandl's avatar
Holger Brandl committed
107

Holger Brandl's avatar
Holger Brandl committed
108 109

## moved into datatable_commons because replaced almost everywhere with dplyr
110
#load_pack(data.table)
Holger Brandl's avatar
Holger Brandl committed
111 112 113 114



########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
115
#### Convenience aliases
Holger Brandl's avatar
Holger Brandl committed
116 117


Holger Brandl's avatar
Holger Brandl committed
118
echo <- function(...) cat(paste(...), fill = T)
Holger Brandl's avatar
Holger Brandl committed
119 120 121 122

ac <- function(...) as.character(...)

# string concatenation without space gaps (could/should use paste0 instead)
Holger Brandl's avatar
Holger Brandl committed
123 124
## Deprecated: use paste0 instead
#concat <- function(...) paste(..., sep="")
Holger Brandl's avatar
Holger Brandl committed
125 126 127 128 129

unlen <- function(x) length(unique(x))

pp <- function(dat) page(dat, method = "print")

Holger Brandl's avatar
Holger Brandl committed
130 131
# TODO .Deprecated and .Defunct (see http://ropensci.org/blog/technotes/2017/01/05/package-evolution)

Holger Brandl's avatar
Holger Brandl committed
132
# as.df <- function(dt){ warning("DEPRECATED: use as_df instead of as.df"); as.data.frame(dt)}
Holger Brandl's avatar
Holger Brandl committed
133
as_df <- function(dt) as.data.frame(dt)
Holger Brandl's avatar
Holger Brandl committed
134

Holger Brandl's avatar
Holger Brandl committed
135

Holger Brandl's avatar
Holger Brandl committed
136 137 138 139 140
install_package("tibble")

## restore pre-tibble-v1.2 naming to creating data-frame in place
frame_data = function(...) tibble::tribble(...)

141

Holger Brandl's avatar
Holger Brandl committed
142
add_rownames = function(...){ warning("DEPRECATED: Use tibble::rownames_to_column directly"); tibble::rownames_to_column(...)}
143

Holger Brandl's avatar
Holger Brandl committed
144 145 146 147 148 149 150 151 152 153 154 155 156

## redefine dply::splat to allow for more streamlined rowwise df-processing
## see https://groups.google.com/forum/?utm_source=digest&utm_medium=email#!topic/manipulatr/E6WOcHlRJcw
#splat = function (flat) {
#    function(args, ...) {
#        do.call(flat, c(args, list(...)))
#    }
#}
## for now simply import just splat from plyr namespace
install_package("plyr")
splat = plyr::splat


Holger Brandl's avatar
Holger Brandl committed
157 158 159 160 161 162
########################################################################################################################
#### data.frame manipulation


shuffle <- function(df) df[sample(nrow(df)),]

Holger Brandl's avatar
Holger Brandl committed
163
first <- function(x, n=1) head(x, n)
Holger Brandl's avatar
Holger Brandl committed
164
first_group = function(x, which=1) x %>% nest %>% slice(which) %>% unnest(data)
Holger Brandl's avatar
Holger Brandl committed
165 166 167



Holger Brandl's avatar
Holger Brandl committed
168 169
vec2df <- function(namedVec){
    warning("DEPRECATED use vec_as_df instead of vec2df")
Holger Brandl's avatar
Holger Brandl committed
170
    namedVec %>% {data.frame(name = names(.), value = ., row.names = NULL)}
Holger Brandl's avatar
Holger Brandl committed
171 172 173
}

vec_as_df <- function(namedVec, row_name="name", value_name="value"){
Holger Brandl's avatar
Holger Brandl committed
174
    data_frame(name = names(namedVec), value = namedVec) %>% set_names(row_name, value_name)
Holger Brandl's avatar
Holger Brandl committed
175
}
Holger Brandl's avatar
Holger Brandl committed
176

Holger Brandl's avatar
Holger Brandl committed
177 178

rownames2column <- function(df, colname){
179
    warning("DEPRECATED Use dplyr::add_rownames directly")
Holger Brandl's avatar
Holger Brandl committed
180
    add_rownames(df, var = colname)
Holger Brandl's avatar
Holger Brandl committed
181 182 183
}


Holger Brandl's avatar
Holger Brandl committed
184
column2rownames <- function(df, colname){
185
    #browser()
Holger Brandl's avatar
Holger Brandl committed
186
    ## force into df to avoid dplyr problems
Holger Brandl's avatar
Holger Brandl committed
187
    df <- as_df(df)
Holger Brandl's avatar
Holger Brandl committed
188

Holger Brandl's avatar
Holger Brandl committed
189
    rownames(df) <- ac(df[, colname])
Holger Brandl's avatar
Holger Brandl committed
190 191 192 193
    df[colname] <- NULL
    return(df)
}

Holger Brandl's avatar
Holger Brandl committed
194 195
## pushing some columns to the end of a data.frame
## TBD how to make this possible without quoting column names?
Holger Brandl's avatar
Holger Brandl committed
196
push_right <- function(df, pushColNames){
Holger Brandl's avatar
Holger Brandl committed
197
    df[, c(setdiff(names(df), pushColNames), pushColNames)]
Holger Brandl's avatar
Holger Brandl committed
198 199 200
}


Holger Brandl's avatar
Holger Brandl committed
201
## pushing some columns to the beginning of a data.frame
Holger Brandl's avatar
Holger Brandl committed
202
push_left <- function(df, pushColNames){
Holger Brandl's avatar
Holger Brandl committed
203
    df[, c(pushColNames, setdiff(names(df), pushColNames))]
Holger Brandl's avatar
Holger Brandl committed
204 205 206
}


Holger Brandl's avatar
Holger Brandl committed
207
#http://astrostatistics.psu.edu/datasets/R/html/base/html/formals.html
208 209 210 211 212
## conflicts with purrr::set_names but does not work with ....
set_names <- function(df, ...){
    newnames <- as.character(unlist(list(...)))

    ## automatically convert matrices to data.frames (otherwise the names set would fail
213
    if (is.matrix(df))df %<>% as.data.frame()
214 215 216 217

    names(df) <- newnames;
    return(df)
}
218 219 220
# iris %>% purrr::set_names(paste(names(iris), "__")) %>% glimpse
# iris %>% set_names(paste(names(iris), "__")) %>% glimpse
#
Holger Brandl's avatar
Holger Brandl committed
221 222
#iris %>% set_names(c("setosa", "hallo")) %>% head
#iris %>% set_names("setosa", "hallo") %>% head
Holger Brandl's avatar
Holger Brandl committed
223 224


225
# devtools::source_url("https://www.dropbox.com/s/r6kim8kb8ohmptx/core_commons.R?dl=1")
226

227 228
pretty_names = function(some_names, make_unique=FALSE){
    new_names = some_names %>%
229
        str_replace_all("[#=.,()/*: -]+", "_") %>%
230 231
        str_replace(fixed("["), "") %>%
        str_replace(fixed("]"), "") %>%
Holger Brandl's avatar
Holger Brandl committed
232
    ## remove leading and tailing underscores
233 234
        str_replace("[_]+$", "") %>%
        str_replace("^[_]+", "") %>%
Holger Brandl's avatar
Holger Brandl committed
235
    ## remove unicode characters
236
        iconv(to = 'ASCII', sub = '') %>% ## http://stackoverflow.com/questions/24807147/removing-unicode-symbols-from-column-names
237 238 239
        tolower

    if(make_unique){
Holger Brandl's avatar
Holger Brandl committed
240
    ## make duplicates unqiue
241 242 243 244
      new_names %<>% make.unique(sep = "_")
    }

    new_names
245
}
246

247
pretty_columns = function(df){
248
    names(df) <- names(df) %>% pretty_names(make_unique=TRUE)
249 250
    df
}
Holger Brandl's avatar
Holger Brandl committed
251

252 253
# http://stackoverflow.com/questions/23188900/view-entire-dataframe-when-wrapped-in-tbl-df
print_all <- function(df) df %>% tbl_df %>% print(n = nrow(.))
Holger Brandl's avatar
Holger Brandl committed
254

Holger Brandl's avatar
Holger Brandl committed
255 256 257
head_html <- function(df, n=5) head(df, n) %>%
    knitr::kable(format = "html") %>%
    print()
Holger Brandl's avatar
Holger Brandl committed
258

Holger Brandl's avatar
Holger Brandl committed
259 260
print_head <- function(df, desc=NULL){
    print(head(df))
Holger Brandl's avatar
Holger Brandl committed
261
    print(nrow(df))
Holger Brandl's avatar
Holger Brandl committed
262 263 264 265 266
    return(df)
}


fac2char <- function(mydata, convert=names(mydata)[sapply(mydata, is.factor)]){
Holger Brandl's avatar
Holger Brandl committed
267
    if (length(convert) == 0) {
Holger Brandl's avatar
Holger Brandl committed
268 269
        return(mydata)
    }
Holger Brandl's avatar
Holger Brandl committed
270 271 272

    inputColOrder <- names(mydata)

Holger Brandl's avatar
Holger Brandl committed
273
    convertData <- subset(mydata, select = names(mydata) %in% convert)
Holger Brandl's avatar
Holger Brandl committed
274 275
    convertData <- as.data.frame(lapply(convertData, as.character), stringsAsFactors = FALSE)

Holger Brandl's avatar
Holger Brandl committed
276
    keepData <- subset(mydata, select = ! (names(mydata) %in% convert))
Holger Brandl's avatar
Holger Brandl committed
277
    newdata <- cbind(convertData, keepData)
Holger Brandl's avatar
Holger Brandl committed
278
    newdata <- newdata[, inputColOrder]
Holger Brandl's avatar
Holger Brandl committed
279 280 281 282

    return(newdata)
}

Holger Brandl's avatar
Holger Brandl committed
283 284 285 286
## convenience method to sort factor levels with decreasing frequencies
fct_revfreq = function(x) fct_infreq(x) %>% fct_rev


Holger Brandl's avatar
Holger Brandl committed
287
## replace R within pipe change just use ... %>% do(replaceNA(0)) %>% ...
Holger Brandl's avatar
Holger Brandl committed
288
replaceNA <- function(x, withValue) { x[is.na(x)] <- withValue; x}
Holger Brandl's avatar
Holger Brandl committed
289

Holger Brandl's avatar
Holger Brandl committed
290

Holger Brandl's avatar
Holger Brandl committed
291

292 293 294 295 296

## see http://stackoverflow.com/questions/17288222/r-find-value-in-multiple-data-frame-columns/40586572#40586572
## case-insenstive search all columns of a data-frame with a fixed search term
search_df = function(df, search_term){
    apply(df, 1, function(r){
Holger Brandl's avatar
Holger Brandl committed
297
        any(str_detect(as.character(r), fixed(search_term, ignore_case = T)))
298 299 300 301 302 303 304 305 306 307 308 309
    }) %>% subset(df, .)
}


## filter a data-frame for those rows where at least one column is matching the given expression (that must evaluate to a boolean vector for each row).
match_df = function(df, search_expr){
    filter_fun = eval(substitute(function(x){search_expr}))

    apply(df, 1, function(r) any(filter_fun(r))) %>% subset(df, .)
}


Holger Brandl's avatar
Holger Brandl committed
310
## todo still needed since there's if_else in dplyr now
Holger Brandl's avatar
Holger Brandl committed
311
safe_ifelse <- function(cond, yes, no) {
312
    warning("DEPRECATED Use dplyr::if_else instead")
Holger Brandl's avatar
Holger Brandl committed
313

Holger Brandl's avatar
Holger Brandl committed
314 315
    #browser()
    isfacOrChar <- function(x) class(x) %in% c("factor", "character")
Holger Brandl's avatar
Holger Brandl committed
316 317 318 319

    if (isfacOrChar(yes) | isfacOrChar(no)) {
        yes <- ac(yes)
        no <- ac(no)
320
    }
Holger Brandl's avatar
Holger Brandl committed
321

Holger Brandl's avatar
Holger Brandl committed
322
    ifelse(cond, yes, no)
Holger Brandl's avatar
Holger Brandl committed
323 324
}

Holger Brandl's avatar
Holger Brandl committed
325

Holger Brandl's avatar
Holger Brandl committed
326
## for na instead use mutate_each with:
Holger Brandl's avatar
Holger Brandl committed
327 328 329
#empty_as_na <- function(x) safe_ifelse(x=="", NA, x)
#empty_as_na <- function(x) ifelse(class(x)  %in% c("factor", "character") & x=="", NA, x)
empty_as_na <- function(x){
Holger Brandl's avatar
Holger Brandl committed
330 331
    if ("factor" %in% class(x))x <- as.character(x) ## since ifelse wont work with factors
    ifelse(as.character(x) != "", x, NA)
Holger Brandl's avatar
Holger Brandl committed
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
}

#if(F){ ## DEBUG of empty_as_na
#cond <- allJobs %>% head %$% submit_time %>% c("")
#empty_as_na( cond)
#cond <- allJobs %>% head %$% resubmission_of
#empty_as_na( cond)
#
#empty_as_na( c(1, 2, NA))
#empty_as_na( c("sdf", "sdf2", NA))
#empty_as_na( c("sdf", "sdf2", ""))
#
#myFac <- as.factor(c("sdf", "sdf2", NA))
#empty_as_na( myFac)
#ifelse(as.character(myFac)!="", myFac, NA)
#
#empty_as_na( c("sdf", "sdf2", ""))
#
#iris[1,1] <- ""
#apply(iris, 2, function(x) gsub("^$|^ $", NA, x))
#}


Holger Brandl's avatar
Holger Brandl committed
355 356 357
## see http://stackoverflow.com/questions/24172111/change-the-blank-cells-to-na/33952598#33952598


Holger Brandl's avatar
Holger Brandl committed
358 359 360 361 362 363
## apply dplyr::filter to df but use filter criterions for cross-tabulation beforehand
filter_count <- function(df, ...){
    print(count(df, ...))
    filter(df, ...)
}

Holger Brandl's avatar
Holger Brandl committed
364
# Example: publications %>% count(journal) %>% as("num_pubs")
Holger Brandl's avatar
Holger Brandl committed
365 366 367 368 369
# as = function(df, name){
#     warning("DEPRECATED use 'as_var' instead of 'as'")
#     names(df)[length(names(df))] = name
#     df
# }
370

371
n_as = function(df, name){
Holger Brandl's avatar
Holger Brandl committed
372 373 374 375
    names(df)[length(names(df))] = name
    df
}

Holger Brandl's avatar
Holger Brandl committed
376 377
#count_occ = function(df, ...) count(df, ...) %>% n_as("num_occ")

Holger Brandl's avatar
Holger Brandl committed
378 379 380
dcount = function(df, ...) count(df, ...) %>%
    n_as("num_occ") %>%
    count(num_occ)
Holger Brandl's avatar
Holger Brandl committed
381

Holger Brandl's avatar
Holger Brandl committed
382
count_as = function(df, n_name, ...) count(df, ...) %>% n_as(n_name)
Holger Brandl's avatar
Holger Brandl committed
383 384 385 386 387
#iris %>% count_as("num_occ", Species)
#iris %>% dcross_tab(Species)



Holger Brandl's avatar
Holger Brandl committed
388

Holger Brandl's avatar
Holger Brandl committed
389
distinct_all = function (x, ...) distinct(x, ..., .keep_all = T)
Holger Brandl's avatar
Holger Brandl committed
390

Holger Brandl's avatar
Holger Brandl committed
391 392 393 394 395 396
#' Return <code>true</code> if the data.frame is distinct with respect to the provided unqoted variabled names/expressions
is_distinct = function(x, ...){
    distinct(x) %>% nrow == nrow(x)
}


Holger Brandl's avatar
Holger Brandl committed
397 398 399 400
## fetch a column of a matrix in a magrittr pipe. Useful along with str_*
get_col = function(data, col_index) data[, col_index] ## also could use magrittr::extract here


Holger Brandl's avatar
Holger Brandl committed
401
## convience method to extract a column, defaults to _ as separator and the first column
Holger Brandl's avatar
Holger Brandl committed
402
extract_col = function(x, col_index=1, sep="_", num_cols=10){ str_split_fixed(x, sep, num_cols)[, col_index]}
Holger Brandl's avatar
Holger Brandl committed
403 404


Holger Brandl's avatar
Holger Brandl committed
405 406 407 408 409 410 411 412 413
## Extract the first group of a grouped data-frame
first_group = function(groupedDF){
    # groupedDF = geneLists
    # groupedDF = iris %>% group_by(Species)
    # https://stackoverflow.com/questions/33775239/emulate-split-with-dplyr-group-by-return-a-list-of-data-frames
    (groupedDF %>% do(data = (.)) %$% map(data, identity))[[1]]
}


Holger Brandl's avatar
Holger Brandl committed
414

415 416 417 418 419 420 421
reload_dplyr <- function(){
    unloadNamespace('tidyr')
    unloadNamespace('dplyr')
    require(tidyr);require(dplyr)
}


422
## from http://stackoverflow.com/questions/7505547/detach-all-packages-while-working-in-r
423
unload_packages <- function() {
Holger Brandl's avatar
Holger Brandl committed
424 425 426 427
    basic.packages <- c("package:stats", "package:graphics", "package:grDevices", "package:utils", "package:datasets", "package:methods", "package:base")
    package.list <- search()[ifelse(unlist(gregexpr("package:", search())) == 1, TRUE, FALSE)]
    package.list <- setdiff(package.list, basic.packages)
    if (length(package.list) > 0)for (package in package.list) detach(package, character.only = TRUE)
428
}
Holger Brandl's avatar
Holger Brandl committed
429 430


Holger Brandl's avatar
Holger Brandl committed
431 432 433 434
## workaround for biomart
## Deprecated: load dplyr after biomart to avoid this problem
#dselect <- function(...) dplyr::select(...)

Holger Brandl's avatar
Holger Brandl committed
435

Holger Brandl's avatar
Holger Brandl committed
436
########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
437
#### Result Caching for long running tasks
Holger Brandl's avatar
Holger Brandl committed
438

Holger Brandl's avatar
Holger Brandl committed
439 440
## related: http://cran.r-project.org/web/packages/R.cache/R.cache.pdf

441
cache_it <- function(expr, filePrefix="cache"){
Holger Brandl's avatar
Holger Brandl committed
442
    cacheFile <- paste0(filePrefix, "_", substr(digest::digest(deparse(expr)), 1, 6)) %>% paste0(".", ., ".RData")
Holger Brandl's avatar
Holger Brandl committed
443

Holger Brandl's avatar
Holger Brandl committed
444
    if (file.exists(cacheFile)) {
Holger Brandl's avatar
Holger Brandl committed
445 446 447
        local(get(load(cacheFile)))
    } else {
        result <- eval(expr)
Holger Brandl's avatar
Holger Brandl committed
448
        save(result, file = cacheFile)
Holger Brandl's avatar
Holger Brandl committed
449 450 451 452 453 454
        result
    }
}

## Examples
#mydata <- quote(iris %>% filter(Species=="setosa")) %>% cache_it("tt")
Holger Brandl's avatar
Holger Brandl committed
455 456
#mydata <- quote(iris %>% filter(Species=="setosa")) %>% cache_it()
#mydata <- quote( { print("evaluate expr"); iris %>% filter(Species=="setosa") } ) %>% cache_it()
Holger Brandl's avatar
Holger Brandl committed
457 458 459 460

########################################################################################################################
#### File System

Holger Brandl's avatar
Holger Brandl committed
461
is.directory <- function(dirname) ! is.na(file.info(dirname)$isdir)
Holger Brandl's avatar
Holger Brandl committed
462 463 464


mcdir <- function(dirname){
Holger Brandl's avatar
Holger Brandl committed
465
    if (! file.exists(dirname)) {
Holger Brandl's avatar
Holger Brandl committed
466 467 468 469 470 471 472 473 474
        dir.create(dirname)
    }

    setwd(dirname)
}

locload <- function(fileName) local(get(load(fileName)))


Holger Brandl's avatar
Holger Brandl committed
475
## tbd: it would be more efficient to use Reduce here (see http://stackoverflow.com/questions/34344214/how-to-join-multiple-data-frames-using-dplyr)
Holger Brandl's avatar
Holger Brandl committed
476 477
rmerge <- function(LDF, by, ...){
    DF <- LDF[[1]]
Holger Brandl's avatar
Holger Brandl committed
478 479
    for (i in 2 : length(LDF)) {
        DF <- merge(DF, LDF[[i]], by = by)
Holger Brandl's avatar
Holger Brandl committed
480 481 482 483 484 485
    }
    DF
}



486 487 488 489 490
## DEPRECTED: use trim_ext instead
trimEnd <- function(fileNames, ...){
    warning("DEPRECATED: Use trim_ext instead");
    trim_ext(fileNames, ...)
}
Holger Brandl's avatar
Holger Brandl committed
491

492 493
trim_ext <- function(fileNames, ...){
    for (fileExt in list(...)) {
Holger Brandl's avatar
Holger Brandl committed
494
        fileNames <- str_replace(fileNames, paste(fileExt, "$", sep = ""), "")
Holger Brandl's avatar
Holger Brandl committed
495 496 497 498 499
    }

    fileNames
}

Holger Brandl's avatar
Holger Brandl committed
500
## DEPRECATED Use write_tsv instead
Holger Brandl's avatar
Holger Brandl committed
501 502
write.delim <- function(df, file, header=TRUE, ...){
    write.table(df, file, row.names = FALSE, col.names = header, sep = "\t", ...)
Holger Brandl's avatar
Holger Brandl committed
503 504
}

Holger Brandl's avatar
Holger Brandl committed
505
rmSomeElements <- function(vec, toDel) vec[! (vec %in% toDel)]
Holger Brandl's avatar
Holger Brandl committed
506

Holger Brandl's avatar
Holger Brandl committed
507
rmLastElement <- function(vec) vec[- length(vec)]
Holger Brandl's avatar
Holger Brandl committed
508 509 510 511 512


########################################################################################################################
## Memory management

Holger Brandl's avatar
Holger Brandl committed
513

Holger Brandl's avatar
Holger Brandl committed
514
# improved list of objects
Holger Brandl's avatar
Holger Brandl committed
515 516
lsos <- function (pos = 1, pattern, order.by, decreasing=FALSE, head=FALSE, n=5) {
    napply <- function(names, fn) sapply(names, function(x) fn(get(x, pos = pos)))
Holger Brandl's avatar
Holger Brandl committed
517
    names <- ls(pos = pos, pattern = pattern)
Holger Brandl's avatar
Holger Brandl committed
518

Holger Brandl's avatar
Holger Brandl committed
519 520 521
    obj.class <- napply(names, function(x) as.character(class(x))[1])
    obj.mode <- napply(names, mode)
    obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class)
Holger Brandl's avatar
Holger Brandl committed
522
    obj.size <- napply(names, object.size) / 1000000
Holger Brandl's avatar
Holger Brandl committed
523
    obj.dim <- t(napply(names, function(x)
Holger Brandl's avatar
Holger Brandl committed
524

Holger Brandl's avatar
Holger Brandl committed
525
    as.numeric(dim(x))[1 : 2]))
Holger Brandl's avatar
Holger Brandl committed
526 527
    vec <- is.na(obj.dim)[, 1] & (obj.type != "function")
    obj.dim[vec, 1] <- napply(names, length)[vec]
Holger Brandl's avatar
Holger Brandl committed
528

Holger Brandl's avatar
Holger Brandl committed
529 530
    out <- data.frame(obj.type, obj.size, obj.dim)
    names(out) <- c("Type", "Size", "Rows", "Columns")
Holger Brandl's avatar
Holger Brandl committed
531

Holger Brandl's avatar
Holger Brandl committed
532 533
    if (! missing(order.by))
    out <- out[order(out[[order.by]], decreasing = decreasing),]
Holger Brandl's avatar
Holger Brandl committed
534

Holger Brandl's avatar
Holger Brandl committed
535
    if (head)out <- head(out, n)
Holger Brandl's avatar
Holger Brandl committed
536

Holger Brandl's avatar
Holger Brandl committed
537
    out <- transform(out, var_name = rownames(out))
Holger Brandl's avatar
Holger Brandl committed
538 539 540 541
    rownames(out) <- NULL
    arrange(out, Size)
}

Holger Brandl's avatar
Holger Brandl committed
542
# shorthand that just shows top 1 results
Holger Brandl's avatar
Holger Brandl committed
543
lsosh <- function(..., n=10) {
Holger Brandl's avatar
Holger Brandl committed
544
    lsos(..., order.by = "Size", decreasing = TRUE, head = TRUE, n = n)
Holger Brandl's avatar
Holger Brandl committed
545 546
}

Holger Brandl's avatar
Holger Brandl committed
547 548 549 550 551
########################################################################################################################
### Statistics


## outlier handling
Holger Brandl's avatar
Holger Brandl committed
552 553 554
trim_outliers <- function(values, probs=c(0.05, 0.95)){
    values = deResults$pvalue
    stopifnot(length(probs) == 2)
555
    quantiles = quantile(values, probs, na.rm = TRUE)
Holger Brandl's avatar
Holger Brandl committed
556 557 558

    pmax(quantiles[1], pmin(quantiles[2], values))
}
Holger Brandl's avatar
Holger Brandl committed
559 560 561 562

## use trim_outliers instead
#limit_range <- function(values, range)  pmax(range[1], pmin(range[2], values))

Holger Brandl's avatar
Holger Brandl committed
563
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(! is.na(x)))
564 565 566 567 568 569 570 571


########################################################################################################################
### Misc

## inspired by http://stackoverflow.com/questions/8343509/better-error-message-for-stopifnot
assert <- function (expr, error) {
    if (! expr) stop(error, call. = FALSE)
572 573 574
}

### table rendering
Holger Brandl's avatar
Holger Brandl committed
575
table_browser <- function(df, caption=deparse(substitute(df)), ...){
Holger Brandl's avatar
Holger Brandl committed
576
    datatable(df, filter = "bottom", extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')), caption = caption, ...)
577
}
578

Holger Brandl's avatar
Holger Brandl committed
579 580 581
#results_prefix = "env_data_prep"
add_prefix = function(filename) {
    ## prefix a name with a project-prefix. Requires that results_prefix to be defined
582 583 584
    prefixName=if_else(str_length(results_prefix)==0, basename(filename), paste0(results_prefix, ".", basename(filename)))

    file.path(dirname(filename), prefixName)
585 586
}

Holger Brandl's avatar
Holger Brandl committed
587 588 589
## https://stackoverflow.com/questions/18669886/how-to-unfold-user-and-environment-variable-in-r-language/46240642#46240642
interp_from_env = function(path){
    e <- new.env()
590
    env = Sys.getenv() %>% discard(~ str_detect(.x, fixed("()")))
591 592 593
    paste0(make.names(names(env)), "='", gsub("'", '', env), "'") %>%
        map(~eval(parse(text=.), envir=e))
    # (system("export", intern=T) %>% str_split_fixed(" ", 2))[,2] %>% map(~eval(parse(text=.), envir=e))
Holger Brandl's avatar
Holger Brandl committed
594 595 596 597
    glue::glue(path, .envir=e, .open="${")
}

# #usage examples
598
# require(stringr)
Holger Brandl's avatar
Holger Brandl committed
599
# read.delim(interp_from_env("${PRJ_DATA}/foo.txt") )
600
# source(interp_from_env("${HOME}/bar.R"))