Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
datautils
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
1
Issues
1
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Environments
Packages & Registries
Packages & Registries
Package Registry
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
bioinfo
datautils
Commits
31d82898
Commit
31d82898
authored
Aug 14, 2014
by
Holger Brandl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
split up R utils
parent
93b4c0c9
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
471 additions
and
0 deletions
+471
-0
R/core_commons.R
R/core_commons.R
+179
-0
R/datatable_utils.R
R/datatable_utils.R
+16
-0
R/df_utils.R
R/df_utils.R
+77
-0
R/ggplot/ggheatmap.R
R/ggplot/ggheatmap.R
+0
-0
R/ggplot/ggplot_utils.R
R/ggplot/ggplot_utils.R
+156
-0
R/holger_utils.R
R/holger_utils.R
+43
-0
No files found.
R/core_commons.R
0 → 100644
View file @
31d82898
## automatic package installation
require.auto
<-
function
(
x
){
x
<-
as.character
(
substitute
(
x
))
if
(
isTRUE
(
x
%in%
.packages
(
all.available
=
TRUE
)))
{
eval
(
parse
(
text
=
paste
(
"require("
,
x
,
", quietly=T)"
,
sep
=
""
)))
}
else
{
# update.packages(ask=F) # update dependencies, if any.
eval
(
parse
(
text
=
paste
(
"install.packages('"
,
x
,
"')"
,
sep
=
""
)))
}
if
(
isTRUE
(
x
%in%
.packages
(
all.available
=
TRUE
)))
{
eval
(
parse
(
text
=
paste
(
"require("
,
x
,
", quietly=T)"
,
sep
=
""
)))
}
else
{
source
(
"http://bioconductor.org/biocLite.R"
)
biocLite
(
character
(),
ask
=
FALSE
)
# update dependencies, if any.
eval
(
parse
(
text
=
paste
(
"biocLite('"
,
x
,
"')"
,
sep
=
""
)))
eval
(
parse
(
text
=
paste
(
"require("
,
x
,
", quietly=T)"
,
sep
=
""
)))
}
}
require.auto
(
plyr
)
require.auto
(
stringr
)
require.auto
(
reshape2
)
#require.auto(reshape2, quietly=T, warn.conflicts=F)
require.auto
(
scales
)
require.auto
(
dplyr
)
#require.auto(data.table)
options
(
help_type
=
"html"
)
options
(
width
=
150
)
# for sqldf to avoid the use of tckl
options
(
gsubfn.engine
=
"R"
)
##### set the r mirror
#cat(".Rprofile: Setting Goettingen repository\n")
r
=
getOption
(
"repos"
)
# hard code the UK repo for CRAN
r
[
"CRAN"
]
=
"http://ftp5.gwdg.de/pub/misc/cran/"
options
(
repos
=
r
)
rm
(
r
)
#qlibrary <- function(libname) {library(as.character(substitute(libname)), quietly=T, warn.conflicts=F, character.only=T )}
########################################################################################################################
#### Small aliases
#praste <- function(...) print(paste(...))
echo
<-
function
(
...
)
print
(
paste
(
...
))
ac
<-
function
(
...
)
as.character
(
...
)
qns
<-
function
()
quit
(
save
=
"no"
)
# string concatenation without space gaps (could/should use paste0 instead)
concat
<-
function
(
...
)
paste
(
...
,
sep
=
""
)
se
<-
function
(
x
)
sd
(
x
,
na.rm
=
TRUE
)
/
sqrt
(
sum
(
!
is.na
(
x
)))
unlen
<-
function
(
x
)
length
(
unique
(
x
))
pp
<-
function
(
dat
)
page
(
dat
,
method
=
"print"
)
########################################################################################################################
#### File System
is.directory
<-
function
(
dirname
)
!
is.na
(
file.info
(
dirname
)
$
isdir
)
mcdir
<-
function
(
dirname
){
if
(
!
file.exists
(
dirname
)){
dir.create
(
dirname
)
}
setwd
(
dirname
)
}
locload
<-
function
(
fileName
)
local
(
get
(
load
(
fileName
)))
rmerge
<-
function
(
LDF
,
by
,
...
){
DF
<-
LDF
[[
1
]]
for
(
i
in
2
:
length
(
LDF
))
{
DF
<-
merge
(
DF
,
LDF
[[
i
]],
by
=
by
)
}
DF
}
trimEnd
<-
function
(
fileNames
,
exts
=
c
()){
for
(
fileExt
in
exts
){
fileNames
<-
str_replace
(
fileNames
,
paste
(
fileExt
,
"$"
,
sep
=
""
),
""
)
}
fileNames
}
chopFileExt
<-
function
(
fileNames
,
exts
=
c
()){
warning
(
"this method is deprecated. use trimEnd instead"
)
for
(
fileExt
in
exts
){
fileNames
<-
str_replace
(
fileNames
,
paste
(
"."
,
fileExt
,
"$"
,
sep
=
""
),
""
)
}
fileNames
}
write.delim
<-
function
(
df
,
header
=
TRUE
,
...
){
write.table
(
df
,
row.names
=
FALSE
,
col.names
=
header
,
sep
=
"\t"
,
...
)
}
## writes a table in bed format expecting columns being ordered according to bed spec already
write.bed
<-
function
(
bedData
,
file
){
write.table
(
bedData
,
file
=
file
,
quote
=
FALSE
,
sep
=
"\t"
,
na
=
"NA"
,
row.names
=
FALSE
,
col.names
=
FALSE
)
}
#options(scipen=100) ## necessary to disable scientific number formats for long integers
rmSomeElements
<-
function
(
vec
,
toDel
)
vec
[
!
(
vec
%in%
toDel
)]
rmLastElement
<-
function
(
vec
)
vec
[
-
length
(
vec
)]
########################################################################################################################
#### File System
## Memory management
# improved list of objects
lsos
<-
function
(
pos
=
1
,
pattern
,
order.by
,
decreasing
=
FALSE
,
head
=
FALSE
,
n
=
5
)
{
napply
<-
function
(
names
,
fn
)
sapply
(
names
,
function
(
x
)
fn
(
get
(
x
,
pos
=
pos
)))
names
<-
ls
(
pos
=
pos
,
pattern
=
pattern
)
obj.class
<-
napply
(
names
,
function
(
x
)
as.character
(
class
(
x
))[
1
])
obj.mode
<-
napply
(
names
,
mode
)
obj.type
<-
ifelse
(
is.na
(
obj.class
),
obj.mode
,
obj.class
)
obj.size
<-
napply
(
names
,
object.size
)
/
1000000
obj.dim
<-
t
(
napply
(
names
,
function
(
x
)
as.numeric
(
dim
(
x
))[
1
:
2
]))
vec
<-
is.na
(
obj.dim
)[,
1
]
&
(
obj.type
!=
"function"
)
obj.dim
[
vec
,
1
]
<-
napply
(
names
,
length
)[
vec
]
out
<-
data.frame
(
obj.type
,
obj.size
,
obj.dim
)
names
(
out
)
<-
c
(
"Type"
,
"Size"
,
"Rows"
,
"Columns"
)
if
(
!
missing
(
order.by
))
out
<-
out
[
order
(
out
[[
order.by
]],
decreasing
=
decreasing
),
]
if
(
head
)
out
<-
head
(
out
,
n
)
# out
out
<-
transform
(
out
,
var_name
=
rownames
(
out
))
rownames
(
out
)
<-
NULL
arrange
(
out
,
Size
)
}
# shorthand
lsosh
<-
function
(
...
,
n
=
10
)
{
lsos
(
...
,
order.by
=
"Size"
,
decreasing
=
TRUE
,
head
=
TRUE
,
n
=
n
)
}
R/
plyr
_utils.R
→
R/
datatable
_utils.R
View file @
31d82898
require.auto
(
data.table
)
dt.merge
<-
function
(
dfA
,
dfB
,
by
=
intersect
(
names
(
dfA
),
names
(
dfB
))
,
...
)
{
# require(data.table)
as.df
(
merge
(
data.table
(
dfA
,
key
=
by
),
data.table
(
dfB
,
key
=
by
),
...
))
# unloadNameSpace(data.table)
}
require
(
data.table
)
# http://stackoverflow.com/questions/11792527/filtering-out-duplicated-non-unique-rows-in-data-table
unique_rows
<-
function
(
df
,
columns
){
unique
(
setkeyv
(
data.table
(
df
),
columns
))
%>%
as.df
()
}
R/df_utils.R
0 → 100644
View file @
31d82898
subsample
<-
function
(
df
,
sampleSize
,
...
){
df
[
sample
(
1
:
nrow
(
df
),
min
(
sampleSize
,
nrow
(
df
)),
...
),]
}
shuffle
<-
function
(
df
)
df
[
sample
(
nrow
(
df
)),]
first
<-
function
(
x
,
n
=
1
)
head
(
x
,
n
)
as.df
<-
function
(
dt
)
as.data.frame
(
dt
)
## small wrappers
rownames2column
<-
function
(
df
,
colname
){
df
<-
as.df
(
df
)
df
$
tttt
<-
rownames
(
df
);
rownames
(
df
)
<-
NULL
;
rename
(
df
,
c
(
tttt
=
colname
))
}
column2rownames
<-
function
(
df
,
colname
){
#browser()
rownames
(
df
)
<-
ac
(
df
[,
colname
])
df
[
colname
]
<-
NULL
return
(
df
)
}
## pushing some columns to the right end of a data.frame
push_right
<-
function
(
df
,
pushColNames
){
df
[,
c
(
setdiff
(
names
(
df
),
pushColNames
),
pushColNames
)]
}
push_left
<-
function
(
df
,
pushColNames
){
df
[,
c
(
pushColNames
,
setdiff
(
names
(
df
),
pushColNames
))]
}
set_names
<-
function
(
df
,
newnames
){
df
<-
as.df
(
df
)
names
(
df
)
<-
newnames
;
return
(
df
)
}
## dplyr utilities
print_head
<-
function
(
df
,
desc
=
NULL
){
print
(
head
(
df
))
return
(
df
)
}
fac2char
<-
function
(
mydata
,
convert
=
names
(
mydata
)[
sapply
(
mydata
,
is.factor
)]){
if
(
length
(
convert
)
==
0
)
return
(
mydata
)
inputColOrder
<-
names
(
mydata
)
convertData
<-
subset
(
mydata
,
select
=
names
(
mydata
)
%in%
convert
)
convertData
<-
as.data.frame
(
lapply
(
convertData
,
as.character
),
stringsAsFactors
=
FALSE
)
keepData
<-
subset
(
mydata
,
select
=!
(
names
(
mydata
)
%in%
convert
))
newdata
<-
cbind
(
convertData
,
keepData
)
newdata
<-
newdata
[,
inputColOrder
]
return
(
newdata
)
}
## workaround for biomart
dselect
<-
function
(
...
)
dplyr
::
select
(
...
)
R/ggheatmap.R
→
R/gg
plot/gg
heatmap.R
View file @
31d82898
File moved
R/ggplot_utils.R
→
R/ggplot
/ggplot
_utils.R
View file @
31d82898
require.auto
(
ggplot2
)
plotPDF
<-
function
(
filename
,
expr
){
pdf
(
paste0
(
filename
,
".pdf"
));
expr
;
dev.off
();
}
#plotPDF("test", plot(1:10))
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, by.row=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot
<-
function
(
...
,
plotlist
=
NULL
,
file
,
cols
=
1
,
layout
=
NULL
)
{
require
(
grid
)
# Make a list from the ... arguments and plotlist
plots
<-
c
(
list
(
...
),
plotlist
)
numPlots
=
length
(
plots
)
# If layout is NULL, then use 'cols' to determine layout
if
(
is.null
(
layout
))
{
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout
<-
matrix
(
seq
(
1
,
cols
*
ceiling
(
numPlots
/
cols
)),
ncol
=
cols
,
nrow
=
ceiling
(
numPlots
/
cols
))
}
if
(
numPlots
==
1
)
{
print
(
plots
[[
1
]])
}
else
{
# Set up the page
grid.newpage
()
pushViewport
(
viewport
(
layout
=
grid.layout
(
nrow
(
layout
),
ncol
(
layout
))))
# Make each plot, in the correct location
for
(
i
in
1
:
numPlots
)
{
# Get the i,j matrix positions of the regions that contain this subplot
matchidx
<-
as.data.frame
(
which
(
layout
==
i
,
arr.ind
=
TRUE
))
print
(
plots
[[
i
]],
vp
=
viewport
(
layout.pos.row
=
matchidx
$
row
,
layout.pos.col
=
matchidx
$
col
))
}
}
}
rotXlab
<-
function
()
theme
(
axis.text.x
=
element_text
(
angle
=
90
,
hjust
=
1
))
gg2Format
=
"png"
ggsave2
<-
function
(
gplot
=
last_plot
(),
width
=
8
,
height
=
6
,
prefix
=
""
,
saveData
=
FALSE
,
outputFormat
=
gg2Format
,
...
){
title
<-
try
(
gplot
$
labels
[[
"title"
]])
if
(
is.null
(
title
)){
varMapping
<-
gplot
$
labels
varMapping
<-
varMapping
[
names
(
varMapping
)
%in%
c
(
"x"
,
"y"
)]
# if(varMapping==NULL){
# varMapping <- gplot$mapping;
# }
if
(
length
(
varMapping
)
==
1
){
title
=
paste
(
"distribution of"
,
varMapping
)
}
else
{
title
=
try
(
paste
(
varMapping
,
collapse
=
" vs "
))
# stop("last plot had no title. Use ggsave() and give it a manual title")
}
rawFacetDesc
<-
format
(
gplot
$
facet
)
if
(
rawFacetDesc
!=
"facet_null()"
){
title
<-
paste
(
title
,
"by"
,
str_replace_all
(
str_match
(
rawFacetDesc
,
"facet_.*[(](.*))"
)[,
2
],
"~"
,
"and"
))
}
}
fileBaseName
<-
ifelse
(
nchar
(
prefix
)
>
0
,
concat
(
prefix
,
" - "
,
title
),
title
)
## clean up weired characters
fileBaseName
<-
str_replace_all
(
fileBaseName
,
"[$%/?]"
,
"_"
)
fileName
=
concat
(
fileBaseName
,
concat
(
"."
,
outputFormat
))
## remove line-breaks and trim spaces
fileName
=
str_replace_all
(
str_replace_all
(
fileName
,
"\\n"
,
""
),
"[ ]{2,}"
,
" "
)
# print(paste("saving plot to ", fileName))
ggsave
(
fileName
,
width
=
width
,
height
=
height
,
...
)
if
(
saveData
){
write.delim
(
gplot
$
data
,
file
=
concat
(
fileBaseName
,
".txt"
))
}
##todo review
LAST_GG2_OUTPUT
<<-
fileName
return
(
fileName
)
}
### alternate over all open devices when running ggplot
## disabled because it's causing problems if X11 device is not available
#ggplot <- function(...){
# dev.set(dev.next())
# ggplot2::ggplot(...)
#}
scale_fill_redgreed
<-
function
()
scale_fill_manual
(
values
=
c
(
"red"
,
"darkgreen"
))
########################################################################################################################
...
...
R/holger_utils.R
0 → 100644
View file @
31d82898
xtab
<-
function
(
mydata
){
tFile
<-
tempfile
(
fileext
=
".csv"
,
tmpdir
=
"~/tmp"
)
write.table
(
mydata
,
file
=
tFile
,
row.names
=
F
,
sep
=
","
)
if
(
str_detect
(
R.version
$
platform
,
"linux"
)){
system
(
paste
(
"ssh bioinfo-mac-6-wifi 'open -a /Applications/XTabulator.app/ /Volumes/bioinfo/tmp/"
,
basename
(
tFile
),
"'"
,
sep
=
""
))
}
else
{
system
(
paste
(
"open -a /Applications/XTabulator.app/ "
,
tFile
))
}
}
xtabh
<-
function
(
mydata
,
rows
=
100
)
xtab
(
head
(
mydata
,
rows
))
## todo remove this
TAB
<-
"\t"
xls
<-
function
(
data
,
remoteDesktop
=
"bioinfo-mac-6.mpi-cbg.de"
){
# isRemote=str_detect(R.version$platform, "linux")
if
(
str_detect
(
R.version
$
platform
,
"linux"
)){
tFile
<-
tempfile
(
fileext
=
".tsv"
,
tmpdir
=
"~/tmp"
)
## http://linux.icydog.net/ssh/piping.php
write.table
(
data
,
file
=
pipe
(
concat
(
"ssh brandl@"
,
remoteDesktop
,
" 'cat - >"
,
tFile
,
"'"
)),
row.names
=
F
,
sep
=
TAB
,
quote
=
F
)
# browser()
# system(paste("ssh ",remoteDesktop," 'open -a \"/Applications/_MPI_Applications/Microsoft Office 2011/Microsoft Excel.app\" ", tFile, "'", sep=""))
system
(
format
(
paste
(
"ssh "
,
remoteDesktop
,
" 'open -a \"/Applications/_MPI_Applications/Microsoft Office 2011/Microsoft Excel.app\" "
,
tFile
,
"'"
,
sep
=
""
)))
}
else
{
tFile
<-
tempfile
(
fileext
=
".csv"
)
write.table
(
data
,
file
=
tFile
,
row.names
=
F
,
sep
=
","
)
system
(
paste
(
"open -a '/Applications/Microsoft Excel' "
,
tFile
))
}
}
xlsh
<-
function
(
mydata
,
rows
=
100
)
xls
(
head
(
mydata
,
rows
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment