ci_commons.R 5.15 KB
Newer Older
Holger Brandl's avatar
Holger Brandl committed
1
2

# requires
3
if(!exists("load_pack")){
Holger Brandl's avatar
Holger Brandl committed
4
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.46/R/core_commons.R")
5
6
7
}

if(!exists("multiplot")){
Holger Brandl's avatar
Holger Brandl committed
8
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.46/R/ggplot_commons.R")
9
}
Holger Brandl's avatar
Holger Brandl committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

calc_ci = function(df, variable, ci_interval=0.95){
    variable <- enquo(variable)

    # http://dplyr.tidyverse.org/articles/programming.html
    mean_name <- paste0( quo_name(variable), "_mean")
    ci_name <- paste0(quo_name(variable), "_ci")
    # echo(glue::glue("varname is {ci_name}"))

    df %>% summarize(
    mean=mean(!!variable),
    sd=sd(!!variable),
    N = n(),
    se=sd/sqrt(N),
    !!ci_name := qt(ci_interval/2+0.5, N-1)*se,
    !!mean_name := mean
    ) %>% select(-c(mean, sd, N, se, mean))
}

Holger Brandl's avatar
Holger Brandl committed
29
30
# iris %>% group_by(Species) %>% calc_ci(Sepal.Length)
# iris %>% group_by(Species, Sepal.Width>3) %>% calc_ci(Sepal.Length)
Holger Brandl's avatar
Holger Brandl committed
31
32
33
34
35
36


#' now with plotting
plot_ci = function(grpData, variable, ci_interval=0.95){
    variable <- enquo(variable)

Holger Brandl's avatar
Holger Brandl committed
37
38
    # browser()

Holger Brandl's avatar
Holger Brandl committed
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
    # calculte ci intervals
    ciData = grpData %>% summarize(
        mean=mean(!!variable),
        sd=sd(!!variable),
        N = n(),
        se=sd/sqrt(N),
        ci = qt(ci_interval/2+0.5, N-1)*se,
    )


    #fail if there are more than one group attributes
    assert(length(groups(grpData)) < 3, "more than 2 groups are not supported")

    groupVar1 = groups(grpData)[[1]]
    gg = ggplot(grpData, aes(x= eval(rlang::UQE(groupVar1)), y= eval(rlang::UQE(variable)))) +
Holger Brandl's avatar
Holger Brandl committed
54
        geom_jitter(alpha=0.3, height=0,width=0.2) +
Holger Brandl's avatar
Holger Brandl committed
55
56
        geom_errorbar(aes(ymin= mean-ci, ymax= mean+ci, y=NULL), data=ciData, width=.2, size=0.9)

Holger Brandl's avatar
Holger Brandl committed
57
58
    gg = gg + xlab(groupVar1) + ylab(quo_name(variable))

Holger Brandl's avatar
Holger Brandl committed
59
60
61
62
63
64
    # if 2 grouping variables are present add facetting on second grouping attribute
    if(length(groups(grpData)) ==2){
        # https://stackoverflow.com/questions/21588096/pass-string-to-facet-grid-ggplot2
        gg =  gg + facet_wrap(as.formula(paste("~",groups(grpData)[[2]])))
    }

Holger Brandl's avatar
Holger Brandl committed
65
    gg
Holger Brandl's avatar
Holger Brandl committed
66
67
68
}


Holger Brandl's avatar
Holger Brandl committed
69
## todo support model formula instead of group data
Holger Brandl's avatar
Holger Brandl committed
70
interaction_plot = function(grpData, variable, ci_interval=0.95){
71

Holger Brandl's avatar
Holger Brandl committed
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
    variable <- enquo(variable)

    #fail if there are more not 2 group attributes
    assert(length(groups(grpData)) == 2, "only dfs with 2 grouping vars are supported")

    # calculte ci intervals
    ciData = grpData %>% summarize(
        mean=mean(!!variable),
        sd=sd(!!variable),
        N = n(),
        se=sd/sqrt(N),
        ci = qt(ci_interval/2+0.5, N-1)*se,
    )


87
88
89
90
    trimEval = function(name){
        if(!str_detect(name, "^eval[(]")) return(name)
        str_match(name, "eval[(](.*)[)]")[2]
    }
Holger Brandl's avatar
Holger Brandl committed
91
92
    groupVar1 = groups(grpData)[[1]]
    groupVar2 = groups(grpData)[[2]]
93

Holger Brandl's avatar
Holger Brandl committed
94
95
96
97
98
    dodge_with=0.2

    gg = ggplot(grpData, aes(x = eval(rlang::UQE(groupVar1)), y = eval(rlang::UQE(variable)), color = eval(rlang::UQE(groupVar2)))) +
        geom_jitter(position = position_jitterdodge(jitter.width = 0.1, dodge.width = dodge_with), alpha = 0.3) +
        geom_errorbar(aes(ymin = mean - ci, ymax = mean + ci, y = NULL), data = ciData, width = .2, size = 0.9, position = position_dodge(width = dodge_with)) +
Holger Brandl's avatar
Holger Brandl committed
99
        geom_line(aes(y = mean, group = eval(rlang::UQE(groupVar2))), position = position_dodge(width = dodge_with), data = ciData) +
100
        xlab(trimEval(groupVar1)) +
101
        ylab(quo_name(variable)) +
102
        guides(color=guide_legend(trimEval(groupVar2)))
103

Holger Brandl's avatar
Holger Brandl committed
104
105
106
107

    gg
}

Holger Brandl's avatar
Holger Brandl committed
108
# for interpretation see https://courses.washington.edu/smartpsy/interactions.htm
Holger Brandl's avatar
Holger Brandl committed
109
two_way_interaction = function(grpData, variable){
110
111
112
    # Example:
    # grpData = ToothGrowth %>% group_by(supp, as.factor(dose))

Holger Brandl's avatar
Holger Brandl committed
113
114
115
116
117
118
119
120
121
122
123
124
125
    ## invert the grouping
    groupVar1 = groups(grpData)[[1]]
    groupVar2 = groups(grpData)[[2]]

    regrouped = grpData %>% group_by(eval(rlang::UQE(groupVar2)), eval(rlang::UQE(groupVar1)))

    multiplot(
    interaction_plot(grpData, !!enquo(variable)),
    interaction_plot(regrouped, !!enquo(variable))
    )
}


126
127
128
129
130
131
132
133
## EXAMPLES-START
if(F){
# interaction_plot(ToothGrowth %>% group_by(supp, as.factor(dose)),len)

ToothGrowth %>% group_by(supp, as.factor(dose))  %>% interaction_plot(len)
}
## EXAMPLES-END

Holger Brandl's avatar
Holger Brandl committed
134
135
136
137
138

########################################################################################################################
### DEV PLAYGROUND


Holger Brandl's avatar
Holger Brandl committed
139
140
## DEBUG-START
if(F){
141
142

    source("/Users/brandl/Dropbox/projects/datautils/R/stats/ci_commons.R")
Holger Brandl's avatar
Holger Brandl committed
143
144

    lmModel = lm(len ~ supp*dose, data = ToothGrowth)
Holger Brandl's avatar
Holger Brandl committed
145
146
147
148
149
150
    varNames = attr(attr(lmModel$terms, "factors"), "dimnames")[[1]]

    grpData = lmModel$model %>%  group_by_at(vars(one_of(varNames[2], varNames[3])))


    two_way_interaction(grpData, eval(parse(text=varNames[1])))
Holger Brandl's avatar
Holger Brandl committed
151
152
153
154
155

    ToothGrowth %>% group_by(supp, as.factor(dose)) %>% plot_confint(len)
    ToothGrowth %>% group_by(supp, dose) %>% plot_ci(len)
    # ToothGrowth %>% mutate_inplace(dose, as.factor()) %>% group_by(supp, dose) %>% plot_ci(len)

Holger Brandl's avatar
Holger Brandl committed
156
157
158
159
160
    # .plot_confint = function(grpData, variable, ci_interval=0.95) plot_confint(grpData, quo_name(variable), ci_interval)

    model_interaction = function(model, variable){
        lmModel %>% str
        lmModel$model
Holger Brandl's avatar
Holger Brandl committed
161
162
    }

Holger Brandl's avatar
Holger Brandl committed
163
    ToothGrowth %>% group_by(supp, as.factor(dose))  %>% interaction_plot2(len)
Holger Brandl's avatar
Holger Brandl committed
164
165
166
167
168

}
## DEBUG-END


Holger Brandl's avatar
Holger Brandl committed
169