-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathex06_runif-via-pmap.R
191 lines (162 loc) · 4.63 KB
/
ex06_runif-via-pmap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#' ---
#' title: "Generate data from different distributions via pmap()"
#' author: "Jenny Bryan"
#' date: "`r format(Sys.Date())`"
#' output: github_document
#' ---
#+ setup, include = FALSE, cache = FALSE
## Keep the tidyverse startup banner out of the rendered document.
options(tidyverse.quiet = TRUE)
## Chunk defaults: merge source and output into one block, prefix output
## lines with "#>", and keep rendering when a chunk raises an error.
knitr::opts_chunk$set(error = TRUE, comment = "#>", collapse = TRUE)
#+ body
# ----
#' ## Uniform[min, max] via `runif()`
#'
#' CONSIDER:
#' ```
#' runif(n, min = 0, max = 1)
#' ```
#'
#' Want to do this for several triples of (n, min, max).
#'
#' Store each triple as a row in a data frame.
#'
#' Now iterate over the rows.
library(tidyverse)
#' Notice how df's variable names are the same as runif's argument names. Do
#' this when you can!
## One row per intended runif() call; columns are named n, min, max.
df <- tibble::tribble(
  ~n, ~min, ~max,
  1L,    0,    1,
  2L,   10,  100,
  3L,  100, 1000
)
## Auto-print to inspect the inputs.
df
#' Set seed to make this reproducibly random.
#'
#' Practice on single rows.
set.seed(123)
## Row 1. The outer parentheses print the row as it is assigned.
(x <- df[1, ])
runif(x$n, min = x$min, max = x$max)
## Row 2.
x <- df[2, ]
runif(x$n, min = x$min, max = x$max)
## Row 3.
x <- df[3, ]
runif(x$n, min = x$min, max = x$max)
#' Think out loud in pseudo-code.
## x <- df[i, ]
## runif(n = x$n, min = x$min, max = x$max)
## runif(n = df$n[i], min = df$min[i], max = df$max[i])
## runif with all args from the i-th row of df
#' Just. Do. It. with `pmap()`.
set.seed(123)
## Every row of df supplies the n, min and max of one runif() call.
purrr::pmap(.l = df, .f = runif)
#' ## Finessing variable and argument names
#'
#' Q: What if you can't arrange it so that variable names and arg names are the
#' same?
## Same values as df, but the column names no longer match runif()'s
## argument names.
foofy <- tibble(
  alpha = seq_len(3),         ## was: n
  beta  = c(0, 10, 100),      ## was: min
  gamma = c(1, 100, 1000)     ## was: max
)
foofy
#' A: Rename the variables on-the-fly, on the way in.
set.seed(123)
## rename(new = old): give the columns runif()'s argument names, then map.
pmap(
  rename(foofy, n = alpha, min = beta, max = gamma),
  runif
)
#' A: Write a wrapper around `runif()` to say how df vars <--> runif args.
## wrapper option #1:
##   ARGNAME = l[["VARNAME"]]
## Gather one row's values into a named list and pass them to runif() under
## runif()'s own argument names. `[[` is used instead of `$` because `$` on a
## list matches names partially: if `alpha` were absent, `l$alpha` could
## silently pick up a column such as `alphabet` instead of failing.
my_runif <- function(...) {
  l <- list(...)
  runif(n = l[["alpha"]], min = l[["beta"]], max = l[["gamma"]])
}
set.seed(123)
## The columns alpha, beta and gamma reach my_runif() as named arguments.
pmap(.l = foofy, .f = my_runif)
## wrapper option #2:
## Declare the data frame's column names as formal arguments and forward
## them to runif() in its n, min, max order; `...` absorbs anything else.
my_runif <- function(alpha, beta, gamma, ...) {
  runif(alpha, beta, gamma)
}
set.seed(123)
## Column names are matched to my_runif()'s alpha, beta and gamma arguments.
pmap(.l = foofy, .f = my_runif)
#' You can use `..i` to refer to input by position.
set.seed(123)
## ..1, ..2 and ..3 stand for the first, second and third columns of foofy.
pmap(.l = foofy, .f = ~ runif(..1, ..2, ..3))
#' Use this with *extreme caution*. Easy to shoot yourself in the foot.
#'
#' ## Extra variables in the data frame
#'
#' What if data frame includes variables that should not be passed to `.f()`?
## The first three columns match runif()'s arguments; `oops` does not.
df_oops <- tibble(
  n    = seq_len(3),
  min  = c(0, 10, 100),
  max  = c(1, 100, 1000),
  oops = c("please", "ignore", "me")
)
df_oops
#' This will not work!
set.seed(123)
## Expected to fail: df_oops carries the extra `oops` column, while runif()
## only takes n, min and max. The setup chunk sets error = TRUE, so when the
## script is rendered with knitr the error is shown and rendering continues.
pmap(df_oops, runif)
#' A: use `dplyr::select()` to limit the variables passed to `pmap()`.
## Inclusion: keep only the columns runif() understands.
set.seed(123)
pmap(select(df_oops, n, min, max), runif)
## Exclusion: drop the column runif() cannot accept.
set.seed(123)
pmap(select(df_oops, -oops), runif)
#' A: Use a custom wrapper and absorb extra variables with `...`.
## Accept runif()'s own arguments by name; `...` swallows every other
## column (here: oops) so it never reaches runif().
my_runif <- function(n, min, max, ...) {
  runif(n = n, min = min, max = max)
}
set.seed(123)
## All four columns are passed in; my_runif() uses n, min, max and drops oops.
pmap(.l = df_oops, .f = my_runif)
#' ## Add the generated data to the data frame as a list-column
set.seed(123)
## Inside the pipe, `.` is df itself, so pmap() walks over its rows and the
## draws are stored in a new list-column called data.
df_aug <- df %>%
  mutate(data = pmap(., runif))
df_aug
# View(df_aug)
#' What about computing within a data frame, in the presence of the
#' complications discussed above? Use `list()` in place of the `.`
#' placeholder above to select the target variables and, if necessary, map
#' variable names to argument names. *Thanks @hadley for [sharing this
#' trick](https://community.rstudio.com/t/dplyr-alternatives-to-rowwise/8071/29).*
#'
#' How to address variable names != argument names:
foofy <- tibble(
  alpha = seq_len(3),         ## was: n
  beta  = c(0, 10, 100),      ## was: min
  gamma = c(1, 100, 1000)     ## was: max
)
set.seed(123)
## list(ARGNAME = VARNAME) relabels the columns for runif() on the way in.
mutate(
  foofy,
  data = pmap(list(n = alpha, min = beta, max = gamma), runif)
)
#' How to address the presence of 'extra variables' with either an inclusion
#' or exclusion mentality:
df_oops <- tibble(
  n = 1:3,
  min = c(0, 10, 100),
  max = c(1, 100, 1000),
  oops = c("please", "ignore", "me")
)
## Inclusion: list only the wanted columns. The list is unnamed, so its
## elements are handed to runif() by position (n, min, max).
set.seed(123)
df_oops %>%
  mutate(data = pmap(list(n, min, max), runif))
## Exclusion: drop the unwanted column. Re-seed first, like every other
## sampling call in this script, so the draws are reproducible and match the
## inclusion version.
set.seed(123)
df_oops %>%
  mutate(data = pmap(select(., -oops), runif))
#' ## Review
#'
#' What have we done?
#'
#' * Arranged inputs as rows in a data frame
#' * Used `pmap()` to implement a loop over the rows.
#' * Used dplyr verbs `rename()` and `select()` to manipulate data on the way
#' into `pmap()`.
#' * Wrote custom wrappers around `runif()` to deal with:
#' - df var names != `.f()` arg names
#' - df vars that aren't formal args of `.f()`
#' * Demonstrated all of the above when working inside a data frame and adding
#' generated data as a list-column