# Original attempt from the question: for each `entity` group, cross the
# `trait` column with itself via crossing() inside summarise().
# `entity` and `trait` are passed as bare column names and forwarded with {{ }}.
# NOTE(review): the question text that follows asks how to make this work, so
# this version is kept verbatim as the non-working starting point.
f <- function(db, entity, trait) {
db %>%
summarise(i = crossing({{ trait }},{{ trait }}),
.by = {{ entity }})
}
# Example data: entities "A" and "B", each observed with two `beta` values.
db <- data.frame(
  entity = c("A", "A", "B", "B"),
  beta = c("X", "Y", "X", "Z")
)

# Attempted call; `entity` and `beta` are passed as bare column names.
f(db, entity, beta)
Is there a way to make this work? I have the impression that tidyr does not work very well with functional programming in general. Should I resort to `expand_grid()`?
dplyr::cross_join
You can achieve this entirely within dplyr.
# Example data: entities "A" and "B", each observed with two `beta` values.
db <- data.frame(
  entity = c("A", "A", "B", "B"),
  beta = c("X", "Y", "X", "Z")
)
library(dplyr)
#' All within-group pairs of a column via `dplyr::cross_join()`
#'
#' @param db A data frame.
#' @param entity Unquoted grouping column (embraced into `.by`).
#' @param trait Unquoted column that is cross-joined with itself within each
#'   group.
#' @return One row per (group, value, value) combination. Because both sides
#'   of the join carry the same column name, the pair ends up in
#'   `<trait>.x` and `<trait>.y`.
f1 <- function(db, entity, trait) {
  reframe(
    db,
    # pick() yields a one-column data frame, which is what cross_join() expects.
    cross_join(pick({{ trait }}), pick({{ trait }})),
    .by = {{ entity }}
  )
}
# Example call; the printed result is shown below.
f1(db, entity, beta)
#   entity beta.x beta.y
# 1      A      X      X
# 2      A      X      Y
# 3      A      Y      X
# 4      A      Y      Y
# 5      B      X      X
# 6      B      X      Z
# 7      B      Z      X
# 8      B      Z      Z
base::expand.grid
You can also use `base::expand.grid()`. Note that you cannot replace `expand.grid()` with `tidyr::expand_grid()` here.
#' All within-group pairs of a column via `base::expand.grid()`
#'
#' @param db A data frame.
#' @param entity Unquoted grouping column (embraced into `.by`).
#' @param trait Unquoted column whose values are crossed with themselves
#'   within each group.
#' @return One row per (group, value, value) combination, with the pair held
#'   in columns `Var1` and `Var2`; `Var1` varies fastest.
f2 <- function(db, entity, trait) {
  db %>%
    reframe(
      # expand.grid() defaults to stringsAsFactors = TRUE, which would silently
      # turn a character `trait` into factors; keep the input type instead.
      expand.grid({{ trait }}, {{ trait }}, stringsAsFactors = FALSE),
      .by = {{ entity }}
    )
}
# Example call; the printed result is shown below.
f2(db, entity, beta)
#   entity Var1 Var2
# 1      A    X    X
# 2      A    Y    X
# 3      A    X    Y
# 4      A    Y    Y
# 5      B    X    X
# 6      B    Z    X
# 7      B    X    Z
# 8      B    Z    Z
tidyr::expand
To involve tidyr, you should use `expand()` instead of `expand_grid()` and `crossing()`. Note that `expand()` will de-duplicate and sort its inputs.
#' All within-group pairs of a column via `tidyr::expand()`
#'
#' @param db A data frame.
#' @param entity Unquoted grouping column.
#' @param trait Unquoted column that is expanded against itself within each
#'   group.
#' @return An ungrouped result with one row per (group, value, value)
#'   combination. The duplicated column name is repaired, so the pair ends up
#'   in `<trait>...1` and `<trait>...2`.
f3 <- function(db, entity, trait) {
  by_entity <- group_by(db, {{ entity }})
  pairs <- tidyr::expand(by_entity, {{ trait }}, {{ trait }})
  # Drop the grouping so callers do not inherit it.
  ungroup(pairs)
}
# Example call; the printed result is shown below.
f3(db, entity, beta)
# # A tibble: 8 × 3
#   entity beta...1 beta...2
#   <chr>  <chr>    <chr>
# 1 A      X        X
# 2 A      X        Y
# 3 A      Y        X
# 4 A      Y        Y
# 5 B      X        X
# 6 B      X        Z
# 7 B      Z        X
# 8 B      Z        Z
Efficiency: `base::expand.grid` > `dplyr::cross_join` > `tidyr::expand`
# Larger benchmark input: 3 entities, each paired with all 26 capital letters.
db_large <- data.frame(
  entity = rep(LETTERS[1:3], each = 26),
  beta = rep(LETTERS, times = 3)
)
# Time the three implementations on the same input.
bench::mark(
  `dplyr::cross_join` = f1(db_large, entity, beta),
  `base::expand.grid` = f2(db_large, entity, beta),
  `tidyr::expand` = f3(db_large, entity, beta),
  # The three results differ in column names and row order (see the outputs
  # above), so the equality check between expressions is switched off.
  check = FALSE
)
# # A tibble: 3 × 13
#   expression            min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#   <bch:expr>        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
# 1 dplyr::cross_jo…   8.14ms  8.42ms     117.      123KB     5.31    44     2      377ms
# 2 base::expand.gr…   3.19ms  3.63ms     272.      128KB     6.43   127     3      467ms
# 3 tidyr::expand     11.46ms 11.71ms      83.7     157KB     6.79    37     3      442ms