I want to create a single function called eval_data
where the user can input
dplyr
functions to apply to the data framesThis will look something like:
eval_data <- function(data, dplyr_logic, select_vector) {
data %>%
# this doesn't work
eval(dplyr_logic) %>%
select(
{ select_vector }
)
}
The dplyr_logic
is a list of either:
dd <- list()
dd$data <- list(
mutate0 = iris,
mutate1 = iris,
mutate2= iris,
filter1 = iris
)
select_vec <- list(
c("Species", "Sepal.Length"),
c("Species", "New_Column1"),
c("Species", "New_Column2", "New_Column3"),
c("Species", "Sepal.Width")
)
logic <- list(
# do nothing -- this one works
I(),
#mutate1
rlang::expr(mutate(New_Column1 = case_when(
Sepal.Length > 7 ~'Big',
Sepal.Length > 6 ~ 'Medium',
TRUE ~ 'Small'
)
)),
#mutate2
rlang::expr(mutate(New_Column2 = case_when(
Sepal.Width > 3.5 ~'Big2',
Sepal.Width > 3 ~ 'Medium2',
TRUE ~ 'Small2'
)) %>%
mutate(
New_Column3 = case_when(
Petal.Width > 2 ~'Big3',
Petal.Width > 1 ~ 'Medium3',
TRUE ~ 'Small3'
)
)
),
#filter1
rlang::expr(filter(Sepal.Width > 3))
)
# eval_data(dd$data[[1]], logic[[1]], select_vec[[1]]) works
# eval_data(dd$data[[2]], logic[[2]], select_vec[[2]]) does not
pmap(dd$data, logic, select_vec, ~eval_data)
pmap_output <- list(
iris1 = iris %>% I() %>% select("Species", "Sepal.Length"),
iris2 = iris %>%
mutate(New_Column1 =
case_when(
Sepal.Length > 7 ~'Big',
Sepal.Length > 6 ~ 'Medium',
TRUE ~ 'Small')) %>%
select("Species", "New_Column1"),
iris4 = iris %>%
mutate(New_Column2 = case_when(
Sepal.Width > 3.5 ~'Big2',
Sepal.Width > 3 ~ 'Medium2',
TRUE ~ 'Small2'
)) %>%
mutate(
New_Column3 = case_when(
Petal.Width > 2 ~'Big3',
Petal.Width > 1 ~ 'Medium3',
TRUE ~ 'Small3'
)
) %>%
select("Species", "New_Column2", "New_Column3"),
iris3 = iris %>% filter(Sepal.Width > 3) %>% select("Species", "Sepal.Width")
)
What do I need to change in eval_data
and the logic
list in order to make this work? Any help appreciated!!
Two changes. First, you need to include data %>%
into your dplyr logic evaluation:
eval_data <- function(data, dplyr_logic, select_vector) {
rlang::expr( data %>% !!dplyr_logic ) %>%
eval() %>%
select( one_of(select_vector) )
}
Second, the chained mutate is actually a bit tricky. Recall that x %>% f(y)
can be rewritten as f(x,y)
. Your double-mutate expression can therefore be re-written as mutate( mutate(expr1), expr2 )
. When you feed the data to it, it becomes
mutate(data, mutate(expr1), expr2)
instead of the desired
mutate(mutate(data, expr1), expr2)
So, we need to use the pronoun .
to specify where the pipe input should go to in our complex expression:
logic <- rlang::exprs( # We can use exprs instead of list(expr())
I(),
mutate(New_Column1 = case_when(
Sepal.Length > 7 ~'Big',
Sepal.Length > 6 ~ 'Medium',
TRUE ~ 'Small'
)),
{mutate(., New_Column2 = case_when( # <--- NOTE the { and the .
Sepal.Width > 3.5 ~'Big2',
Sepal.Width > 3 ~ 'Medium2',
TRUE ~ 'Small2')) %>%
mutate(
New_Column3 = case_when(
Petal.Width > 2 ~'Big3',
Petal.Width > 1 ~ 'Medium3',
TRUE ~ 'Small3'
))}, # <--- NOTE the matching }
filter(Sepal.Width > 3)
)
Everything works now:
res <- pmap(list(dd$data, logic, select_vec), eval_data)
## Compare to desired output
map2_lgl( res, pmap_output, identical )
# mutate0 mutate1 mutate2 filter1
# TRUE TRUE TRUE TRUE