I am writing a function using dplyr
package and other related packages, such as tidyr
and tidyselect
. After reading Programming with dplyr and many trials and errors, I was able to write a function that incorporates both data masking/tidy select and name injection to achieve partial goal.
compute_ratio <- function(numerator = dcdata,
denominator = pgdata,
bygroup = c('STUDY', 'SITE'),
nbydp = n_dc_bydp_ym,
numsuffix = dc_byss,
denosuffix = pg_byss){
numerator %>%
dplyr::group_by(dplyr::pick({{ bygroup }})) %>%
dplyr::summarise("n_{{numsuffix}}" := sum({{ nbydp }}, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
# Merge denominator data with numerator data
dplyr::full_join(denominator, by = {{ bygroup }}) %>%
dplyr::mutate("cum_n_{{denosuffix}}" := cumsum("n_{{numsuffix}}"),
"ratio_{{numsuffix}}" := dplyr::if_else("cum_n_{{denosuffix}}" == 0, NA,
round("n_{{numsuffix}}" / "cum_n_{{denosuffix}}", 3)))
}
The current issue is in the last mutate()
. I cannot figure out how to refer to the names which is a name injection in previous lines of the same function. For example, "n_{{numsuffix}}"
is a data variable derived from summarise()
. When I need to refer to this data variable later in the last mutate()
, it now is not on LHS of :=, but the RHS of :=.
Any advice or guidance is appreciated. How to refer to the previous name injection as a data variable later in the same user-defined function?
I have tried .data[[]]
, embracing {{ }}
, !!enquo
. All end with errors.
Sample numerator data - dcdata
structure(list(STUDY = c("A", "B", "A", "A", "A", "A", "A", "B",
"B", "A", "A", "A", "B", "A", "B", "A", "A", "B", "A", "B"),
SITE = c("0187", "086108", "4603", "4304", "8617", "3205",
"0713", "086124", "086048", "6140", "0186", "4407", "086344",
"4413", "061027", "4115", "3403", "086009", "8618", "086020"
), n_dc_bydp_ym = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 1, 1,
0, 0, 0, 0, 0, 1, 2)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Sample denominator data -- pgdata
structure(list(STUDY = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "B"),
SITE = c("0186", "0187", "0713", "3205", "3403", "4115",
"4304", "4407", "4413", "4603", "6140", "8617", "8618", "061027",
"086009", "086020", "086048", "086108", "086124", "086344"
), n_pg_byss = c(2705L, 371L, 1495L, 404L, 1357L, 2089L,
456L, 886L, 830L, 5034L, 912L, 1739L, 1991L, 60L, 18L, 858L,
666L, 759L, 28L, 171L)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Test
test_ratiofun <- compute_ratio(numerator = dcdata,
denominator = pgdata,
bygroup = c('STUDY', 'SITE'),
nbydp = n_dc_bydp_ym,
numsuffix = dc_byss,
denosuffix = pg_byss)
Maybe theres a less convoluted way, but seems to work for what you want
library(tidyverse)
library(rlang)
library(glue)
myfunc <- function(x,numsuffix,denosuffix){
n_text <- as.character(glue("n_{as_name(enquo(numsuffix))}"))
group_by(x,
Species) |>
summarise("{n_text}":= sum(Petal.Length, na.rm = TRUE)) %>%
dplyr::ungroup() |>
mutate(
"cum_n_{{denosuffix}}" := cumsum(!!sym(n_text)))
}
myfunc(iris,whatever,also)
# A tibble: 3 × 3
Species n_whatever cum_n_also
<fct> <dbl> <dbl>
1 setosa 73.1 73.1
2 versicolor 213 286.
3 virginica 278. 564.