How would I implement method dispatch for a function inside of dplyr::do
?
I've read through GitHub issues #719, #3558 and #3429 which have helpful information on how to create methods for dplyr
verbs, but nothing in particular that works for dplyr::do
- which is sort of "special" in the sense that the dispatch not only needs to happen for dplyr:do
itself, but also for the function that is called inside dplyr::do
(or at least that's what I'm after)
library(dplyr)
#>
#> Attache Paket: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example data ------------------------------------------------------------
df <- tibble::tibble(
id = c(rep("A", 5), rep("B", 5)),
x = 1:10
)
df_custom <- df
class(df_custom) <- c("tbl_df_custom", class(df_custom))
# Reclass function --------------------------------------------------------
reclass <- function(x, result) {
UseMethod('reclass')
}
reclass.default <- function(x, result) {
class(result) <- unique(c(class(x)[[1]], class(result)))
attr(result, class(x)[[1]]) <- attr(x, class(x)[[1]])
result
}
# Custom method for summarize ---------------------------------------------
summarise.tbl_df_custom <- function (.data, ...) {
message("Custom method for `summarise`")
result <- NextMethod("summarise")
ret <- reclass(.data, result)
print(class(ret))
ret
}
ret <- df_custom %>%
summarise(y = mean(x))
#> Custom method for `summarise`
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame"
ret %>% class()
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame"
# Custom method for group_by ----------------------------------------------
group_by.tbl_df_custom <- function (.data, ..., add = FALSE) {
message("Custom method for `group_by`")
result <- NextMethod("group_by")
ret <- reclass(.data, result)
print(class(ret))
ret
}
ret <- df_custom %>%
group_by(id) %>%
summarise(y = mean(x))
#> Custom method for `group_by`
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl"
#> [5] "data.frame"
#> Custom method for `summarise`
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame"
ret %>% class()
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame"
do
# Custom method for do ----------------------------------------------------
do.tbl_df_custom <- function (.data, ...) {
message("custom method for `do`")
result <- NextMethod("do")
ret <- reclass(.data, result)
print(class(ret))
ret
}
foo <- function(df) {
UseMethod("foo")
}
foo.default <- function(df) {
message("Default method for `foo`")
df %>%
summarise(y = mean(x))
}
foo.tbl_df_custom <- function(df) {
message("Custom method for `foo`")
df %>%
summarise(y = mean(x) * 100)
}
ret <- df_custom %>%
group_by(id) %>%
do(foo(.))
#> Custom method for `group_by`
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl"
#> [5] "data.frame"
#> custom method for `do`
#> Default method for `foo`
#> Default method for `foo`
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl"
#> [5] "data.frame"
ret
#> # A tibble: 2 x 2
#> # Groups: id [2]
#> id y
#> <chr> <dbl>
#> 1 A 3
#> 2 B 8
ret %>% class()
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl"
#> [5] "data.frame"
While this looks okay on first sight, the problem is that the default instead of the custom method for foo
is called.
Created on 2019-01-08 by the reprex package (v0.2.1)
So the issue is related to this question I just asked. I was able to solve it by defining 3 new funtions: ungroup.tbl_df_custom
, a class constructor function, and [.tbl_df_custom
.
ungroup.tbl_df_custom <- function (.data, ...) {
message("custom method for `ungroup`")
result <- NextMethod("ungroup")
ret <- reclass(.data, result)
ret
}
new_custom <- function(x, ...) {
structure(x, class = c("tbl_df_custom", class(x)))
}
`[.tbl_df_custom` <- function(x, ...) {
new_custom(NextMethod())
}
df_custom2 <- new_custom(df)
df_custom2 %>%
group_by(id) %>%
do(foo(.))
Custom method for `group_by`
[1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl" "data.frame"
custom method for `do`
custom method for `ungroup`
Custom method for `foo`
Custom method for `summarise`
[1] "tbl_df_custom" "tbl_df" "tbl" "data.frame"
Custom method for `foo`
Custom method for `summarise`
[1] "tbl_df_custom" "tbl_df" "tbl" "data.frame"
[1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl" "data.frame"
custom method for `ungroup`
# A tibble: 2 x 2
# Groups: id [2]
id y
<chr> <dbl>
1 A 300
2 B 800