Hi all I had a question about non-standard evaluation. I fit a few models with different outcome variables and compute the marginal effects like this.
library(palmerpenguins)
library(marginaleffects)
library(sandwich)
library(tidyr)
library(dplyr)
long_pengs = penguins |>
pivot_longer(cols = c(body_mass_g, flipper_length_mm),
names_to = 'outcome',
values_to = 'vals') |>
drop_na(sex) |>
summarise(mods = list(lm(vals ~ sex * bill_length_mm, data = pick(everything()))), .by = outcome)
comps = long_pengs |>
rowwise(outcome) |>
reframe(avg_comparisons(mods,
variables = 'sex',
subset(sex == 'female')))
However when I try to cluster bootstrap the standard errors I run into these error message and am wondering how to resolve this issue. I am not wedded to doing this with non-standard evaluation.
# works
long_pengs |>
rowwise(outcome) |>
reframe(avg_comparisons(mods,
variables = 'sex',
subset(sex == 'female')) |>
inferences(method = 'rsample'))
long_pengs |>
rowwise(outcome) |>
reframe(avg_comparisons(mods,
variables = 'sex',
subset(sex == 'female'),
vcov = vcovBS(mods, cluster = ~species)))
#> Error in `reframe()`:
#> ℹ In argument: `avg_comparisons(...)`.
#> ℹ In row 1.
#> Caused by error:
#> ! Obsolete data mask.
#> ✖ Too late to resolve `species` after the end of `dplyr::summarise()`.
#> ℹ Did you save an object that uses `species` lazily in a column in the
#> `dplyr::summarise()` expression ?
long_pengs |>
rowwise(outcome) |>
reframe(avg_comparisons(mods,
variables = 'sex',
subset(sex == 'female')) |>
inferences(method = 'rsample', strata = species))
#> Error in `reframe()`:
#> ℹ In argument: `inferences(...)`.
#> ℹ In row 1.
#> Caused by error:
#> ! object 'species' not found
The desired output would look something like this
## desired output
m1 = lm(body_mass_g ~ sex * bill_length_mm, data = penguins)
c1 = avg_comparisons(m1, variables = 'sex',
subset(sex == 'female'),
vcov = vcovBS(m1, cluster = ~species))
m2 = lm(flipper_length_mm ~ sex * bill_length_mm, data = penguins)
c2 = avg_comparisons(m2, variables = 'sex',
subset(sex == 'female'),
vcov = vcovBS(m2, cluster = ~species))
rbind(c1, c2)
#>
#> Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> 420.487 293.10 1.435 0.151 2.7 -153.97 994.9
#> 0.392 5.16 0.076 0.939 0.1 -9.73 10.5
#>
#> Term: sex
#> Type: response
#> Comparison: mean(male) - mean(female)
#> Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted
Created on 2024-11-13 with reprex v2.1.1
You could do it by baking the data into the model call.
library(palmerpenguins)
library(marginaleffects)
library(sandwich)
library(tidyr)
library(dplyr)
long_pengs = penguins |>
pivot_longer(cols = c(body_mass_g, flipper_length_mm),
names_to = 'outcome',
values_to = 'vals') |>
drop_na(sex) |>
summarise(mods = list(do.call(lm, list(formula=vals ~ sex * bill_length_mm, data = pick(everything())))), .by = outcome)
long_pengs |>
rowwise(outcome) |>
reframe(avg_comparisons(mods,
variables = 'sex',
subset(sex == 'female'),
vcov = vcovBS(mods, cluster = ~species)))
#> # A tibble: 2 × 13
#> outcome term contrast estimate std.error statistic p.value s.value conf.low
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 body_mas… sex mean(ma… 420. 288. 1.46 0.144 2.79 -144.
#> 2 flipper_… sex mean(ma… 0.392 4.72 0.0831 0.934 0.0989 -8.86
#> # ℹ 4 more variables: conf.high <dbl>, predicted_lo <dbl>, predicted_hi <dbl>,
#> # predicted <dbl>
Created on 2024-11-13 with reprex v2.1.0