Search code examples
rparallel-processingtidyverseglmmtmbmumin

How to use MuMIn::dredge on models in nested data frame (edit: with parallel processing)


I have many glmmTMB models in a nested data frame that I would like to dredge iteratively, but I get this result even when trying to dredge a single model:

Error in eval(call("is.data.frame", gmCall$data), gmEnv) : object '"is.data.frame"' not found

Fitting the same models outside of the nested data frame works without issue as long as the data are saved as an object in the environment. Is there a fix to use nested data frames? Or maybe a list if necessary? I'd rather not have dozens of standalone data frames and accompanying models floating around.

Example using mtcars dataset:

# Example model to fit
mod_fx <- function(x) {
  glmmTMB(mpg ~ disp + drat + wt, 
          data = x,
          family = gaussian(),
          na.action = "na.fail")
}

# Grouping and nesting data
dat <- mtcars %>% 
  group_by(cyl) %>%
  nest() %>% 
  mutate(mod = map(data, mod_fx))

# Model summary works fine, but note: Data = x
# Related to problem?
summary(dat$mod[[1]])

# Dredge fails
dred_dat <- dredge(dat$mod[[1]],
                   rank = "AICc",
                   evaluate = T)

# Dredge does work when same model is built with separate data frame object stored in environment, not a nested data frame.
dat2 <- filter(mtcars, cyl == 6)

m1 <- glmmTMB(mpg ~ disp + drat + wt, 
        data = dat2,
        family = gaussian(),
        na.action = "na.fail")

summary(m1) # Same model results, (data = dat2), but dredge works:

dred_dat <- dredge(m1,
                   rank = "AICc",
                   evaluate = T)

# This also fails without data stored as separate object
m3 <- glmmTMB(mpg ~ disp + drat + wt, 
              data = filter(mtcars, cyl == 6),
              family = gaussian(),
              na.action = "na.fail")

summary(m3) # Data: filter(mtcars, cyl == 6)

dred_dat <- dredge(m3,
                   rank = "AICc",
                   evaluate = T)
# Error in dredge(m3, rank = "AICc", evaluate = T) : 'global.model' uses "data" that is a function value: use a variable instead

Edit:

Failed attempt to add parallel processing to Ben's solution:

clust <- try(makeCluster(detectCores()-1))
invisible(clusterCall(clust, "library", "glmmTMB",
                      character.only = TRUE))

dfun <- function(x) {
  g1 <- glmmTMB(mpg ~ disp + drat + wt, 
                data = x,
                family = gaussian(),
                na.action = "na.fail")
  clusterExport(clust, "data")
  dredge(g1, cluster = clust, rank = "AICc",  evaluate = TRUE)
}

dat <- mtcars %>% 
  group_by(cyl) %>%
  nest() %>% 
  mutate(dredged = map(data, dfun))

Edit: Successful parallelization of dredge on nested data with Ben's solution using doParallel

library(tidyverse)
library(MuMIn)
library(doParallel)

dfun <- function(x) {
  g1 <- glmmTMB(mpg ~ disp + drat + wt, 
                data = x,
                family = gaussian(),
                na.action = "na.fail")
  dredge(g1, rank = "AICc",  evaluate = TRUE)
}

dat <- mtcars %>% 
  group_by(cyl) %>%
  nest()

ncores <- detectCores() - 1L
registerDoParallel(ncores)

dat_dredge <- foreach(i = 1:nrow(dat),
                      .packages = c("MuMIn", "glmmTMB"),
                      .final = function(x) setNames(x, dat$cyl)) %dopar% {
                        dfun(dat$data[[i]])
                      }

Solution

  • Here's one possibility: put the model fitting and dredging within a single function, then nest/map.

    library(tidyverse)
    library(MuMIn)
    
    dfun <- function(x) {
       g1 <- glmmTMB(mpg ~ disp + drat + wt, 
              data = x,
              family = gaussian(),
              na.action = "na.fail")
       dredge(g1, rank = "AICc",  evaluate = TRUE)
    }
    dat <- mtcars %>% 
      group_by(cyl) %>%
      nest() %>% 
      mutate(dredged = map(data, dfun))
    
     dat$dredged[[1]]
    Global model call: glmmTMB(formula = mpg ~ disp + drat + wt, data = x, family = gaussian(), 
        na.action = "na.fail", ziformula = ~0, dispformula = ~1)
    ---
    Model selection table 
      cnd((Int)) dsp((Int)) cnd(dsp) cnd(drt) cnd(wt) df  logLik AICc delta weight
    1     19.740          +                            2 -12.011 31.0  0.00  0.750
    5     28.410          +                    -2.780  3  -9.825 33.7  2.63  0.201
    3     18.490          +            0.3503          3 -11.965 37.9  6.91  0.024
    2     19.080          + 0.003605                   3 -11.974 37.9  6.93  0.024
    6     28.190          + 0.019140           -3.835  4  -7.830 43.7 12.64  0.001
    7     30.650          +           -0.4436  -2.990  4  -9.702 47.4 16.38  0.000
    4      8.536          + 0.022440   1.9780          4 -11.481 51.0 19.94  0.000
    8     16.050          + 0.042090   2.3460  -3.989  5  -4.630 79.3 48.24  0.000
    Models ranked by AICc(x)