Search code examples
rpairwiseemmeans

emmeans Warning in model.frame.default(formula, data = data, ...) : variable 'Group' is not a factor


The data for this question is as follows

example<-structure(structure(list(Group = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", 
"2", "3"), class = "factor"), Subject = c(300L, 300L, 300L, 300L, 
300L, 300L, 300L, 300L, 300L, 300L, 301L, 301L, 301L, 301L, 301L, 
301L, 301L, 301L, 301L, 301L, 302L, 302L, 302L, 302L, 302L, 302L, 
302L, 302L, 302L, 302L, 303L, 303L, 303L, 303L, 303L, 303L, 303L, 
303L, 304L, 304L, 304L, 304L, 304L, 304L, 304L, 304L, 304L, 304L, 
305L, 305L, 305L, 305L, 305L, 305L, 305L, 305L, 305L, 305L, 306L, 
306L, 306L, 306L, 306L, 306L, 306L, 306L, 306L, 306L, 306L, 307L, 
307L, 307L, 307L, 307L, 307L, 307L, 307L, 307L, 307L, 307L, 308L, 
308L, 308L, 308L, 308L, 308L, 308L, 308L, 308L, 308L, 308L, 309L, 
309L, 309L, 309L, 309L, 309L, 309L, 309L, 309L, 309L, 309L, 310L, 
310L, 310L, 310L, 310L, 310L, 310L, 310L, 310L, 310L, 310L, 311L, 
311L, 311L, 311L, 311L, 311L, 311L, 311L, 311L, 311L, 311L, 312L, 
312L, 312L, 312L, 312L, 312L, 312L, 312L, 312L, 312L, 312L, 313L, 
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 314L, 
314L, 314L, 314L, 314L, 314L, 314L, 314L, 314L, 314L, 315L, 315L, 
315L, 315L, 315L, 315L, 315L, 315L, 315L, 315L, 316L, 316L, 316L, 
316L, 316L, 316L, 316L, 316L, 316L, 316L, 317L, 317L, 317L, 317L, 
317L, 317L, 317L, 317L, 317L, 317L, 318L, 318L, 318L, 318L, 318L, 
318L, 318L, 318L, 318L, 318L, 319L, 319L, 319L, 319L, 319L, 319L, 
319L, 319L, 319L, 319L, 319L, 320L, 320L, 320L, 320L, 320L, 320L, 
320L, 320L, 320L, 320L, 320L, 321L, 321L, 321L, 321L, 321L, 321L, 
321L, 321L, 321L, 321L, 321L, 322L, 322L, 322L, 322L, 322L, 322L, 
322L, 322L, 322L, 322L, 322L, 323L, 323L, 323L, 323L, 323L, 323L, 
323L, 323L, 323L, 323L, 324L, 324L, 324L, 324L, 324L, 324L, 324L, 
324L, 324L, 324L, 325L, 325L, 325L, 325L, 325L, 325L, 325L, 325L, 
325L, 325L, 326L, 326L, 326L, 326L, 326L, 326L, 326L, 326L, 326L, 
326L, 327L, 327L, 327L, 327L, 327L, 327L, 327L, 327L, 327L, 327L
), Day = structure(c(1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 1L, 
2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 1L, 2L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 1L, 2L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
3L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 1L, 2L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 3L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), .Label = c("0", "1", 
"10", "2", "3", "4", "5", "6", "7", "8", "9"), class = "factor"), 
    Pel = c(0L, 0L, 0L, 0L, 182L, 347L, 185L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 259L, 
    387L, 400L, 400L, 365L, 0L, 0L, 0L, 62L, 382L, 400L, 400L, 
    400L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 69L, 90L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 167L, 
    378L, 252L, 382L, 216L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 12L, 300L, 385L, 278L, 0L, 
    38L, 0L, 0L, 0L, 0L, 0L, 180L, 389L, 400L, 397L, 398L, 362L, 
    206L, 0L, 0L, 0L, 0L, 303L, 382L, 400L, 399L, 391L, 296L, 
    359L, 165L, 0L, 0L, 0L, 112L, 400L, 389L, 350L, 228L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 104L, 380L, 360L, 330L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 218L, 373L, 340L, 
    352L, 135L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 248L, 400L, 
    352L, 400L, 0L, 0L, 0L, 0L, 101L, 236L, 250L, 166L, 0L, 0L, 
    0L, 0L, 94L, 167L, 323L, 329L, 400L, 374L, 371L, 240L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    196L, 395L, 398L, 374L, 261L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    390L, 397L, 400L, 389L, 373L, 342L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 296L, 393L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 43L, 
    194L, 174L, 0L, 0L, 0L)), row.names = c(NA, -290L), class = c("tbl_df", 
"tbl", "data.frame")))

When I run the following code

lmm <- lmer(Pel ~ as.factor(Group)*as.factor(Day) +  (1 |Subject), data=example)

summary(lmm)
broom.mixed::tidy(lmm,conf.int=T)

emmeans(lmm, pairwise ~ Group | Day, adjust = "bonferroni") # | Day performs pairwise comparisons by day

I get the following error message

Warning in model.frame.default(formula, data = data, ...) : variable 'Group' is not a factor Warning in model.frame.default(formula, data = data, ...) : variable 'Day' is not a factor

The pairwise comparisons of the groups provides confidence intervals and p values.

I would like to know why I am getting this error, how it can be avoided and if the results of the pairwise comparisons are valid.

Thank you


Solution

  • I did:

    # lmm = ... (as in OP)
    rg = ref_grid(lmm)  # (same warning messages)
    
    lmm2 = lmer(Pel ~ Group*Day +  (1 |Subject), data=example)
    rg2 = ref_grid(lmm2)  # (no warnings)
    
    summary(as.numeric(rg@linfct - rg2@linfct))
    ##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    ##       0       0       0       0       0       0 
    

    I have faith in the results from lmm2, and the above shows that the reference grid from lmm has the identical linear functions. So at least we know we can trust the estimates and contrasts you obtained from lmm.

    I ran the call for rg with debugging on, and the warning occurs in this code line in emm_basis.merMod:

    m = model.frame(trms, grid, na.action = na.pass, xlev = xlev)
    

    The last argument, xlev, is a list with names "Group" and "Day". If, before I run that line in the debugger, I do

    names(xlev) = c("as.factor(Group)", "as.factor(Day)")
    

    then the warning goes away.

    Interestingly, if we do:

    example = transform(example, ngrp = as.numeric(Group), nday = as.numeric(Day))
    lmm3 = lmer(Pel ~ as.factor(ngrp)*as.factor(nday) +  (1 |Subject), data=example)
    rg3 = ref_grid(lmm3)
    

    This works fine, with no warnings. The issue is that there is special code that tracks situations where a numeric variable is coerced to a factor; but that tracking is not done when it is already a factor.

    I think this will generally be a harmless error. It may be possible to fix emmeans keep such warnings from happening, but it would be complicated because it would involve matching the factor names in trms (in the call shown above) with the names in the model formula. I'd rather not go there if I can avoid it.