Search code examples
rglmanova

R - passing object to function based on the NAME of the object


Suppose in R I have multiple GLM objects from multiple glm() function calls.

glm_01
glm_02
...
glm_nn

...and suppose that I want to do all possible pairwise comparisons using a chi-squared or F ANOVA test.

anova(glm_01, glm_02, test = "F")
anova(glm_01, glm_03, test = "F")
anova(glm_01, glm_04, test = "F")
...

I don't want to do this manually because the list of models is quite long. Instead I'd like to grab a list of relevant model objects (anything starting with "glm_") and do all pairwise comparisons automatically. However I'm unsure how to pass the model objects (rather than their names in string form) to the anova() function.

As a simple example:

data(mtcars)

# create some models

glm_01 <- glm(mpg ~ cyl                 , mtcars, family = gaussian())
glm_02 <- glm(mpg ~ cyl + disp          , mtcars, family = gaussian())
glm_03 <- glm(mpg ~ cyl + disp + hp     , mtcars, family = gaussian())
glm_04 <- glm(mpg ~ cyl + disp + hp + wt, mtcars, family = gaussian())

# get list of relevant model objects from the R environment

model_list <- ls()
model_list <- model_list[substr(model_list, 1, 4) == "glm_"]

# create a table to store the pairwise ANOVA results

n_models <- length(model_list)
anova_table <- matrix(0, nrow = n_models, ncol = n_models)

# loop through twice and do pairwise comparisons

for(row_index in 1:n_models) {
  for(col_index in 1:n_models) {
      anova_table[row_index, col_index] <- anova(model_list[row_index], model_list[col_index], test = "F")$'Pr(>F)'[2]
  }
}

...but of course this loop at the end doesn't work because I'm not passing model objects to anova(), I'm passing the names of the objects as strings instead. How do I tell anova() to use the object that the string refers to, instead of the string itself?

Thank you.

======================

Possible solution:

data(mtcars)

glm_list <- list()

glm_list$glm_01 <- glm(mpg ~ cyl                 , mtcars, family = gaussian())
glm_list$glm_02 <- glm(mpg ~ cyl + disp          , mtcars, family = gaussian())
glm_list$glm_03 <- glm(mpg ~ cyl + disp + hp     , mtcars, family = gaussian())
glm_list$glm_04 <- glm(mpg ~ cyl + disp + hp + wt, mtcars, family = gaussian())

# create a table to store the pairwise ANOVA results

n_models <- length(glm_list)
anova_table <- matrix(0, nrow = n_models, ncol = n_models)

# loop through twice and do pairwise comparisons

row_idx <- 0
col_idx <- 0

for(row_glm in glm_list)
{
  row_idx <- row_idx + 1
  
  for(col_glm in glm_list)
  {
    col_idx <- col_idx + 1
    anova_table[row_idx, col_idx] <- anova(row_glm, col_glm, test = "F")$'Pr(>F)'[2]
  }
  col_idx <- 0
}
row_idx <- 0

Solution

  • The easiest way to do this would be to keep all your models in a list. This makes it simple to iterate over them. For example, you can create all of your models and do a pairwise comparison between all of them like this:

    data(mtcars)
    
    f_list <- list(mpg ~ cyl, 
                   mpg ~ cyl + disp,
                   mpg ~ cyl + disp + hp,
                   mpg ~ cyl + disp + hp + wt)
    
    all_glms  <- lapply(f_list, glm, data = mtcars, family = gaussian)
    all_pairs <- as.data.frame(combn(length(all_glms), 2))
    result <- lapply(all_pairs, function(i) anova(all_glms[[i[1]]], all_glms[[i[2]]]))
    

    Which gives you:

    result
    #> $V1
    #> Analysis of Deviance Table
    #> 
    #> Model 1: mpg ~ cyl
    #> Model 2: mpg ~ cyl + disp
    #>   Resid. Df Resid. Dev Df Deviance
    #> 1        30     308.33            
    #> 2        29     270.74  1   37.594
    #> 
    #> $V2
    #> Analysis of Deviance Table
    #> 
    #> Model 1: mpg ~ cyl
    #> Model 2: mpg ~ cyl + disp + hp
    #>   Resid. Df Resid. Dev Df Deviance
    #> 1        30     308.33            
    #> 2        28     261.37  2   46.965
    #> 
    #> $V3
    #> Analysis of Deviance Table
    #> 
    #> Model 1: mpg ~ cyl
    #> Model 2: mpg ~ cyl + disp + hp + wt
    #>   Resid. Df Resid. Dev Df Deviance
    #> 1        30     308.33            
    #> 2        27     170.44  3   137.89
    #> 
    #> $V4
    #> Analysis of Deviance Table
    #> 
    #> Model 1: mpg ~ cyl + disp
    #> Model 2: mpg ~ cyl + disp + hp
    #>   Resid. Df Resid. Dev Df Deviance
    #> 1        29     270.74            
    #> 2        28     261.37  1   9.3709
    #> 
    #> $V5
    #> Analysis of Deviance Table
    #> 
    #> Model 1: mpg ~ cyl + disp
    #> Model 2: mpg ~ cyl + disp + hp + wt
    #>   Resid. Df Resid. Dev Df Deviance
    #> 1        29     270.74            
    #> 2        27     170.44  2    100.3
    #> 
    #> $V6
    #> Analysis of Deviance Table
    #> 
    #> Model 1: mpg ~ cyl + disp + hp
    #> Model 2: mpg ~ cyl + disp + hp + wt
    #>   Resid. Df Resid. Dev Df Deviance
    #> 1        28     261.37            
    #> 2        27     170.44  1   90.925
    

    Created on 2020-08-25 by the reprex package (v0.3.0)