
How to get marginal effects for categorical variables in mlogit?


I want to compute marginal effects for a "mlogit" object where the explanatory variable is categorical (a factor). While effects() returns something with numeric data, it fails with categorical data.

For simplicity I show a bivariate example below.

numeric variables

# with mlogit
library(mlogit)
ml.dat <- mlogit.data(df3, choice="y", shape="wide")
fit.mnl <- mlogit(y ~ 1 | x, data=ml.dat)

head(effects(fit.mnl, covariate="x", data=ml.dat))
#         FALSE       TRUE
# 1 -0.01534581 0.01534581
# 2 -0.01534581 0.01534581
# 3 -0.20629452 0.20629452
# 4 -0.06903946 0.06903946
# 5 -0.24174312 0.24174312
# 6 -0.39306240 0.39306240

# with glm
fit.glm <- glm(y ~ x, df3, family = binomial)

head(effects(fit.glm))
# (Intercept)           x                                                 
#  -0.2992979  -4.8449254   2.3394989   0.2020127   0.4616640   1.0499595 

factor variables

# transform to factor
df3F <- within(df3, x <- factor(x))
class(df3F$x) == "factor"
# [1] TRUE

While glm() still returns something,

# with glm
fit.glmF <- glm(y ~ x, df3F, family = binomial)

head(effects(fit.glmF))
# (Intercept)           x2           x3           x4           x5           x6 
# 0.115076511 -0.002568206 -0.002568206 -0.003145397 -0.003631992 -0.006290794

the mlogit() approach

# with mlogit
ml.datF <- mlogit.data(df3F, choice="y", shape="wide")
fit.mnlF <- mlogit(y ~ 1 | x, data=ml.datF)

head(effects(fit.mnlF, covariate="x", data=ml.datF))

throws this error:

Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
  contrasts can be applied only to factors with 2 or more levels
In addition: Warning message:
In Ops.factor(data[, covariate], eps) :

How could I solve this?

I already tried to adapt effects.mlogit() following this answer, but it didn't solve my problem.

Note: This question is related to this solution, which I want to apply to categorical explanatory variables.


edit

(To demonstrate the issue that arises when applying the given solution to the underlying problem from the question linked above; see comments.)

# new example ----
library(mlogit)
ml.d <- mlogit.data(df1, choice="y", shape="wide")
ml.fit <- mlogit(y ~ 1 | factor(x), reflevel="1", data=ml.d)

AME.fun2 <- function(betas) {
  aux <- model.matrix(y ~ x, df1)[, -1]
  ml.datF <- mlogit.data(data.frame(y=df1$y, aux), 
                         choice="y", shape="wide")
  frml <- mFormula(formula(paste("y ~ 1 |", paste(colnames(aux), 
                                                  collapse=" + "))))
  fit.mnlF <- mlogit(frml, data=ml.datF)
  fit.mnlF$coefficients <- betas  # probably?
  colMeans(effects(fit.mnlF, covariate="x2", data=ml.datF))  # first co-factor?
}

(AME.mnl <- AME.fun2(ml.fit$coefficients))

require(numDeriv)
grad <- jacobian(AME.fun2, ml.fit$coef)
(AME.mnl.se <- matrix(sqrt(diag(grad %*% vcov(ml.fit) %*% t(grad))), 
                      nrow=3, byrow=TRUE))
AME.mnl / AME.mnl.se
#  doesn't work yet though...

# probably "true" values, obtained from Stata:
# # ame
#         1      2      3      4      5
# 1.     NA     NA     NA     NA     NA   
# 2. -0.400  0.121 0.0971  0.113 0.0686   
# 3. -0.500 -0.179 0.0390  0.166 0.474 
#
# # z-values
#        1     2     3     4     5
# 1.    NA    NA    NA    NA    NA
# 2. -3.86  1.25  1.08  1.36  0.99
# 3. -5.29 -2.47  0.37  1.49  4.06   

data

df3 <- structure(list(x = c(11, 11, 7, 10, 9, 8, 9, 6, 9, 9, 8, 9, 11, 
7, 8, 11, 12, 5, 8, 8, 11, 6, 13, 12, 5, 8, 7, 11, 8, 10, 9, 
10, 7, 9, 2, 10, 3, 6, 11, 9, 7, 8, 4, 12, 8, 12, 11, 9, 12, 
9, 7, 7, 7, 10, 4, 10, 9, 6, 7, 8, 9, 13, 10, 8, 10, 6, 7, 10, 
9, 6, 4, 6, 6, 8, 6, 9, 3, 7, 8, 2, 8, 6, 7, 9, 10, 8, 6, 5, 
5, 7, 9, 1, 6, 11, 11, 9, 7, 8, 9, 9), y = c(TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, 
TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, 
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, 
TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, 
TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, 
TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, 
FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE
)), class = "data.frame", row.names = c(NA, -100L))

> summary(df3)
       x             y          
 Min.   : 1.00   Mode :logical  
 1st Qu.: 7.00   FALSE:48       
 Median : 8.00   TRUE :52       
 Mean   : 8.08                  
 3rd Qu.:10.00                  
 Max.   :13.00  

df1 <- structure(list(y = c(5, 4, 2, 2, 2, 3, 5, 4, 1, 1, 2, 4, 1, 4, 
5, 5, 2, 3, 3, 5, 5, 3, 2, 4, 5, 1, 3, 3, 4, 3, 5, 2, 4, 4, 5, 
5, 5, 2, 1, 5, 1, 3, 1, 4, 1, 2, 2, 4, 3, 1, 4, 3, 1, 1, 5, 2, 
5, 4, 2, 2, 4, 2, 3, 5, 4, 1, 2, 2, 3, 5, 2, 5, 3, 3, 3, 1, 3, 
1, 1, 4, 3, 4, 5, 2, 1, 1, 3, 1, 5, 4, 4, 2, 5, 3, 4, 4, 3, 1, 
5, 2), x = structure(c(2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 
2L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 
3L, 2L, 2L, 2L, 3L, 2L, 1L, 3L, 2L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 
1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 3L, 2L, 
2L, 2L, 3L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 2L, 2L, 1L, 2L, 
2L, 1L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 3L, 2L, 
3L, 2L, 3L, 1L, 2L, 3L, 3L, 1L, 2L, 2L), .Label = c("1", "2", 
"3"), class = "factor")), row.names = c(NA, -100L), class = "data.frame")

Solution

  • It is kind of expected that effects doesn't work with factors: otherwise the output would need yet another dimension (one set of effects per factor level), somewhat complicating the results, and it is quite reasonable that, just like in my solution below, one may instead want effects only for a certain factor level rather than for all levels. Also, as I explain below, marginal effects in the case of categorical variables are not uniquely defined, which would be an additional complication for effects.

    A natural workaround is to manually convert the factor variables into a series of dummy variables as in

    aux <- model.matrix(y ~ x, df3F)[, -1]
    head(aux)
    #   x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13
    # 1  0  0  0  0  0  0  0  0   0   1   0   0
    # 2  0  0  0  0  0  0  0  0   0   1   0   0
    # 3  0  0  0  0  0  1  0  0   0   0   0   0
    # 4  0  0  0  0  0  0  0  0   1   0   0   0
    # 5  0  0  0  0  0  0  0  1   0   0   0   0
    # 6  0  0  0  0  0  0  1  0   0   0   0   0
    

    so that the data then is

    ml.datF <- mlogit.data(data.frame(y = df3F$y, aux), choice = "y", shape = "wide")
    

    We also need to construct the formula manually with

    frml <- mFormula(formula(paste("y ~ 1 |", paste(colnames(aux), collapse = " + "))))
    

    So far so good. Now if we run

    fit.mnlF <- mlogit(frml, data = ml.datF)
    head(effects(fit.mnlF, covariate = "x2", data = ml.datF))
    #           FALSE         TRUE
    # 1 -1.618544e-15 0.000000e+00
    # 2 -1.618544e-15 0.000000e+00
    # 3 -7.220891e-08 7.221446e-08
    # 4 -1.618544e-15 0.000000e+00
    # 5 -5.881129e-08 5.880851e-08
    # 6 -8.293366e-08 8.293366e-08
    

    then the results are not correct. What effects did here is treat x2 as a continuous variable and compute the usual marginal effect for that case. Namely, if the coefficient corresponding to x2 is b2 and our model is f(x, b2), effects computed the derivative of f with respect to x2, evaluated at each observed vector xi. This is wrong because x2 only takes the values 0 and 1, not something around 0 or around 1, which is what taking the derivative assumes (the concept of a limit). For instance, consider your other dataset df1. In that case we incorrectly get

    colMeans(effects(fit.mnlF, covariate = "x2", data = ml.datF))
    #           1           2           3           4           5 
    # -0.25258378  0.07364406  0.05336283  0.07893391  0.04664298
    

    Here's another way (using derivative approximation) to get this incorrect result:

    temp <- ml.datF
    temp$x2 <- temp$x2 + 0.0001
    colMeans(predict(fit.mnlF, newdata = temp, type = "probabilities") - 
                 predict(fit.mnlF, newdata = ml.datF, type = "probabilities")) / 0.0001
    #           1           2           3           4           5 
    # -0.25257597  0.07364089  0.05336032  0.07893273  0.04664202 
    

    Instead of using effects, I computed the wrong marginal effects by hand using predict twice: the result is mean({fitted probability with x2new = x2old + 0.0001} - {fitted probability with x2new = x2old}) / 0.0001. That is, we looked at the change in the predicted probability from moving x2 up by 0.0001, which is either from 0 to 0.0001 or from 1 to 1.0001. Neither of these makes sense. Of course, we shouldn't expect anything else from effects, since x2 in the data is numeric.
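
    To see the distinction in the simplest possible case, here is a self-contained binary-logit sketch (the simulated data and model here are mine, purely for illustration): for a dummy regressor, the derivative-based quantity that effects mimics generally differs from the discrete change that a factor calls for.

    set.seed(1)
    d <- rbinom(200, 1, 0.5)                # a dummy regressor
    yy <- rbinom(200, 1, plogis(-0.5 + d))  # hypothetical binary outcome
    m <- glm(yy ~ d, family = binomial)
    b <- coef(m)
    # derivative-based "effect", treating d as continuous (what effects() mimics)
    mean(dlogis(b[1] + b[2] * d) * b[2])
    # discrete change from d = 0 to d = 1, which is what a factor calls for
    plogis(b[1] + b[2]) - plogis(b[1])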

    So then the question is how to compute the right (average) marginal effects. As I said, the marginal effect of a categorical variable is not uniquely defined. Suppose that x_i indicates whether individual i has a job and y_i whether they have a car. There are then at least the following six quantities to consider.

    1. Effect on the probability of y_i = 1 when going from x_i=0 to x_i=1.
    2. When going from x_i=0 to x_i (the observed value).
    3. From x_i to 1.

    Now when we are interested in average marginal effects, we may want to average only over those individuals for whom the change in 1-3 makes a difference. That is,

    4. From x_i=0 to x_i=1 if the observed value isn't 1.
    5. From x_i=0 to x_i if the observed value isn't 0.
    6. From x_i to 1 if the observed value isn't 1.
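
    For instance, option 4 can be sketched in the same predict-twice style as above. This assumes fit.mnlF and ml.datF have been rebuilt from df1's dummies exactly as inside AME.fun2 below, so take it as an illustration of the idea rather than standalone code:

    at0 <- ml.datF; at0$x3 <- 0                       # counterfactual: x3 = 0
    at1 <- ml.datF; at1$x3 <- 1; at1$x2 <- 0          # counterfactual: x3 = 1 (x2 must then be 0)
    idx <- unique(ml.datF[ml.datF$x3 != 1, "chid"])   # those not already observed at x3 = 1
    colMeans(predict(fit.mnlF, newdata = at1)[idx, ] -
             predict(fit.mnlF, newdata = at0)[idx, ])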

    According to your results, Stata uses option 5, so I'll reproduce those results below; it is straightforward to implement any of the other options in the same way (option 4 is sketched above), and I suggest thinking about which ones are interesting in your particular application.

    AME.fun2 <- function(betas) {
      aux <- model.matrix(y ~ x, df1)[, -1]
      ml.datF <- mlogit.data(data.frame(y = df1$y, aux), choice="y", shape="wide")
      frml <- mFormula(formula(paste("y ~ 1 |", paste(colnames(aux), collapse=" + "))))
      fit.mnlF <- mlogit(frml, data = ml.datF)
      fit.mnlF$coefficients <- betas # Inject betas so jacobian() can differentiate the AME w.r.t. them
      aux <- ml.datF # Auxiliary dataset
      aux$x3 <- 0 # Going from 0 to the observed x_i (option 5)
      idx <- unique(aux[aux$x3 != ml.datF$x3, "chid"]) # Where does it make a change?
      actual <- predict(fit.mnlF, newdata = ml.datF)
      counterfactual <- predict(fit.mnlF, newdata = aux)
      colMeans(actual[idx, ] - counterfactual[idx, ])
    }
    (AME.mnl <- AME.fun2(ml.fit$coefficients))
    #           1           2           3           4           5 
    # -0.50000000 -0.17857142  0.03896104  0.16558441  0.47402597 
    
    require(numDeriv)
    grad <- jacobian(AME.fun2, ml.fit$coef)
    AME.mnl.se <- matrix(sqrt(diag(grad %*% vcov(ml.fit) %*% t(grad))), nrow = 1, byrow = TRUE)
    AME.mnl / AME.mnl.se
    #           [,1]      [,2]    [,3]     [,4]     [,5]
    # [1,] -5.291503 -2.467176 0.36922 1.485058 4.058994
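
    Finally, to reproduce the full Stata table above (rows for factor levels 2 and 3), the same option-5 computation can be wrapped over each dummy column. AME.level is a hypothetical helper name of mine, not part of mlogit; this is a sketch under the same setup as above.

    AME.level <- function(dummy) {
      function(betas) {
        aux <- model.matrix(y ~ x, df1)[, -1]
        ml.datF <- mlogit.data(data.frame(y = df1$y, aux), choice = "y", shape = "wide")
        frml <- mFormula(formula(paste("y ~ 1 |", paste(colnames(aux), collapse = " + "))))
        fit.mnlF <- mlogit(frml, data = ml.datF)
        fit.mnlF$coefficients <- betas
        cf <- ml.datF
        cf[[dummy]] <- 0 # option 5: set the level back to the base
        idx <- unique(cf[cf[[dummy]] != ml.datF[[dummy]], "chid"])
        colMeans(predict(fit.mnlF, newdata = ml.datF)[idx, ] -
                 predict(fit.mnlF, newdata = cf)[idx, ])
      }
    }

    # z-values for both factor levels, option 5 throughout
    t(sapply(c("x2", "x3"), function(d) {
      f <- AME.level(d)
      g <- numDeriv::jacobian(f, ml.fit$coef)
      f(ml.fit$coefficients) / sqrt(diag(g %*% vcov(ml.fit) %*% t(g)))
    }))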