Search code examples
rmarginal-effects

Issue with calculating marginal effects for an ordered logit model in R with ocME


I am attempting to estimate an ordered logit model incl. the marginal effects in R through following the code from this tutorial. I am using polr from the MASS package to estimate the model and ocME from the erer package to attempt to calculate the marginal effects.

Estimating the model is no problem.

logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment, data = data, Hess = T,
                           method = "logistic")

However, I run into an issue with ocME which generates the error message below:

ocME(logitModelSentiment90)

Error in eval(predvars, data, env) : 
numeric 'envir' arg not of length one

The documentation below for ocME states that the object that should be used needs to come from the polr function which seems to be exactly what I am doing.

ocME(w, rev.dum = TRUE, digits = 3)
w = an ordered probit or logit model object estimated by polr from the MASS library.

So can anybody help me to understand what I am doing wrong? I have published a subset of my data with the two variables for the model here. In R I have the DV set up as a factor variable, the IV is continuous.

Side note:

I can pass the calculation to Stata from R with RStata to calculate the marginal effects without any problems. But I don't want to have to do this on a regular basis so I want to understand what is causing the issue with R and ocME.

stata("ologit availability_90_ord mean_sentiment
  mfx", data.in = data)
. ologit availability_90_ord mean_sentiment

Iteration 0:   log likelihood = -15379.121  
Iteration 1:   log likelihood = -15378.742  
Iteration 2:   log likelihood = -15378.742  

Ordered logistic regression                     Number of obs     =     11,901
                                                LR chi2(1)        =       0.76
                                                Prob > chi2       =     0.3835
Log likelihood = -15378.742                     Pseudo R2         =     0.0000

------------------------------------------------------------------------------
avail~90_ord |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
mean_senti~t |   .0044728   .0051353     0.87   0.384    -.0055922    .0145379
-------------+----------------------------------------------------------------
       /cut1 |   -1.14947   .0441059                     -1.235916   -1.063024
       /cut2 |  -.5286239    .042808                     -.6125261   -.4447217
       /cut3 |   .3127556   .0426782                      .2291079    .3964034
------------------------------------------------------------------------------
.       mfx

Marginal effects after ologit
      y  = Pr(availability_90_ord==1) (predict)
         =  .23446398
------------------------------------------------------------------------------
variable |      dy/dx    Std. Err.     z    P>|z|  [    95% C.I.   ]      X
---------+--------------------------------------------------------------------
mean_s~t |  -.0008028      .00092   -0.87   0.384  -.002609  .001004   7.55768
------------------------------------------------------------------------------

Solution

  • Your model has only one explanatory variable (mean_sentiment) and this seems to be a problem for ocME. Try for example to add a second variable to the model:

    logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment + I(mean_sentiment^2),
                                  data = data, Hess = T,  method = "logistic")
    ocME(logitModelSentiment90)
    
    #                     effect.0 effect.1 effect.2 effect.3
    # mean_sentiment        -0.004   -0.001        0    0.006
    # I(mean_sentiment^2)    0.000    0.000        0    0.000
    

    With minor modifications ocME can correctly run also with one independent variable.
    Try the following myocME function

    myocME <- function (w, rev.dum = TRUE, digits = 3) 
    {
        if (!inherits(w, "polr")) {
            stop("Need an ordered choice model from 'polr()'.\n")
        }
        if (w$method != "probit" & w$method != "logistic") {
            stop("Need a probit or logit model.\n")
        }
        lev <- w$lev
        J <- length(lev)
        x.name <- attr(x = w$terms, which = "term.labels")
        x2 <- w$model[, x.name, drop=FALSE]
        ww <- paste("~ 1", paste("+", x.name, collapse = " "), collapse = " ")
        x <- model.matrix(as.formula(ww), data = x2)[, -1, drop=FALSE]
        x.bar <- as.matrix(colMeans(x))
        b.est <- as.matrix(coef(w))
        K <- nrow(b.est)
        xb <- t(x.bar) %*% b.est
        z <- c(-10^6, w$zeta, 10^6)
        pfun <- switch(w$method, probit = pnorm, logistic = plogis)
        dfun <- switch(w$method, probit = dnorm, logistic = dlogis)
        V2 <- vcov(w)
        V3 <- rbind(cbind(V2, 0, 0), 0, 0)
        ind <- c(1:K, nrow(V3) - 1, (K + 1):(K + J - 1), nrow(V3))
        V4 <- V3[ind, ]
        V5 <- V4[, ind]
        f.xb <- dfun(z[1:J] - c(xb)) - dfun(z[2:(J + 1)] - c(xb))
        me <- b.est %*% matrix(data = f.xb, nrow = 1)
        colnames(me) <- paste("effect", lev, sep = ".")
        se <- matrix(0, nrow = K, ncol = J)
        for (j in 1:J) {
            u1 <- c(z[j] - xb)
            u2 <- c(z[j + 1] - xb)
            if (w$method == "probit") {
                s1 <- -u1
                s2 <- -u2
            }
            else {
                s1 <- 1 - 2 * pfun(u1)
                s2 <- 1 - 2 * pfun(u2)
            }
            d1 <- dfun(u1) * (diag(1, K, K) - s1 * (b.est %*% t(x.bar)))
            d2 <- -1 * dfun(u2) * (diag(1, K, K) - s2 * (b.est %*% 
                t(x.bar)))
            q1 <- dfun(u1) * s1 * b.est
            q2 <- -1 * dfun(u2) * s2 * b.est
            dr <- cbind(d1 + d2, q1, q2)
            V <- V5[c(1:K, K + j, K + j + 1), c(1:K, K + j, K + j + 
                1)]
            cova <- dr %*% V %*% t(dr)
            se[, j] <- sqrt(diag(cova))
        }
        colnames(se) <- paste("SE", lev, sep = ".")
        rownames(se) <- colnames(x)
        if (rev.dum) {
            for (k in 1:K) {
                if (identical(sort(unique(x[, k])), c(0, 1))) {
                    for (j in 1:J) {
                      x.d1 <- x.bar
                      x.d1[k, 1] <- 1
                      x.d0 <- x.bar
                      x.d0[k, 1] <- 0
                      ua1 <- z[j] - t(x.d1) %*% b.est
                      ub1 <- z[j + 1] - t(x.d1) %*% b.est
                      ua0 <- z[j] - t(x.d0) %*% b.est
                      ub0 <- z[j + 1] - t(x.d0) %*% b.est
                      me[k, j] <- pfun(ub1) - pfun(ua1) - (pfun(ub0) - 
                        pfun(ua0))
                      d1 <- (dfun(ua1) - dfun(ub1)) %*% t(x.d1) - 
                        (dfun(ua0) - dfun(ub0)) %*% t(x.d0)
                      q1 <- -dfun(ua1) + dfun(ua0)
                      q2 <- dfun(ub1) - dfun(ub0)
                      dr <- cbind(d1, q1, q2)
                      V <- V5[c(1:K, K + j, K + j + 1), c(1:K, K + 
                        j, K + j + 1)]
                      se[k, j] <- sqrt(c(dr %*% V %*% t(dr)))
                    }
                }
            }
        }
        t.value <- me/se
        p.value <- 2 * (1 - pt(abs(t.value), w$df.residual))
        out <- list()
        for (j in 1:J) {
            out[[j]] <- round(cbind(effect = me[, j], error = se[, 
                j], t.value = t.value[, j], p.value = p.value[, j]), 
                digits)
        }
        out[[J + 1]] <- round(me, digits)
        names(out) <- paste("ME", c(lev, "all"), sep = ".")
        result <- listn(w, out)
        class(result) <- "ocME"
        return(result)
    }
    

    and run the following code:

    logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment, 
                                  data = data, Hess = T,  method = "logistic")   
    myocME(logitModelSentiment90)
    
    #                effect.0 effect.1 effect.2 effect.3
    # mean_sentiment   -0.001        0        0    0.001