Search code examples
rregressionpanelplm

How to get between and overall R2 from plm FE regression?


Is there a way to get plm() to calculate between R2 and overall R2 for me and include them in the summary() output?

To clarify what I mean with between, overall, and within R2 see this answer on StackExchange.

My understanding is that plm only calculates within R2. I am running a Twoways effects Within Model.

A random example (adapted from here):

library(plm)
# Create some random data
set.seed(1) 
x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
y=x+fe+e

data=data.frame(y,x,id,ti)

# Get plm within R2
reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
summary(reg)$r.squared

I now also want to get overall and between R2:

# Pooled Version (overall R2)
reg1=lm(y~x)
summary(reg1)$r.squared

# Between R2
y.means=tapply(y,id,mean)[id]
x.means=tapply(x,id,mean)[id]

reg2=lm(y.means~x.means)
summary(reg3)$r.squared

Solution

  • It does not appear that {plm} can report the overall or within quote-unquote R-squared values. You can hack it by creating a custom summary and print method:

    summary.plm.full <- function (object, vcov = NULL, ...) 
    {
      vcov_arg <- vcov
    
      #add plm::: for plm functions so they are calllex correctly
      model <- plm:::describe(object, "model")
      effect <- plm:::describe(object, "effect")
      random.method <- plm:::describe(object, "random.method")
      object$r.squared <- c(rsq = r.squared(object), 
                            adjrsq = r.squared(object, dfcor = TRUE),
                            # add the two new r squared terms here
                            rsq_overall = r.squared(object, model = "pooled"),
                            rsq_btw = r.squared(update(object, effect = "individual", model = "between")))
    
      use.norm.chisq <- FALSE
      if (model == "random") 
        use.norm.chisq <- TRUE
      if (length(formula(object))[2] >= 2) 
        use.norm.chisq <- TRUE
      if (model == "ht") 
        use.norm.chisq <- TRUE
      object$fstatistic <- pwaldtest(object, test = ifelse(use.norm.chisq, 
                                                           "Chisq", "F"), vcov = vcov_arg)
      if (!is.null(vcov_arg)) {
        if (is.matrix(vcov_arg)) 
          rvcov <- vcov_arg
        if (is.function(vcov_arg)) 
          rvcov <- vcov_arg(object)
        std.err <- sqrt(diag(rvcov))
      }
      else {
        std.err <- sqrt(diag(stats::vcov(object)))
      }
      b <- coefficients(object)
      z <- b/std.err
      p <- if (use.norm.chisq) {
        2 * pnorm(abs(z), lower.tail = FALSE)
      }
      else {
        2 * pt(abs(z), df = object$df.residual, lower.tail = FALSE)
      }
      object$coefficients <- cbind(b, std.err, z, p)
      colnames(object$coefficients) <- if (use.norm.chisq) {
        c("Estimate", "Std. Error", "z-value", "Pr(>|z|)")
      }
      else {
        c("Estimate", "Std. Error", "t-value", "Pr(>|t|)")
      }
      if (!is.null(vcov_arg)) {
        object$rvcov <- rvcov
        rvcov.name <- paste0(deparse(substitute(vcov)))
        attr(object$rvcov, which = "rvcov.name") <- rvcov.name
      }
      object$df <- c(length(b), object$df.residual, length(object$aliased))
      class(object) <- c("summary.plm.full", "plm", "panelmodel")
      object
    }
    

    And for printing:

    print.summary.plm.full <- function (x, digits = max(3, getOption("digits") - 2), width = getOption("width"), 
              subset = NULL, ...) 
    {
      formula <- formula(x)
      has.instruments <- (length(formula)[2] >= 2)
      effect <- plm:::describe(x, "effect")
      model <- plm:::describe(x, "model")
      if (model != "pooling") {
        cat(paste(plm:::effect.plm.list[effect], " ", sep = ""))
      }
      cat(paste(plm:::model.plm.list[model], " Model", sep = ""))
      if (model == "random") {
        ercomp <- describe(x, "random.method")
        cat(paste(" \n   (", random.method.list[ercomp], "'s transformation)\n", 
                  sep = ""))
      }
      else {
        cat("\n")
      }
      if (has.instruments) {
        cat("Instrumental variable estimation\n")
        if (model != "within") {
          ivar <- plm:::describe(x, "inst.method")
          cat(paste0("   (", plm:::inst.method.list[ivar], "'s transformation)\n"))
        }
      }
      if (!is.null(x$rvcov)) {
        cat("\nNote: Coefficient variance-covariance matrix supplied: ", 
            attr(x$rvcov, which = "rvcov.name"), "\n", sep = "")
      }
      cat("\nCall:\n")
      print(x$call)
      cat("\n")
      pdim <- pdim(x)
      print(pdim)
      if (model %in% c("fd", "between")) {
        cat(paste0("Observations used in estimation: ", nobs(x), 
                   "\n"))
      }
      if (model == "random") {
        cat("\nEffects:\n")
        print(x$ercomp)
      }
      cat("\nResiduals:\n")
      df <- x$df
      rdf <- df[2L]
      if (rdf > 5L) {
        save.digits <- unlist(options(digits = digits))
        on.exit(options(digits = save.digits))
        print(plm:::sumres(x))
      }
      else if (rdf > 0L) 
        print(residuals(x), digits = digits)
      if (rdf == 0L) {
        cat("ALL", x$df[1L], "residuals are 0: no residual degrees of freedom!")
        cat("\n")
      }
      if (any(x$aliased, na.rm = TRUE)) {
        naliased <- sum(x$aliased, na.rm = TRUE)
        cat("\nCoefficients: (", naliased, " dropped because of singularities)\n", 
            sep = "")
      }
      else cat("\nCoefficients:\n")
      if (is.null(subset)) 
        printCoefmat(coef(x), digits = digits)
      else printCoefmat(coef(x)[subset, , drop = FALSE], digits = digits)
      cat("\n")
      cat(paste("Total Sum of Squares:    ", signif(plm:::tss.plm(x), digits), 
                "\n", sep = ""))
      cat(paste("Residual Sum of Squares: ", signif(deviance(x), 
                                                    digits), "\n", sep = ""))
      cat(paste("R-Squared:      ", signif(x$r.squared[1], digits), 
                "\n", sep = ""))
      cat(paste("Adj. R-Squared: ", signif(x$r.squared[2], digits), 
                "\n", sep = ""))
      # add the new r squared terms here
      cat(paste("Overall R-Squared:      ", signif(x$r.squared[3], digits), 
                "\n", sep = ""))
      cat(paste("Between R-Squared:      ", signif(x$r.squared[4], digits), 
                "\n", sep = ""))
      fstat <- x$fstatistic
      if (names(fstat$statistic) == "F") {
        cat(paste("F-statistic: ", signif(fstat$statistic), " on ", 
                  fstat$parameter["df1"], " and ", fstat$parameter["df2"], 
                  " DF, p-value: ", format.pval(fstat$p.value, digits = digits), 
                  "\n", sep = ""))
      }
      else {
        cat(paste("Chisq: ", signif(fstat$statistic), " on ", 
                  fstat$parameter, " DF, p-value: ", format.pval(fstat$p.value, 
                                                                 digits = digits), "\n", sep = ""))
      }
      invisible(x)
    }
    
    

    And now if we use the custom functions:

    library(plm)
    # Create some random data
    set.seed(1) 
    x=rnorm(100); fe=rep(rnorm(10),each=10); id=rep(1:10,each=10); ti=rep(1:10,10); e=rnorm(100)
    y=x+fe+e
    
    data=data.frame(y,x,id,ti)
    
    # Get plm within R2
    reg=plm(y~x,model="within",index=c("id","ti"), effect = "twoways", data=data)
    
    summary.plm.full(reg)
    

    Which prints:

    Twoways effects Within Model
    
    Call:
    plm(formula = y ~ x, data = data, effect = "twoways", model = "within", 
        index = c("id", "ti"))
    
    Balanced Panel: n = 10, T = 10, N = 100
    
    Residuals:
        Min.  1st Qu.   Median  3rd Qu.     Max. 
    -2.36060 -0.56664 -0.11085  0.56070  2.00869 
    
    Coefficients:
      Estimate Std. Error t-value  Pr(>|t|)    
    x  1.12765    0.11306  9.9741 1.086e-15 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Total Sum of Squares:    157.21
    Residual Sum of Squares: 70.071
    R-Squared:      0.55428
    Adj. R-Squared: 0.44842
    Overall R-Squared:      0.33672
    Between R-Squared:      0.17445
    F-statistic: 99.4829 on 1 and 80 DF, p-value: 1.0856e-15