Search code examples
rglmnewtons-method

Plotting newton-raphson/fisher scoring iterations in R


Is there a package in R plotting newton-raphson/fisher scoring iterations when fitting a glm modelel (from the stats package)?


Solution

  • I answered a very similar question yesterday. In your case however, things are a little simpler.

    Note that when you call glm, it eventually calls glm.fit (or any other method argument you specify to glm) which computes the solution path in the loop from lines 78 to 170. The current iteration's value of the coefficients is computed on line 97 using a .Call to a C function C_Cdqrls. As a hack, you can extract the current value of the coefficients to the global environment (fit$coefficients), within this loop, by modifying the glm.fit function like so:

    glm.fit.new = function (x, y, weights = rep(1, nobs), start = NULL, etastart = NULL, 
                            mustart = NULL, offset = rep(0, nobs), family = gaussian(), 
                            control = list(), intercept = TRUE) {
                              control <- do.call("glm.control", control)
                              x <- as.matrix(x)
                              xnames <- dimnames(x)[[2L]]
                              ynames <- if (is.matrix(y)) 
                                rownames(y)
                              else names(y)
                              conv <- FALSE
                              nobs <- NROW(y)
                              nvars <- ncol(x)
                              EMPTY <- nvars == 0
                              if (is.null(weights)) 
                                weights <- rep.int(1, nobs)
                              if (is.null(offset)) 
                                offset <- rep.int(0, nobs)
                              variance <- family$variance
                              linkinv <- family$linkinv
                              if (!is.function(variance) || !is.function(linkinv)) 
                                stop("'family' argument seems not to be a valid family object", 
                                     call. = FALSE)
                              dev.resids <- family$dev.resids
                              aic <- family$aic
                              mu.eta <- family$mu.eta
                              unless.null <- function(x, if.null) if (is.null(x)) 
                                if.null
                              else x
                              valideta <- unless.null(family$valideta, function(eta) TRUE)
                              validmu <- unless.null(family$validmu, function(mu) TRUE)
                              if (is.null(mustart)) {
                                eval(family$initialize)
                              }
                              else {
                                mukeep <- mustart
                                eval(family$initialize)
                                mustart <- mukeep
                              }
                              if (EMPTY) {
                                eta <- rep.int(0, nobs) + offset
                                if (!valideta(eta)) 
                                  stop("invalid linear predictor values in empty model", 
                                       call. = FALSE)
                                mu <- linkinv(eta)
                                if (!validmu(mu)) 
                                  stop("invalid fitted means in empty model", call. = FALSE)
                                dev <- sum(dev.resids(y, mu, weights))
                                w <- ((weights * mu.eta(eta)^2)/variance(mu))^0.5
                                residuals <- (y - mu)/mu.eta(eta)
                                good <- rep_len(TRUE, length(residuals))
                                boundary <- conv <- TRUE
                                coef <- numeric()
                                iter <- 0L
                              }
                              else {
                                coefold <- NULL
                                eta <- if (!is.null(etastart)) 
                                  etastart
                                else if (!is.null(start)) 
                                  if (length(start) != nvars) 
                                    stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s", 
                                                  nvars, paste(deparse(xnames), collapse = ", ")), 
                                         domain = NA)
                                else {
                                  coefold <- start
                                  offset + as.vector(if (NCOL(x) == 1L) 
                                    x * start
                                    else x %*% start)
                                }
                                else family$linkfun(mustart)
                                mu <- linkinv(eta)
                                if (!(validmu(mu) && valideta(eta))) 
                                  stop("cannot find valid starting values: please specify some", 
                                       call. = FALSE)
                                devold <- sum(dev.resids(y, mu, weights))
                                boundary <- conv <- FALSE
    
                                # EDIT: counter to create track of iterations
                                i <<- 1
                                for (iter in 1L:control$maxit) {
                                  good <- weights > 0
                                  varmu <- variance(mu)[good]
                                  if (anyNA(varmu)) 
                                    stop("NAs in V(mu)")
                                  if (any(varmu == 0)) 
                                    stop("0s in V(mu)")
                                  mu.eta.val <- mu.eta(eta)
                                  if (any(is.na(mu.eta.val[good]))) 
                                    stop("NAs in d(mu)/d(eta)")
                                  good <- (weights > 0) & (mu.eta.val != 0)
                                  if (all(!good)) {
                                    conv <- FALSE
                                    warning(gettextf("no observations informative at iteration %d", 
                                                     iter), domain = NA)
                                    break
                                  }
                                  z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
                                  w <- sqrt((weights[good] * mu.eta.val[good]^2)/variance(mu)[good])
                                  fit <- .Call(stats:::C_Cdqrls, x[good, , drop = FALSE] * 
                                                 w, z * w, min(1e-07, control$epsilon/1000), check = FALSE)
    
                                  #======================================================
                                  # EDIT: assign the coefficients to variables in the global namespace
                                  #======================================================
                                  assign(paste0("iteration_x_", i), fit$coefficients, 
                                         envir = .GlobalEnv)
                                  i <<- i + 1   # increase the counter
    
                                  if (any(!is.finite(fit$coefficients))) {
                                    conv <- FALSE
                                    warning(gettextf("non-finite coefficients at iteration %d", 
                                                     iter), domain = NA)
                                    break
                                  }
                                  if (nobs < fit$rank) 
                                    stop(sprintf(ngettext(nobs, "X matrix has rank %d, but only %d observation", 
                                                          "X matrix has rank %d, but only %d observations"), 
                                                 fit$rank, nobs), domain = NA)
                                  start[fit$pivot] <- fit$coefficients
                                  eta <- drop(x %*% start)
                                  mu <- linkinv(eta <- eta + offset)
                                  dev <- sum(dev.resids(y, mu, weights))
                                  if (control$trace) 
                                    cat("Deviance = ", dev, " Iterations - ", iter, 
                                        "\n", sep = "")
                                  boundary <- FALSE
                                  if (!is.finite(dev)) {
                                    if (is.null(coefold)) 
                                      stop("no valid set of coefficients has been found: please supply starting values", 
                                           call. = FALSE)
                                    warning("step size truncated due to divergence", 
                                            call. = FALSE)
                                    ii <- 1
                                    while (!is.finite(dev)) {
                                      if (ii > control$maxit) 
                                        stop("inner loop 1; cannot correct step size", 
                                             call. = FALSE)
                                      ii <- ii + 1
                                      start <- (start + coefold)/2
                                      eta <- drop(x %*% start)
                                      mu <- linkinv(eta <- eta + offset)
                                      dev <- sum(dev.resids(y, mu, weights))
                                    }
                                    boundary <- TRUE
                                    if (control$trace) 
                                      cat("Step halved: new deviance = ", dev, "\n", 
                                          sep = "")
                                  }
                                  if (!(valideta(eta) && validmu(mu))) {
                                    if (is.null(coefold)) 
                                      stop("no valid set of coefficients has been found: please supply starting values", 
                                           call. = FALSE)
                                    warning("step size truncated: out of bounds", 
                                            call. = FALSE)
                                    ii <- 1
                                    while (!(valideta(eta) && validmu(mu))) {
                                      if (ii > control$maxit) 
                                        stop("inner loop 2; cannot correct step size", 
                                             call. = FALSE)
                                      ii <- ii + 1
                                      start <- (start + coefold)/2
                                      eta <- drop(x %*% start)
                                      mu <- linkinv(eta <- eta + offset)
                                    }
                                    boundary <- TRUE
                                    dev <- sum(dev.resids(y, mu, weights))
                                    if (control$trace) 
                                      cat("Step halved: new deviance = ", dev, "\n", 
                                          sep = "")
                                  }
                                  if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
                                    conv <- TRUE
                                    coef <- start
                                    break
                                  }
                                  else {
                                    devold <- dev
                                    coef <- coefold <- start
                                  }
                                }
                                if (!conv) 
                                  warning("glm.fit: algorithm did not converge", call. = FALSE)
                                if (boundary) 
                                  warning("glm.fit: algorithm stopped at boundary value", 
                                          call. = FALSE)
                                eps <- 10 * .Machine$double.eps
                                if (family$family == "binomial") {
                                  if (any(mu > 1 - eps) || any(mu < eps)) 
                                    warning("glm.fit: fitted probabilities numerically 0 or 1 occurred", 
                                            call. = FALSE)
                                }
                                if (family$family == "poisson") {
                                  if (any(mu < eps)) 
                                    warning("glm.fit: fitted rates numerically 0 occurred", 
                                            call. = FALSE)
                                }
                                if (fit$rank < nvars) 
                                  coef[fit$pivot][seq.int(fit$rank + 1, nvars)] <- NA
                                xxnames <- xnames[fit$pivot]
                                residuals <- (y - mu)/mu.eta(eta)
                                fit$qr <- as.matrix(fit$qr)
                                nr <- min(sum(good), nvars)
                                if (nr < nvars) {
                                  Rmat <- diag(nvars)
                                  Rmat[1L:nr, 1L:nvars] <- fit$qr[1L:nr, 1L:nvars]
                                }
                                else Rmat <- fit$qr[1L:nvars, 1L:nvars]
                                Rmat <- as.matrix(Rmat)
                                Rmat[row(Rmat) > col(Rmat)] <- 0
                                names(coef) <- xnames
                                colnames(fit$qr) <- xxnames
                                dimnames(Rmat) <- list(xxnames, xxnames)
                              }
                              names(residuals) <- ynames
                              names(mu) <- ynames
                              names(eta) <- ynames
                              wt <- rep.int(0, nobs)
                              wt[good] <- w^2
                              names(wt) <- ynames
                              names(weights) <- ynames
                              names(y) <- ynames
                              if (!EMPTY) 
                                names(fit$effects) <- c(xxnames[seq_len(fit$rank)], rep.int("", 
                                                                                            sum(good) - fit$rank))
                              wtdmu <- if (intercept) 
                                sum(weights * y)/sum(weights)
                              else linkinv(offset)
                              nulldev <- sum(dev.resids(y, wtdmu, weights))
                              n.ok <- nobs - sum(weights == 0)
                              nulldf <- n.ok - as.integer(intercept)
                              rank <- if (EMPTY) 
                                0
                              else fit$rank
                              resdf <- n.ok - rank
                              aic.model <- aic(y, n, mu, weights, dev) + 2 * rank
                              list(coefficients = coef, residuals = residuals, fitted.values = mu, 
                                   effects = if (!EMPTY) fit$effects, R = if (!EMPTY) Rmat, 
                                   rank = rank, qr = if (!EMPTY) structure(fit[c("qr", "rank", 
                                                                                 "qraux", "pivot", "tol")], class = "qr"), family = family, 
                                   linear.predictors = eta, deviance = dev, aic = aic.model, 
                                   null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights, 
                                   df.residual = resdf, df.null = nulldf, y = y, converged = conv, 
                                   boundary = boundary)
                            }
    

    Note that this is a hack for a couple of reasons:
    1. The function C_Cdrqls is not exported by the package stats, and so we have to look for it within namespace:package:stats.
    2. This pollutes your global environment with the iteration values via a side-effect of the call to glm.fit.new, creating one vector per iteration. Side-effects are generally frowned upon in functional languages like R. You can probably clean the multiple objects bit up by creating a matrix or a data.frame and assign within that.

    However, once you have the iteration values extracted, you can do whatever you want with them, including plotting them.

    Here is what a call to glm with the newly defined glm.fit.new method would look like:

    counts = c(18,17,15,20,10,20,25,13,12)
    outcome = gl(3,1,9)
    treatment = gl(3,3)
    print(d.AD = data.frame(treatment, outcome, counts))
    glm.D93 = glm(counts ~ outcome + treatment, family = poisson(), 
                   control = list(trace = TRUE, epsilon = 1e-16), method = "glm.fit.new")
    

    You can check that the iteration parameter values have indeed been populated in the global environment:

    > ls(pattern = "iteration_x_")
     [1] "iteration_x_1"  "iteration_x_10" "iteration_x_11" "iteration_x_2" 
     [5] "iteration_x_3"  "iteration_x_4"  "iteration_x_5"  "iteration_x_6" 
     [9] "iteration_x_7"  "iteration_x_8"  "iteration_x_9"