Search code examples
rbayesianr2jags

Bayesian Rolling Poisson Regression in Jags (via R2jags)


The Problem

I have a small dataset (N=100). I need to run a Poisson regression, but excluding one observation at a time (hence, a Rolling Poisson Regression).

There are several predictors in the equation, but I care about one (call it b.x). My idea is to see how much b.x varies across the 100 models. Then, I'd like to plot these 100 point estimates with the effect sizes on the Y-axis, and the model number in the X-axis.

In summary, I need the following:

  1. Run a Rolling Poisson Regression in JAGS (via R2jags).

  2. After getting the estimates, plot them.

Please note that my Poisson model in JAGS is running fine (below is a sample toy of my model/data). However, I haven't been able to implement the "Rolling" version.

The Self-Contained Example

# clear R
rm(list=ls())
cat("\014")

# load libraries
if (!require("pacman")) install.packages("pacman"); library(pacman) 
p_load(R2jags)


# Toy Data
N <- 100
x <- rnorm(n=N)  # standard Normal predictor
y <- rpois(n=N, lambda = 1)  # Poisson DV


# model
model <- function() {
    ## Likelihood
    for(i in 1:N){
            y[i] ~ dpois(lambda[i])
            log(lambda[i]) <- 
                    mu + # intercept
                    b.x*x[i]
            }
    ## Priors 
    mu ~  dnorm(0, 0.01) ## intercept
    b.x ~ dnorm(0, 0.01)
    }

# list elements
data.list <- list(N = N, y = y, x = x)

# run model
model.fit <- jags(
    data=data.list,
    inits=NULL,
    parameters.to.save = c("b.x"),
    n.chains = 1,
    n.iter = 20,
    n.burnin = 2, 
    model.file=model,
    progress.bar = "none")

Ok. That's the model. In model.fit there is b.x, the coefficient I have to get 100 times. With my current code, I am able to obtain it just once, with the full dataset. However, I need to obtain it a second time with the first row of the df excluded, and then a third time but with the second row of the df excluded, and so on and so forth. And then, plot all these b.x's.


Now, just for the sake of the example, I will create a simple table, just to signal that I need the first element (the coefficient of of b.x).

## I sourced this function below from    https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R

# Function to Create Table
mcmctab <- function(sims, ci = .8, digits = 2){

    require(coda) 

    if(class(sims) == "jags" | class(sims) == "rjags"){
            sims <- as.matrix(as.mcmc(sims))
    }
    if(class(sims) == "bugs"){
            sims <- sims$sims.matrix
    }  
    if(class(sims) == "mcmc"){
            sims <- as.matrix(sims)
    }    
    if(class(sims) == "mcmc.list"){
            sims <- as.matrix(sims)
    }      
    if(class(sims) == "stanfit"){
            stan_sims <- rstan::As.mcmc.list(sims)
            sims <- as.matrix(stan_sims)
    }      


    dat <- t(sims)

    mcmctab <- apply(dat, 1,
                     function(x) c(Mean = round(mean(x), digits = digits), # Posterior mean
                                   SD = round(sd(x), digits = 3), # Posterior SD
                                   Lower = as.numeric(
                                           round(quantile(x, probs = c((1 - ci) / 2)), 
                                                 digits = digits)), # Lower CI of posterior
                                   Upper = as.numeric(
                                           round(quantile(x, probs = c((1 + ci) / 2)), 
                                                 digits = digits)), # Upper CI of posterior
                                   Pr. = round(
                                           ifelse(mean(x) > 0, length(x[x > 0]) / length(x),
                                                  length(x[x < 0]) / length(x)), 
                                           digits = digits) # Probability of posterior >/< 0
                     ))
    return(t(mcmctab))
}


# this is the coefficient I need, but with different data frames.
mcmctab(model.fit)[1,1]

Sorry I can't even provide an attempted solution here. Thanks very much in advance.


Solution

  • # clear R
    rm(list=ls())
    
    # load libraries
    library(R2jags)
    
    # Toy Data
    set.seed(123) # set RNG seed for reproducibility
    N <- 100
    x <- rnorm(n=N)  # standard Normal predictor
    y <- rpois(n=N, lambda = 1)  # Poisson DV
    
    # model
    model <- function() {
      ## Likelihood
      for(i in 1:N){
        y[i] ~ dpois(lambda[i])
        log(lambda[i]) <- 
          mu + # intercept
          b.x*x[i]
      }
      ## Priors 
      mu ~  dnorm(0, 0.01) ## intercept
      b.x ~ dnorm(0, 0.01)
    }
    
    # list elements
    data.list <- list() # create empty list to fill in next line
    
    # fill list with one data set for each step, with one row excluded per step
    for(i in 1:100){
      data.list[[i]] <- list(N = 99, y = y[-i], x = x[-i])
    }
    
    # Starting value for reproducibility
    model.inits <- function(){
      list("b.x" = 0)
    }
    
    # run model
    model.fit <- list() # again, create empty list first
    
    for(i in 1:100){  # use loop here to fit one model per data set
    model.fit[[i]] <- jags(
      data=data.list[[i]],
      inits=NULL,
      parameters.to.save = c("b.x"),
      n.chains = 1,
      n.iter = 20,
      n.burnin = 2, 
      model.file=model,
      progress.bar = "none")
    }
    
    
    # helper function for output
    devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
    
    # create empty data frame to be filled with estimation results per data set
    tab <- data.frame(index = c(1:100), b = rep(NA, 100), lower = rep(NA, 100), upper = rep(NA, 100))
    
    # fill with estimates, using mcmctab to extract mean & lower & upper CIs
    for(i in 1:100){
      tab[i, 2] <- mcmctab(model.fit[[i]])[1, 1]
      tab[i, 3] <- mcmctab(model.fit[[i]])[1, 3]
      tab[i, 4] <- mcmctab(model.fit[[i]])[1, 4]
    }
    
    # plot results
    library(ggplot2)
    
    p <- ggplot(data = tab, aes(x = b, y = index)) + geom_point() + geom_segment(aes(x = lower, xend = upper, yend = index))
    p
    

    output

    I thank Johannes Karreth for kindly providing this great answer.