Search code examples
regressionbayesianpymc3ordinal

Using a metric predictor when modelling ordinal predicted variable in PyMC3


I am trying to implement the ordered probit regression model from chapter 23.4 of Doing Bayesian Data Analysis (Kruschke) in PyMC3. After sampling, the posterior distribution for the intercept and slope are not really comparable to the results from the book. I think there is some fundamental issue with the model definition, but I fail to see it.

Data: X is the metric predictor (standardized to zX), Y are ordinal outcomes (1-7).

nYlevels3 = df3.Y.nunique()

# Setting the thresholds for the ordinal outcomes. The outer sides are
# fixed, while the others are estimated.

thresh3 = [k + .5 for k in range(1, nYlevels3)]
thresh_obs3 = np.ma.asarray(thresh3)
thresh_obs3[1:-1] = np.ma.masked

@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
    out = np.empty((mu.size, nYlevels3))
    n = norm(loc=mu, scale=sigma)       
    out[:,0] = n.cdf(theta[0])        
    out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])])
    out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])])
    out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])])
    out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])])
    out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])])
    out[:,6] = 1 - n.cdf(theta[5])
    return out

with pm.Model() as ordinal_model_metric:    

    theta = pm.Normal('theta', mu=thresh3, tau=np.repeat(1/2**2, len(thresh3)),
                      shape=len(thresh3), observed=thresh_obs3, testval=thresh3[1:-1])

    # Intercept
    zbeta0 = pm.Normal('zbeta0', mu=(1+nYlevels3)/2, tau=1/nYlevels3**2)

    # Slope
    zbeta = pm.Normal('zbeta', mu=0.0, tau=1/nYlevels3**2)

    # Mean of the underlying metric distribution
    mu = pm.Deterministic('mu', zbeta0 + zbeta*zX)

    zsigma = pm.Uniform('zsigma', nYlevels3/1000.0, nYlevels3*10.0)

    pr = outcome_probabilities(theta, mu, zsigma)

    y = pm.Categorical('y', pr, observed=df3.Y.cat.codes)

http://nbviewer.jupyter.org/github/JWarmenhoven/DBDA-python/blob/master/Notebooks/Chapter%2023.ipynb

For reference, here is the JAGS model used by Kruschke on which I based my model:

Ntotal = length(y)
# Threshold 1 and nYlevels-1 are fixed; other thresholds are estimated.
# This allows all parameters to be interpretable on the response scale.
nYlevels = max(y)  
thresh = rep(NA,nYlevels-1)
thresh[1] = 1 + 0.5
thresh[nYlevels-1] = nYlevels-1 + 0.5

 modelString = "
  model {
    for ( i in 1:Ntotal ) {
      y[i] ~ dcat( pr[i,1:nYlevels] )
      pr[i,1] <- pnorm( thresh[1] , mu[x[i]] , 1/sigma[x[i]]^2 )
      for ( k in 2:(nYlevels-1) ) {
        pr[i,k] <- max( 0 ,  pnorm( thresh[ k ] , mu[x[i]] , 1/sigma[x[i]]^2 )
                           - pnorm( thresh[k-1] , mu[x[i]] , 1/sigma[x[i]]^2 ) )
      }
      pr[i,nYlevels] <- 1 - pnorm( thresh[nYlevels-1] , mu[x[i]] , 1/sigma[x[i]]^2 )
    }
    for ( j in 1:2 ) { # 2 groups
      mu[j] ~ dnorm( (1+nYlevels)/2 , 1/(nYlevels)^2 )
      sigma[j] ~ dunif( nYlevels/1000 , nYlevels*10 )
    }
    for ( k in 2:(nYlevels-2) ) {  # 1 and nYlevels-1 are fixed, not stochastic
      thresh[k] ~ dnorm( k+0.5 , 1/2^2 )
    }
  }

Solution

  • It was not a fundamental issue after all: I forgot to indicate the axis for np.max() in the function below.

    @as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
    def outcome_probabilities(theta, mu, sigma):
        out = np.empty((mu.size, nYlevels3))
        n = norm(loc=mu, scale=sigma)       
        out[:,0] = n.cdf(theta[0])        
        out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])], axis=0)
        out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])], axis=0)
        out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])], axis=0)
        out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])], axis=0)
        out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])], axis=0)
        out[:,6] = 1 - n.cdf(theta[5])
        return out