Search code examples
rjagsrjagsrunjags

JAGS: Just another GIbbs Sampler Possible Directed Cycle Error - Can't find


I'm adapting some open source code to look at how environmental and organismal variables effect infection probability. I have been getting an error that states that there is a possible directed cycle involving resid_pred on line 10. Sorry for the long code but I wanted to include everything as perhaps the error is happening somewhere else in the model? ANY help is greatly appreciated.

cat("
model {
#Sensitivity and specificity
spec <- sum(ifelse(equals(pred, infected) && infected == 0, 1, 0)) / sum(equals(infected, 0))
sens <- sum(ifelse(equals(pred, infected) && infected == 1, 1, 0)) / sum(equals(infected, 1))

#Posterior predictive check
ppc <- step(fit_pred - fit_data)
fit_data <- sum(resid_data)
fit_pred <- sum(resid_pred)
resid_data <- abs(infected) - prob
resid_pred <- abs(pred) - prob

#Phylogenetic logistic regression model
#Logistic regression w/ varying intercepts for each host species
for(i in 1:n_obs){

#likelihood
infected[i] ~ dbern(prob[i])

#simulated prediction
pred[i] ~ dbern(prob[i])

#linear predictor
logit(prob[i]) <- reg[ecoregion[i]] + alpha[hostsp[i]] +
phy[hostsp[i]] + beta_male * male[i] +
beta_bodymass * bodymass[i] + beta_migratory * migratory[i] + beta_adult * adult[i] +
beta_high_canopy * high_canopy[i] + beta_min_rain * min_rain[i] +
beta_max_rain * max_rain[i] + beta_min_temp * min_temp[i] +
beta_max_temp * max_temp[i] + beta_ndvi * ndvi[i] +
beta_ndwi * ndwi[i] +
beta_elevation * elevation[i]
}

#Impute any missing covariate values
for(i in 1:n_obs){
male[i] ~ dbern(0.5);
adult[i] ~ dbern(0.5)
}

#Estimate varying intercepts among regions
for(r in 1:n_ecoregion){
reg[r] ~ dnorm(0, tau_reg);
}
tau_reg <- pow(sigma_reg,-2); sigma_reg ~ dexp(0.5)

#Estimate varying intercepts among host species from normal distributions
for (j in 1:n_species){
alpha[j] ~ dnorm(0, tau_alpha);
}
tau_alpha <- pow(sigma_alpha,-2); sigma_alpha ~ dexp(0.5)

#Multivariate normal phylogenetic component
#invA_array = array of inverse phylo vcv matrices
#K indicates which vcv matrix to sample (see below)
phy[1:n_species] ~ dmnorm(zeros[], tau_phy*invA_array[,,K])

#Precision for phylogenetic cov matrix (inverted bc phylo vcv matrices are inverted)
tau_phy <- 1 / pow(sigma_phy, 2); sigma_phy ~ dexp(0.5)

#Prior for sampling phylogenetic vcv matrices with equal probabilities
K ~ dcat(p[])
for (k in 1:n_tree) {
p[k] <- 1 / n_tree
}

#Gibbs variable selection for beta coefficients
beta_high_canopy <- beta_high_canopyT * Ind_high_canopy
beta_high_canopyT ~ dnorm(0, tau_sel[1, Ind_high_canopy + 1])
Ind_high_canopy ~ dbern(pind) #indicators determine whether to sample from spike or slab

beta_min_rain <- beta_min_rainT * Ind_min_rain
beta_min_rainT ~ dnorm(0, tau_sel[2, Ind_min_rain + 1])
Ind_min_rain ~ dbern(pind)

beta_max_rain <- beta_max_rainT * Ind_max_rain
beta_max_rainT ~ dnorm(0, tau_sel[3, Ind_max_rain + 1])
Ind_max_rain ~ dbern(pind)

beta_min_temp <- beta_min_tempT * Ind_min_temp
beta_min_tempT ~ dnorm(0, tau_sel[4, Ind_min_temp + 1])
Ind_min_temp ~ dbern(pind)

beta_max_temp <- beta_max_tempT * Ind_max_temp
beta_max_tempT ~ dnorm(0, tau_sel[5, Ind_max_temp + 1])
Ind_max_temp ~ dbern(pind)

beta_male <- beta_maleT * Ind_male
beta_maleT ~ dnorm(0, tau_sel[6, Ind_male + 1])
Ind_male ~ dbern(pind)

beta_bodymass <- beta_bodymassT * Ind_bodymass
beta_bodymassT ~ dnorm(0, tau_sel[7, Ind_bodymass + 1])
Ind_bodymass ~ dbern(pind)

beta_ndvi <- beta_ndviT * Ind_ndvi
beta_ndviT ~ dnorm(0, tau_sel[8, Ind_ndvi + 1])
Ind_ndvi ~ dbern(pind)

beta_ndwi<-beta_ndwiT*Ind_ndwi
beta_ndwiT~dnorm(0,tau_sel[9,Ind_ndwi+1])
Ind_ndwi~dbern(pind)

beta_migratory<-beta_migratoryT*Ind_migratory
beta_migratoryT~dnorm(0,tau_sel[10,Ind_migratory+1])
Ind_migratory~dbern(pind)

beta_adult<-beta_adultT*Ind_adult
beta_adultT~dnorm(0,tau_sel[11,Ind_adult +1])
Ind_adult ~dbern(pind)

beta_elevation<-beta_elevationT*Ind_elevation
beta_elevationT~dnorm(0,tau_sel[12,Ind_elevation+1])
Ind_elevation~dbern(pind)

#Sample the indicators and formulate the spike and slab distributions
for(s in 1:12){
tau_sel[s,2]<-pow(sd_sel[s],-2);
#slab is a vague normal prior
sd_sel[s]~dexp(0.5);
tau_sel[s,1]~dunif(85,100);
#spike concentrates at zero with low variance
}
pind <-0.2
#coefficients have ~ 20% inclusion probability
}
",
  file=(phylo.jags.mod<-tempfile()))

#Model Run Parameters
na = 25000 # adaptation
nb = 3e+05 # burnin
ni = 25000 # samples
nc = 2 # chains


library(MCMCpack)
library(runjags)
Sys.setenv(JAGS_HOME = "C:/Program Files/JAGS/JAGS-4.3.1")
library(rjags)
load.module("glm")



param<-c("reg","alpha",
         "beta_elevation","beta_ndwi","beta_ndvi",
         "beta_high_canopy","beta_male",
         "beta_migratory","beta_min_rain",
         "beta_max_rain","beta_min_temp",
         "beta_adult","beta_max_temp",
         "beta_bodymass","Ind_elevation",
         "Ind_adult","Ind_high_canopy",
         "Ind_male","Ind_migratory","Ind_min_rain",
         "Ind_bodymass","Ind_max_rain",
         "Ind_min_temp","Ind_max_temp",
         "Ind_ndwi","Ind_ndvi","phy",
         "sigma_reg","sigma_alpha","sigma_phy",
         "ppc","spec","sens")

jags_inits_haem <- function() {
  list(reg = rnorm(data_jags_haem$n_ecoregion),
       alpha = rnorm(data_jags_haem$n_species),
       phy = rnorm(data_jags_haem$n_species),
       beta_elevationT = rnorm(1),
       beta_ndwiT = rnorm(1),
       beta_ndviT = rnorm(1),
       beta_high_canopyT = rnorm(1),
       beta_min_rainT = rnorm(1),
       beta_max_rainT = rnorm(1),
       beta_maleT = rnorm(1),
       beta_migratoryT = rnorm(1),
       beta_adultT = rnorm(1),
       beta_min_tempT = rnorm(1),
       beta_max_tempT = rnorm(1),
       beta_bodymassT = rnorm(1),
       sigma_reg = runif(1, 0.01,10),
       sigma_alpha = runif(1,0.01, 10),
       sigma_phy = runif(1,0.01, 10),
       sd_sel = runif(12,0.01, 10))
}

load("FOLDER/phyloglmm_data_jags_haem.rda")
rjags_phylo_haem <- jags.model(phylo.jags.mod,
                               data = data_jags_haem, inits = jags_inits_haem,
                               n.chains = nc, n.adapt = na)
update(rjags_phylo_haem, nb)
out_haem <- jags.samples(rjags_phylo_haem,
                         param, ni, thin = 25)
save(out_haem, file = "Processeddata/out_haem.rda")

I have read other posts, but I'm not able to identify where the directed cycle is coming from. I've posted this in SourceForge as well, and that has the attachment for the "phyloglmm_data_jags_haem.rda" file. I haven't figured out how to attach it here https://sourceforge.net/p/mcmc-jags/discussion/610037/thread/9c6dc6b0ef/


Solution

  • I can get your code to run. I make no promise as to the accuracy of the results.

    The problem appears to be that you have a missing value for predictor ndvi in the 326th observation of your input data. Amending your imputation loop to

    for(i in 1:n_obs){
      male[i] ~ dbern(0.5);
      adult[i] ~ dbern(0.5)
      ndvi[i] ~ dunif(1100, 8000)
    }
    

    Allows the model to compile. [1100 and 8000 are, respectively just below/above the minimum/maximum value of the other observations in ndvi, so they seem reasonable limits for the imputation...]

    As an aside, to manage my boredom, I adjusted your MCMC parameters to

    na = 250 # adaptation
    nb = 3e+02 # burnin
    ni = 250 # samples
    nc = 1 # chains
    

    That allows sanity checking, but are obviously far too small for actual production work.

    My best guess is that the missing ndvi value induces the cycling by requiring the model to estimate ndvi for this observation. This in turn makes prob(?) stochastic rather than observed.

    By the way, you don't need library(MCMCpack) to run your code.

    How did I solve this?

    I started by commenting out everything inside your likelihood loop. This showed that the prior was estimable. Then, in turn, I uncommented every term in your prediction model. Each model compiled and was estimable until I included beta_ndvi * ndvi[i]. So then I checked the data and found the mising value. I adapted your code to impute it. After that, everything ran without error or warning.

    Moral of the story: it is always a good idea to look at your data.

    It would have been quicker to check for missing values in all your predictors before doing anything else!