Search code examples
rmixed-modelsnlme

Specify random effect with different variance across groups in nlme


I have a dataset with measures for individuals, where each measure represents a specific type of measure (say '1' or '2') and each individual belongs to specific group (say 'A' or 'B'). For a subset of individuals, I have observed both measures '1' and '2'. In this data, the different measures have different variances, and there is a subject-level random effect that has very different variances in the two groups. How would I go about fitting this model in the right way?

Here is an example:

dat <- structure(list(subject = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
                       28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 
                       44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 
                       60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 
                       76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 
                       92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 101, 102, 102, 103, 
                       103, 104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 
                       110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 115, 116, 
                       116, 117, 117, 118, 118, 119, 119, 120, 120, 121, 121, 122, 122, 
                       123, 123, 124, 124, 125, 125, 126, 126, 127, 127, 128, 128, 129, 
                       129, 130, 130), 
           group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("A", "B"), class = "factor"), 
           measure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
                                 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                                 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
                                 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                                 2L), .Label = c("1", "2"), class = "factor"), 
           y = c(-1.71, 
                 121.74, -1.57, 109.96, -0.64, 101.67, -0.13, 120.64, 1.47, 
                 101.99, -4.51, 133.18, -2.9, 117.95, -0.97, 126.94, -1.44, 
                 105.1, -1.52, 122.2, -2.29, 130.17, -0.35, 133.14, -0.94, 
                 112.68, -0.89, 105.37, -2.49, 126.75, -2.61, 139.25, -2.13, 
                 113.43, 0.61, 140.76, -0.75, 129.17, 1.94, 139.4, -0.49, 
                 119.03, -2.09, 89.97, -2.76, 107.85, 1.61, 136.31, -0.55, 
                 128.6, 0.41, 86.66, 0.54, 100.03, 2.46, 115.37, 6.94, 109.34, 
                 3.78, 102.34, -4.46, 104.06, 1.48, 105.06, 3.98, 85.21, 1.31, 
                 103.17, -3.35, 110.83, 2.75, 98.38, -2.43, 101.57, 2.2, 120.45, 
                 -4.06, 101.25, 3.85, 99.38, 2.17, 108, 9.27, 100.76, 3.27, 
                 110.3, 1.22, 98.91, 1.62, 105.65, 4.64, 113.07, 8.14, 108.75, 
                 6.84, 73.08, 1.42, 99.41, -0.5, 95.25, 1.42, 3.76, 102.95, 
                 85.45, -2.71, -0.48, 137.34, 114.61, -0.42, 1.71, 98.82, 
                 83.06, -3.51, -0.32, 109.66, 91.99, -0.46, -1.35, 113.88, 
                 97.32, -0.93, 1.17, 111.26, 103.9, -4.11, 6.78, 106.36, 88.22, 
                 -0.85, -6.56, 137.39, 112.19, -0.91, 3.26, 122.53, 105.18, 
                 -0.61, 4.25, 111.01, 95.85, -2.68, 3.1, 142.26, 114.44, -0.31, 
                 3.76, 127.61, 102.26, -1.82, 4.01, 116.61, 97.1, -3.61, 0.9, 
                 107.73, 90.6, -0.13, 3.78, 108.73, 95.12)), 
      row.names = c(NA, -160L), class = "data.frame")

Plot of values for the two measures across groups

I can fit a mixed-effects model with nlme:

init <- c(-1.2, 120, 2, 100)

model1 <- nlme(y ~ a,
               data = dat,
               fixed = list(a ~ group : measure + 0),
               random = a ~ 1,
               groups = ~ subject,
               start = init,
               weights = varIdent(form = ~ 1 | measure))

Is there a way to fit the model such that the random effect has different variances across groups? I have a feeling that this should be achievable by using a correlation structure, but so far I have been unsuccessful.

In reality, my model is nonlinear and more complex than the above, so the problem can unfortunately not be solved by crossed random effects with lmer (but maybe a crossed random effects hack for nlme?)


Solution

  • Following, Ben Bolker's suggestion in the comment, I ended up implementing the model in the Template Model Builder framework (Thanks for pointing me in this direction Ben!) The model did not converge for the given simulated dataset, but it did for other simulations and fitted the correct model with substantially improved likelihood values (identical to the nlme model when removing the extra variance parameter).

    To achieve this, I first made the model code in mixed.cpp:

    #include <TMB.hpp>
    
    template<class Type>
    Type objective_function<Type>::operator() ()
    {
      // Data and design variables
      DATA_VECTOR(y); // Outcome
      DATA_ARRAY(X_mean); // Covariates
      DATA_ARRAY(X_subj); // Subject design matrix
      DATA_ARRAY(X_var); // Variance design matrix  
      DATA_ARRAY(X_var_subj); // Subject-level random-effect variance design matrix
      
      
      int n = y.size(); // Number of observations
      int n_mean = X_mean.dim[1]; // Number of observations
      int n_subj = X_subj.dim[1]; // Number of subjects
      
      
      // Parameters
      PARAMETER_VECTOR(a);  
      PARAMETER_VECTOR(a_subj);
      PARAMETER(log_sigma_measure1);
      PARAMETER(log_sigma_measure2);
      PARAMETER(log_sigma_subj_group1);
      PARAMETER(log_sigma_subj_group2);
      
      // Variance parameters
      Type sigma_measure1 = exp(log_sigma_measure1);
      Type sigma_measure2 = exp(log_sigma_measure2);
      Type sigma_subj_group1 = exp(log_sigma_subj_group1);
      Type sigma_subj_group2 = exp(log_sigma_subj_group2);
      
      // Negative log likelihood
      Type nll = 0.0;
      vector<Type> yfit(n);
      vector<Type> random(n);
      
      for (int i = 0; i < n; i++) { // Loop over observations
        random(i) = 0.0;
        for (int j = 0; j < n_subj; j++) { // Subject random effect for observation i
          random(i) += X_subj(i, j) * a_subj(j);
        }
        
        yfit(i) = random(i);
        for (int j = 0; j < n_mean; j++) { // Mean structure for observation i
          yfit(i) += X_mean(i, j) * a(j);
        }
        
        nll += -dnorm(y(i), 
                      yfit(i),
                      X_var(i, 0) * sigma_measure1 +
                        X_var(i, 1) * sigma_measure2, true);
      }
      
      for (int j = 0; j < n_subj; j++) { // Loop over subjects for random effect contribution
        nll += -dnorm(a_subj(j), 
                      Type(0.0), 
                      X_var_subj(j, 0) * sigma_subj_group1 +
                        X_var_subj(j, 1) * sigma_subj_group2, true);
      }
      
      return nll;
    }
    

    and then fitted the model from R:

    library(TMB)
    
    compile('mixed.cpp')
    dyn.load(dynlib('mixed'))
    
    fit_dat <- list(y = dat$y, 
                    X_mean = model.matrix(~ group : measure + 0, data = dat),
                    X_subj = model.matrix(~ subject + 0, data = dat),
                    X_var = model.matrix(~ measure + 0, data = dat),
                    X_var_subj = model.matrix(~ group + 0, data = subset(dat, !duplicated(dat$subject)))
    
    parameters <- list(a = c(-2, 120, 2, 100),
                       a_subj = rep(0, ncol(fit_dat$X_subj)),
                       log_sigma_measure1 = 0,
                       log_sigma_measure2 = 1,
                       log_sigma_subj_group1 = 2,
                       log_sigma_subj_group2 = 2) 
    
    model <- MakeADFun(data = fit_dat, 
                       parameters = parameters,
                       DLL = 'mixed',
                       random = 'a_subj')
    
    fit <- nlminb(start = model$par, 
                  objective = model$fn, 
                  gradient = model$gr)
    

    So far, I am very impressed with the robustness of the TMB package.