I have a dataset with measures for individuals, where each measure represents a specific type of measure (say '1' or '2') and each individual belongs to specific group (say 'A' or 'B'). For a subset of individuals, I have observed both measures '1' and '2'. In this data, the different measures have different variances, and there is a subject-level random effect that has very different variances in the two groups. How would I go about fitting this model in the right way?
Here is an example:
dat <- structure(list(subject = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 101, 102, 102, 103,
103, 104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109,
110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 115, 116,
116, 117, 117, 118, 118, 119, 119, 120, 120, 121, 121, 122, 122,
123, 123, 124, 124, 125, 125, 126, 126, 127, 127, 128, 128, 129,
129, 130, 130),
group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("A", "B"), class = "factor"),
measure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("1", "2"), class = "factor"),
y = c(-1.71,
121.74, -1.57, 109.96, -0.64, 101.67, -0.13, 120.64, 1.47,
101.99, -4.51, 133.18, -2.9, 117.95, -0.97, 126.94, -1.44,
105.1, -1.52, 122.2, -2.29, 130.17, -0.35, 133.14, -0.94,
112.68, -0.89, 105.37, -2.49, 126.75, -2.61, 139.25, -2.13,
113.43, 0.61, 140.76, -0.75, 129.17, 1.94, 139.4, -0.49,
119.03, -2.09, 89.97, -2.76, 107.85, 1.61, 136.31, -0.55,
128.6, 0.41, 86.66, 0.54, 100.03, 2.46, 115.37, 6.94, 109.34,
3.78, 102.34, -4.46, 104.06, 1.48, 105.06, 3.98, 85.21, 1.31,
103.17, -3.35, 110.83, 2.75, 98.38, -2.43, 101.57, 2.2, 120.45,
-4.06, 101.25, 3.85, 99.38, 2.17, 108, 9.27, 100.76, 3.27,
110.3, 1.22, 98.91, 1.62, 105.65, 4.64, 113.07, 8.14, 108.75,
6.84, 73.08, 1.42, 99.41, -0.5, 95.25, 1.42, 3.76, 102.95,
85.45, -2.71, -0.48, 137.34, 114.61, -0.42, 1.71, 98.82,
83.06, -3.51, -0.32, 109.66, 91.99, -0.46, -1.35, 113.88,
97.32, -0.93, 1.17, 111.26, 103.9, -4.11, 6.78, 106.36, 88.22,
-0.85, -6.56, 137.39, 112.19, -0.91, 3.26, 122.53, 105.18,
-0.61, 4.25, 111.01, 95.85, -2.68, 3.1, 142.26, 114.44, -0.31,
3.76, 127.61, 102.26, -1.82, 4.01, 116.61, 97.1, -3.61, 0.9,
107.73, 90.6, -0.13, 3.78, 108.73, 95.12)),
row.names = c(NA, -160L), class = "data.frame")
I can fit a mixed-effects model with nlme:
init <- c(-1.2, 120, 2, 100)
model1 <- nlme(y ~ a,
data = dat,
fixed = list(a ~ group : measure + 0),
random = a ~ 1,
groups = ~ subject,
start = init,
weights = varIdent(form = ~ 1 | measure))
Is there a way to fit the model such that the random effect has different variances across groups? I have a feeling that this should be achievable by using a correlation structure, but so far I have been unsuccessful.
In reality, my model is nonlinear and more complex than the above, so the problem can unfortunately not be solved by crossed random effects with lmer
(but maybe a crossed random effects hack for nlme?)
Following, Ben Bolker's suggestion in the comment, I ended up implementing the model in the Template Model Builder framework (Thanks for pointing me in this direction Ben!) The model did not converge for the given simulated dataset, but it did for other simulations and fitted the correct model with substantially improved likelihood values (identical to the nlme model when removing the extra variance parameter).
To achieve this, I first made the model code in mixed.cpp
:
#include <TMB.hpp>
template<class Type>
Type objective_function<Type>::operator() ()
{
// Data and design variables
DATA_VECTOR(y); // Outcome
DATA_ARRAY(X_mean); // Covariates
DATA_ARRAY(X_subj); // Subject design matrix
DATA_ARRAY(X_var); // Variance design matrix
DATA_ARRAY(X_var_subj); // Subject-level random-effect variance design matrix
int n = y.size(); // Number of observations
int n_mean = X_mean.dim[1]; // Number of observations
int n_subj = X_subj.dim[1]; // Number of subjects
// Parameters
PARAMETER_VECTOR(a);
PARAMETER_VECTOR(a_subj);
PARAMETER(log_sigma_measure1);
PARAMETER(log_sigma_measure2);
PARAMETER(log_sigma_subj_group1);
PARAMETER(log_sigma_subj_group2);
// Variance parameters
Type sigma_measure1 = exp(log_sigma_measure1);
Type sigma_measure2 = exp(log_sigma_measure2);
Type sigma_subj_group1 = exp(log_sigma_subj_group1);
Type sigma_subj_group2 = exp(log_sigma_subj_group2);
// Negative log likelihood
Type nll = 0.0;
vector<Type> yfit(n);
vector<Type> random(n);
for (int i = 0; i < n; i++) { // Loop over observations
random(i) = 0.0;
for (int j = 0; j < n_subj; j++) { // Subject random effect for observation i
random(i) += X_subj(i, j) * a_subj(j);
}
yfit(i) = random(i);
for (int j = 0; j < n_mean; j++) { // Mean structure for observation i
yfit(i) += X_mean(i, j) * a(j);
}
nll += -dnorm(y(i),
yfit(i),
X_var(i, 0) * sigma_measure1 +
X_var(i, 1) * sigma_measure2, true);
}
for (int j = 0; j < n_subj; j++) { // Loop over subjects for random effect contribution
nll += -dnorm(a_subj(j),
Type(0.0),
X_var_subj(j, 0) * sigma_subj_group1 +
X_var_subj(j, 1) * sigma_subj_group2, true);
}
return nll;
}
and then fitted the model from R:
library(TMB)
compile('mixed.cpp')
dyn.load(dynlib('mixed'))
fit_dat <- list(y = dat$y,
X_mean = model.matrix(~ group : measure + 0, data = dat),
X_subj = model.matrix(~ subject + 0, data = dat),
X_var = model.matrix(~ measure + 0, data = dat),
X_var_subj = model.matrix(~ group + 0, data = subset(dat, !duplicated(dat$subject)))
parameters <- list(a = c(-2, 120, 2, 100),
a_subj = rep(0, ncol(fit_dat$X_subj)),
log_sigma_measure1 = 0,
log_sigma_measure2 = 1,
log_sigma_subj_group1 = 2,
log_sigma_subj_group2 = 2)
model <- MakeADFun(data = fit_dat,
parameters = parameters,
DLL = 'mixed',
random = 'a_subj')
fit <- nlminb(start = model$par,
objective = model$fn,
gradient = model$gr)
So far, I am very impressed with the robustness of the TMB
package.