I have a dataset in a long format of 60 repeated measurements taken within 19 patients (ID
). Patients have had a differing amount of measurements (2 measurements [n=11], followed by 5 measurements [n=5], 3 [n=1], 4 [n=1], and 6 measurements [n=1], with varying time-intervals (fu_time
measured in years). The data looks as follows:
library(tidyverse)
library(magrittr)
library(broom.mixed)
library(nlme)
library(ggeffects)
library(sjPlot)
mydata <-
structure(list(ID = c(2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4,
7, 7, 8, 8, 8, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 20, 20,
21, 21, 22, 22, 24, 24, 24, 24, 25, 25, 27, 27, 29, 29, 29, 29,
29, 30, 30, 31, 31, 34, 34, 34, 34, 34, 36, 36, 37, 37), fu_time = c(0,
0.545, 2.168, 2.68, 3.184, 5.695, 0, 1.892, 0, 0.939, 1.451,
1.955, 4.353, 0, 4.449, 0, 0.465, 4.005, 0, 0.364, 0.857, 1.361,
3.759, 0, 0.46, 0.953, 1.457, 4.03, 0, 0.268, 0, 0.383, 0, 0.556,
0, 0.665, 1.158, 1.662, 0, 5.558, 0, 1.207, 0, 1.339, 1.793,
2.335, 4.194, 0, 0.734, 0, 2.27, 0, 0.613, 1.106, 1.648, 4.197,
0, 0.556, 0, 0.468), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor"),
age = c(56.6, 56.6, 56.6, 56.6, 56.6, 56.6, 43.2, 43.2, 52,
52, 52, 52, 52, 58.5, 58.5, 57.4, 57.4, 57.4, 57.8, 57.8,
57.8, 57.8, 57.8, 52.4, 52.4, 52.4, 52.4, 52.4, 70.8, 70.8,
61.4, 61.4, 59.2, 59.2, 61.5, 61.5, 61.5, 61.5, 48.9, 48.9,
54.2, 54.2, 50.1, 50.1, 50.1, 50.1, 50.1, 55.4, 55.4, 48.6,
48.6, 64.2, 64.2, 64.2, 64.2, 64.2, 68.3, 68.3, 66.7, 66.7
), age_standardized = c(-0.112074178471796, -0.112074178471796,
-0.112074178471796, -0.112074178471796, -0.112074178471796,
-0.112074178471796, -1.75787581301653, -1.75787581301653,
-0.677050858987151, -0.677050858987151, -0.677050858987151,
-0.677050858987151, -0.677050858987151, 0.121285754784546,
0.121285754784546, -0.0138173644691261, -0.0138173644691261,
-0.0138173644691261, 0.035311042532209, 0.035311042532209,
0.035311042532209, 0.035311042532209, 0.035311042532209,
-0.627922451985816, -0.627922451985816, -0.627922451985816,
-0.627922451985816, -0.627922451985816, 1.6319842700756,
1.6319842700756, 0.477466705544226, 0.477466705544226, 0.207260467036883,
0.207260467036883, 0.48974880729456, 0.48974880729456, 0.48974880729456,
0.48974880729456, -1.0577960132475, -1.0577960132475, -0.406844620479807,
-0.406844620479807, -0.910410792243494, -0.910410792243494,
-0.910410792243494, -0.910410792243494, -0.910410792243494,
-0.259459399475802, -0.259459399475802, -1.0946423184985,
-1.0946423184985, 0.821365554553573, 0.821365554553573, 0.821365554553573,
0.821365554553573, 0.821365554553573, 1.32493172631726, 1.32493172631726,
1.12841809831192, 1.12841809831192), hypertension = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), total_cholesterol = c(3.8, 3.8,
3.8, 3.8, 3.8, 3.8, 3.6, 3.6, 5.208, 5.208, 5.208, 5.208,
5.208, 5.2, 5.2, 4.5, 4.5, 4.5, 3.9, 3.9, 3.9, 3.9, 3.9,
4.3, 4.3, 4.3, 4.3, 4.3, 3.94, 3.94, 5.468, 5.468, 4.76,
4.76, 4.2, 4.2, 4.2, 4.2, 3.1, 3.1, 4.796, 4.796, 5.4, 5.4,
5.4, 5.4, 5.4, 4.7, 4.7, 5.516, 5.516, 4.6, 4.6, 4.6, 4.6,
4.6, 4.096, 4.096, 7.2, 7.2), log_outcome = c(8.42915657313723,
8.7009137691239, 8.86077267189618, 8.96246948326419, 9.03130758075829,
9.12345262370784, 5.92634460299296, 6.50615330482121, 8.82605479483354,
9.28945716416388, 9.31314123590531, 9.29155486879811, 9.54380093520124,
6.3387281707835, 8.42119585511192, 8.09141312479266, 8.08323601190682,
8.08345205795634, 8.84248862871191, 8.79677243460523, 8.83902619880979,
8.88249368941836, 9.3739060089624, 9.21813577692571, 9.41892283483421,
9.51110081224594, 9.44403926910489, 9.90810686887857, 11.2473988053982,
11.3367621074836, 9.14484138974349, 9.13587164023184, 9.15404308484749,
9.29984347265724, 9.6945365587177, 9.51513026752813, 9.53564697229274,
9.59066055858977, 7.06643401737976, 7.7842004897582, 8.40862917559632,
8.74727168230293, 4.10135196585983, 5.72042611547146, 5.91627217786197,
6.60537958438705, 7.17379497306441, 9.02372811447485, 9.162613156671,
6.99925650048289, 7.38343760944252, 9.28357810757782, 9.35451537017142,
9.34029788589738, 9.34596520171378, 9.29507953850898, 10.4746495321547,
10.5224161779052, 8.94686953356746, 8.96847859420429)), row.names = c(NA,
60L), class = "data.frame")
head(mydata)
ID fu_time sex age age_standardized hypertension total_cholesterol log_outcome
1 2 0.000 Female 56.6 -0.1120742 No 3.8 8.429157
2 2 0.545 Female 56.6 -0.1120742 No 3.8 8.700914
3 2 2.168 Female 56.6 -0.1120742 No 3.8 8.860773
4 2 2.680 Female 56.6 -0.1120742 No 3.8 8.962469
5 2 3.184 Female 56.6 -0.1120742 No 3.8 9.031308
6 2 5.695 Female 56.6 -0.1120742 No 3.8 9.123453
The goal was to assess what the risk factors are for progression in the outcome. The outcome has been natural-log-transformed due to being ver skewed. The linear mixed model I specified with the corresponding exponentiated fixed effects:
ris <-
lme(fixed=log_outcome ~ fu_time + age*fu_time + sex*fu_time + hypertension*fu_time +
total_cholesterol*fu_time,
random=~ 1 + fu_time|ID,
data=mydata,
method="ML")
# get coefficients
fixef_coefs <-
tidy(ris, conf.int=TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
mutate_at(vars(estimate, conf.low, conf.high), ~round(exp(.), 2)) %>%
mutate(p.value=round(p.value, 3))
fixef_coefs <- fixef_coefs[1:10, ] # remove random effects from dataset
fixef_coefs
# A tibble: 10 x 5
term estimate conf.low conf.high p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.67 0.03 16.8 0.82
2 fu_time 2.21 1.1 4.45 0.043
3 age 1.23 1.15 1.3 0
4 sexMale 2.02 0.8 5.15 0.162
5 hypertensionYes 0.4 0.16 1 0.07
6 total_cholesterol 0.58 0.37 0.91 0.032
7 fu_time:age 0.98 0.97 0.99 0.001
8 fu_time:sexMale 1.08 0.9 1.29 0.42
9 fu_time:hypertensionYes 1.22 1.04 1.43 0.03
10 fu_time:total_cholesterol 1.12 1.03 1.22 0.018
So age, hypertension, and total cholesterol are associated with change in the outcome over time, as they show a significant interaction with fu_time
. My next step is to plot the marginal effects of for example hypertension, preferrably using ggemmeans/sjplot.
I'm however stuck on how to achieve this. I've read through the vignettes on ggeffects, the difference between ggeffects and ggemmeans, and plotting interaction effects in sjplot.
I've tried several things.
(1) Running ggemmeans(ris, type="random", terms="fu_time:age")
, which gives the error that there is not a term with that name in the model.
(2) Running plot_model(ris, type = "int")
. This produces pots for the covariates, but there are two problems with this: first, the plots are on the log-scale while I want them on the back-transformed/exponentiated scale, and two I'm not sure if these are the actual interaction terms with fu_time.
Preferrably I want to make my own/a new dataset using ggemmeans (so keeping continuous variables at their means and categorical variables at their observed sample proportions) so I can do computations on variables and subsequently make plots myself.
A bit off-topic, but I suggest parameters::model_parameters()
for your summary tables:
library(parameters)
library(nlme)
library(ggeffects)
ris <- lme(
log_outcome ~ fu_time + age * fu_time + sex * fu_time + hypertension * fu_time + total_cholesterol * fu_time,
random = ~ 1 + fu_time | ID,
data = mydata,
method = "ML"
)
model_parameters(ris, effects = "fixed", exponentiate = TRUE)
#> # Fixed Effects
#>
#> Parameter | Coefficient | SE | 95% CI | t | df | p
#> -------------------------------------------------------------------------------------------
#> (Intercept) | 0.67 | 1.17 | [0.03, 16.76] | -0.23 | 36 | 0.820
#> fu time | 2.21 | 0.84 | [1.10, 4.45] | 2.09 | 36 | 0.043
#> age | 1.23 | 0.04 | [1.15, 1.30] | 6.51 | 14 | < .001
#> sex [Male] | 2.02 | 0.97 | [0.80, 5.15] | 1.48 | 14 | 0.162
#> hypertension [Yes] | 0.40 | 0.19 | [0.16, 1.00] | -1.96 | 14 | 0.070
#> total cholesterol | 0.58 | 0.13 | [0.37, 0.91] | -2.37 | 14 | 0.032
#> fu time * age | 0.98 | 6.43e-03 | [0.97, 0.99] | -3.44 | 36 | 0.001
#> fu time * sex [Male] | 1.08 | 0.10 | [0.90, 1.29] | 0.82 | 36 | 0.420
#> fu time * hypertension [Yes] | 1.22 | 0.11 | [1.04, 1.43] | 2.26 | 36 | 0.030
#> fu time * total cholesterol | 1.12 | 0.05 | [1.03, 1.22] | 2.47 | 36 | 0.018
#>
#> Uncertainty intervals (equal-tailed) and p values (two-tailed) computed using a
#> Wald t-distribution approximation.
Created on 2021-11-17 by the reprex package (v2.0.1)
Regarding your plots, is this what you're looking for?
ggpredict(ris, c("age", "fu_time")) |> plot()
ggpredict(ris, c("hypertension", "fu_time")) |> plot()
ggpredict(ris, c("total_cholesterol", "fu_time")) |> plot()
Created on 2021-11-17 by the reprex package (v2.0.1)