Search code examples
rmixed-modelsnlmesjplotmarginal-effects

Plotting population-level predictions from lme model on repeated measurements data using nlme, ggeffects, and sjplot


I have a dataset in a long format of 60 repeated measurements taken within 19 patients (ID). Patients have had a differing amount of measurements (2 measurements [n=11], followed by 5 measurements [n=5], 3 [n=1], 4 [n=1], and 6 measurements [n=1], with varying time-intervals (fu_time measured in years). The data looks as follows:

library(tidyverse)
library(magrittr)
library(broom.mixed)
library(nlme)        
library(ggeffects)
library(sjPlot)

mydata <- 
structure(list(ID = c(2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 
7, 7, 8, 8, 8, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 20, 20, 
21, 21, 22, 22, 24, 24, 24, 24, 25, 25, 27, 27, 29, 29, 29, 29, 
29, 30, 30, 31, 31, 34, 34, 34, 34, 34, 36, 36, 37, 37), fu_time = c(0, 
0.545, 2.168, 2.68, 3.184, 5.695, 0, 1.892, 0, 0.939, 1.451, 
1.955, 4.353, 0, 4.449, 0, 0.465, 4.005, 0, 0.364, 0.857, 1.361, 
3.759, 0, 0.46, 0.953, 1.457, 4.03, 0, 0.268, 0, 0.383, 0, 0.556, 
0, 0.665, 1.158, 1.662, 0, 5.558, 0, 1.207, 0, 1.339, 1.793, 
2.335, 4.194, 0, 0.734, 0, 2.27, 0, 0.613, 1.106, 1.648, 4.197, 
0, 0.556, 0, 0.468), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    age = c(56.6, 56.6, 56.6, 56.6, 56.6, 56.6, 43.2, 43.2, 52, 
    52, 52, 52, 52, 58.5, 58.5, 57.4, 57.4, 57.4, 57.8, 57.8, 
    57.8, 57.8, 57.8, 52.4, 52.4, 52.4, 52.4, 52.4, 70.8, 70.8, 
    61.4, 61.4, 59.2, 59.2, 61.5, 61.5, 61.5, 61.5, 48.9, 48.9, 
    54.2, 54.2, 50.1, 50.1, 50.1, 50.1, 50.1, 55.4, 55.4, 48.6, 
    48.6, 64.2, 64.2, 64.2, 64.2, 64.2, 68.3, 68.3, 66.7, 66.7
    ), age_standardized = c(-0.112074178471796, -0.112074178471796, 
    -0.112074178471796, -0.112074178471796, -0.112074178471796, 
    -0.112074178471796, -1.75787581301653, -1.75787581301653, 
    -0.677050858987151, -0.677050858987151, -0.677050858987151, 
    -0.677050858987151, -0.677050858987151, 0.121285754784546, 
    0.121285754784546, -0.0138173644691261, -0.0138173644691261, 
    -0.0138173644691261, 0.035311042532209, 0.035311042532209, 
    0.035311042532209, 0.035311042532209, 0.035311042532209, 
    -0.627922451985816, -0.627922451985816, -0.627922451985816, 
    -0.627922451985816, -0.627922451985816, 1.6319842700756, 
    1.6319842700756, 0.477466705544226, 0.477466705544226, 0.207260467036883, 
    0.207260467036883, 0.48974880729456, 0.48974880729456, 0.48974880729456, 
    0.48974880729456, -1.0577960132475, -1.0577960132475, -0.406844620479807, 
    -0.406844620479807, -0.910410792243494, -0.910410792243494, 
    -0.910410792243494, -0.910410792243494, -0.910410792243494, 
    -0.259459399475802, -0.259459399475802, -1.0946423184985, 
    -1.0946423184985, 0.821365554553573, 0.821365554553573, 0.821365554553573, 
    0.821365554553573, 0.821365554553573, 1.32493172631726, 1.32493172631726, 
    1.12841809831192, 1.12841809831192), hypertension = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), total_cholesterol = c(3.8, 3.8, 
    3.8, 3.8, 3.8, 3.8, 3.6, 3.6, 5.208, 5.208, 5.208, 5.208, 
    5.208, 5.2, 5.2, 4.5, 4.5, 4.5, 3.9, 3.9, 3.9, 3.9, 3.9, 
    4.3, 4.3, 4.3, 4.3, 4.3, 3.94, 3.94, 5.468, 5.468, 4.76, 
    4.76, 4.2, 4.2, 4.2, 4.2, 3.1, 3.1, 4.796, 4.796, 5.4, 5.4, 
    5.4, 5.4, 5.4, 4.7, 4.7, 5.516, 5.516, 4.6, 4.6, 4.6, 4.6, 
    4.6, 4.096, 4.096, 7.2, 7.2), log_outcome = c(8.42915657313723, 
    8.7009137691239, 8.86077267189618, 8.96246948326419, 9.03130758075829, 
    9.12345262370784, 5.92634460299296, 6.50615330482121, 8.82605479483354, 
    9.28945716416388, 9.31314123590531, 9.29155486879811, 9.54380093520124, 
    6.3387281707835, 8.42119585511192, 8.09141312479266, 8.08323601190682, 
    8.08345205795634, 8.84248862871191, 8.79677243460523, 8.83902619880979, 
    8.88249368941836, 9.3739060089624, 9.21813577692571, 9.41892283483421, 
    9.51110081224594, 9.44403926910489, 9.90810686887857, 11.2473988053982, 
    11.3367621074836, 9.14484138974349, 9.13587164023184, 9.15404308484749, 
    9.29984347265724, 9.6945365587177, 9.51513026752813, 9.53564697229274, 
    9.59066055858977, 7.06643401737976, 7.7842004897582, 8.40862917559632, 
    8.74727168230293, 4.10135196585983, 5.72042611547146, 5.91627217786197, 
    6.60537958438705, 7.17379497306441, 9.02372811447485, 9.162613156671, 
    6.99925650048289, 7.38343760944252, 9.28357810757782, 9.35451537017142, 
    9.34029788589738, 9.34596520171378, 9.29507953850898, 10.4746495321547, 
    10.5224161779052, 8.94686953356746, 8.96847859420429)), row.names = c(NA, 
60L), class = "data.frame")

head(mydata)
  ID fu_time    sex  age age_standardized hypertension total_cholesterol log_outcome
1  2   0.000 Female 56.6       -0.1120742           No               3.8    8.429157
2  2   0.545 Female 56.6       -0.1120742           No               3.8    8.700914
3  2   2.168 Female 56.6       -0.1120742           No               3.8    8.860773
4  2   2.680 Female 56.6       -0.1120742           No               3.8    8.962469
5  2   3.184 Female 56.6       -0.1120742           No               3.8    9.031308
6  2   5.695 Female 56.6       -0.1120742           No               3.8    9.123453

The goal was to assess what the risk factors are for progression in the outcome. The outcome has been natural-log-transformed due to being ver skewed. The linear mixed model I specified with the corresponding exponentiated fixed effects:

ris <- 
  lme(fixed=log_outcome ~ fu_time + age*fu_time + sex*fu_time + hypertension*fu_time + 
        total_cholesterol*fu_time, 
      random=~ 1 + fu_time|ID, 
      data=mydata, 
      method="ML")

# get coefficients
fixef_coefs <- 
  tidy(ris, conf.int=TRUE) %>% 
  select(term, estimate, conf.low, conf.high, p.value) %>% 
  mutate_at(vars(estimate, conf.low, conf.high), ~round(exp(.), 2)) %>% 
  mutate(p.value=round(p.value, 3))
fixef_coefs <- fixef_coefs[1:10, ] # remove random effects from dataset

fixef_coefs
# A tibble: 10 x 5
   term                      estimate conf.low conf.high p.value
   <chr>                        <dbl>    <dbl>     <dbl>   <dbl>
 1 (Intercept)                   0.67     0.03     16.8    0.82 
 2 fu_time                       2.21     1.1       4.45   0.043
 3 age                           1.23     1.15      1.3    0    
 4 sexMale                       2.02     0.8       5.15   0.162
 5 hypertensionYes               0.4      0.16      1      0.07 
 6 total_cholesterol             0.58     0.37      0.91   0.032
 7 fu_time:age                   0.98     0.97      0.99   0.001
 8 fu_time:sexMale               1.08     0.9       1.29   0.42 
 9 fu_time:hypertensionYes       1.22     1.04      1.43   0.03 
10 fu_time:total_cholesterol     1.12     1.03      1.22   0.018

So age, hypertension, and total cholesterol are associated with change in the outcome over time, as they show a significant interaction with fu_time. My next step is to plot the marginal effects of for example hypertension, preferrably using ggemmeans/sjplot.

I'm however stuck on how to achieve this. I've read through the vignettes on ggeffects, the difference between ggeffects and ggemmeans, and plotting interaction effects in sjplot.

I've tried several things.

(1) Running ggemmeans(ris, type="random", terms="fu_time:age"), which gives the error that there is not a term with that name in the model.

(2) Running plot_model(ris, type = "int"). This produces pots for the covariates, but there are two problems with this: first, the plots are on the log-scale while I want them on the back-transformed/exponentiated scale, and two I'm not sure if these are the actual interaction terms with fu_time.

Preferrably I want to make my own/a new dataset using ggemmeans (so keeping continuous variables at their means and categorical variables at their observed sample proportions) so I can do computations on variables and subsequently make plots myself.


Solution

  • A bit off-topic, but I suggest parameters::model_parameters() for your summary tables:

    library(parameters)
    library(nlme)        
    library(ggeffects)
    
    ris <- lme(
      log_outcome ~ fu_time + age * fu_time + sex * fu_time + hypertension * fu_time + total_cholesterol * fu_time, 
      random =  ~ 1 + fu_time | ID, 
      data = mydata, 
      method = "ML"
    )
    
    model_parameters(ris, effects = "fixed", exponentiate = TRUE)
    #> # Fixed Effects
    #> 
    #> Parameter                    | Coefficient |       SE |        95% CI |     t | df |      p
    #> -------------------------------------------------------------------------------------------
    #> (Intercept)                  |        0.67 |     1.17 | [0.03, 16.76] | -0.23 | 36 | 0.820 
    #> fu time                      |        2.21 |     0.84 | [1.10,  4.45] |  2.09 | 36 | 0.043 
    #> age                          |        1.23 |     0.04 | [1.15,  1.30] |  6.51 | 14 | < .001
    #> sex [Male]                   |        2.02 |     0.97 | [0.80,  5.15] |  1.48 | 14 | 0.162 
    #> hypertension [Yes]           |        0.40 |     0.19 | [0.16,  1.00] | -1.96 | 14 | 0.070 
    #> total cholesterol            |        0.58 |     0.13 | [0.37,  0.91] | -2.37 | 14 | 0.032 
    #> fu time * age                |        0.98 | 6.43e-03 | [0.97,  0.99] | -3.44 | 36 | 0.001 
    #> fu time * sex [Male]         |        1.08 |     0.10 | [0.90,  1.29] |  0.82 | 36 | 0.420 
    #> fu time * hypertension [Yes] |        1.22 |     0.11 | [1.04,  1.43] |  2.26 | 36 | 0.030 
    #> fu time * total cholesterol  |        1.12 |     0.05 | [1.03,  1.22] |  2.47 | 36 | 0.018
    #> 
    #> Uncertainty intervals (equal-tailed) and p values (two-tailed) computed using a
    #>   Wald t-distribution approximation.
    

    Created on 2021-11-17 by the reprex package (v2.0.1)

    Regarding your plots, is this what you're looking for?

    ggpredict(ris, c("age", "fu_time")) |> plot()
    

    ggpredict(ris, c("hypertension", "fu_time")) |> plot()
    

    ggpredict(ris, c("total_cholesterol", "fu_time")) |> plot()
    

    Created on 2021-11-17 by the reprex package (v2.0.1)