Merging rows from tbl_summary and tbl_uvregression using tbl_merge from the gtsummary package

Long time browser, first time poster.

I would like to merge multiple frequency tables created using tbl_summary(), and then stacked with tbl_stack(), with multiple tables of univariate logistic regressions created using tbl_uvregression(), also stacked using tbl_stack().

I would like the columns to be (1) the (binary) dependent variable names, (2) the frequency % of the dependent variable by strata of a binary independent variable, (3) the OR from the regression, and (4) the 95% confidence interval from the regression. I would like the rows of the merged table to be labeled as the dependent variables from the regressions.

I have tried stacking then merging, in which case the rows are grouped from the tbl_summary stack and then the tbl_regression stack, but with staggered empty columns. I have also tried merging then stacking, in which case the rows are sequentially staggered, with one from tbl_summary followed by one from tbl_regression and so on.

The problem I think I'm running up against is that the variable names in the tbl_summary tables are preserved from the data set (the desired dependent variables), whereas the variable name in the tbl_uvregression tables is the independent variable.

I am not fluent in the gt package, which I think could help me here...

Any advice and/or sample code is appreciated!

Reprex below.

library(gtsummary)
library(dplyr)
packageVersion("gtsummary")
#> '1.6.2'

# Derive 0/1 indicators for each tumor grade and for the age group.
# `missing =` keeps every row coded: a missing grade is coded 0 and a missing
# age is coded 1, so no NA is introduced into the derived columns.
ex <- mutate(
  trial,
  gradeI = if_else(grade == "I", 1, 0, missing = 0),
  gradeII = if_else(grade == "II", 1, 0, missing = 0),
  gradeIII = if_else(grade == "III", 1, 0, missing = 0),
  agegp = if_else(age < 47, 0, 1, missing = 1)
)

# One frequency table per grade indicator, split into columns by age group.
t1 <- tbl_summary(select(ex, gradeI, agegp), by = agegp)
t2 <- tbl_summary(select(ex, gradeII, agegp), by = agegp)
t3 <- tbl_summary(select(ex, gradeIII, agegp), by = agegp)

#glm1 <- glm(gradeI ~ agegp, family = binomial(), data = ex)
#glm2 <- glm(gradeII ~ agegp, family = binomial(), data = ex)
#glm3 <- glm(gradeIII ~ agegp, family = binomial(), data = ex)

# Univariate logistic regression of the grade I indicator on age group;
# exponentiate = TRUE reports the coefficient as an odds ratio.
glm1 <- tbl_uvregression(
  data = select(ex, gradeI, agegp),
  y = gradeI,
  method = glm,
  method.args = list(family = binomial),
  exponentiate = TRUE,
  label = list(agegp ~ "Grade I by age group")
)
# Univariate logistic regression of the grade II indicator on age group;
# exponentiate = TRUE reports the coefficient as an odds ratio.
glm2 <- tbl_uvregression(
  data = select(ex, gradeII, agegp),
  y = gradeII,
  method = glm,
  method.args = list(family = binomial),
  exponentiate = TRUE,
  label = list(agegp ~ "Grade II by age group")
)
# Univariate logistic regression of the grade III indicator on age group;
# exponentiate = TRUE reports the coefficient as an odds ratio.
glm3 <- ex %>%
  select(gradeIII, agegp) %>%
  tbl_uvregression(
    method = glm,
    y = gradeIII,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    # Row label names the outcome being modelled (grade III, not grade II).
    label = list(agegp ~ "Grade III by age group"))

# The same pair of spanning headers is used for every merged table.
spanners <- c(
  "**Tumor grade frequencies<br>by age group**",
  "**<br>Univariate regression**"
)

# Merge each frequency table with its regression table, then stack the three
# merged tables into one.
merge1 <- tbl_merge(tbls = list(t1, glm1), tab_spanner = spanners)
merge2 <- tbl_merge(tbls = list(t2, glm2), tab_spanner = spanners)
merge3 <- tbl_merge(tbls = list(t3, glm3), tab_spanner = spanners)

tbl_stack(tbls = list(merge1, merge2, merge3))

Solution

You need to match both the variable names and the labels across the two kinds of table; both are stored in each table's table_body. I think that you should only supply tab_spanner once, so I have flipped the order of tbl_stack and tbl_merge. I have used the common label "Grade X" for your outcomes. I guess you can set the labels in the call to tbl_summary, but I did it directly in the table_body.

library(gtsummary)
library(dplyr)

# Derive 0/1 indicators for each tumor grade and for the age group.
# `missing =` keeps every row coded: a missing grade is coded 0 and a missing
# age is coded 1, so no NA is introduced into the derived columns.
ex <- mutate(
  trial,
  gradeI = if_else(grade == "I", 1, 0, missing = 0),
  gradeII = if_else(grade == "II", 1, 0, missing = 0),
  gradeIII = if_else(grade == "III", 1, 0, missing = 0),
  agegp = if_else(age < 47, 0, 1, missing = 1)
)

# One frequency table per grade indicator, split into columns by age group.
t1 <- tbl_summary(select(ex, gradeI, agegp), by = agegp)
t2 <- tbl_summary(select(ex, gradeII, agegp), by = agegp)
t3 <- tbl_summary(select(ex, gradeIII, agegp), by = agegp)

# Set labels
# Overwrite the `label` column of each summary table's body with the common
# "Grade X" text, so the labels can be matched against the regression tables.
t1$table_body <- mutate(t1$table_body, label = "Grade I")
t2$table_body <- mutate(t2$table_body, label = "Grade II")
t3$table_body <- mutate(t3$table_body, label = "Grade III")


# Univariate logistic regression of the grade I indicator on age group,
# reported as an odds ratio. The row label is shortened from
# "Grade I by age group" to the common "Grade I" label.
glm1 <- tbl_uvregression(
  data = select(ex, gradeI, agegp),
  y = gradeI,
  method = glm,
  method.args = list(family = binomial),
  exponentiate = TRUE,
  label = list(agegp ~ "Grade I")
)
# Univariate logistic regression of the grade II indicator on age group,
# reported as an odds ratio and labelled with the common "Grade II" label.
glm2 <- tbl_uvregression(
  data = select(ex, gradeII, agegp),
  y = gradeII,
  method = glm,
  method.args = list(family = binomial),
  exponentiate = TRUE,
  label = list(agegp ~ "Grade II")
)
# Univariate logistic regression of the grade III indicator on age group,
# reported as an odds ratio and labelled with the common "Grade III" label.
glm3 <- tbl_uvregression(
  data = select(ex, gradeIII, agegp),
  y = gradeIII,
  method = glm,
  method.args = list(family = binomial),
  exponentiate = TRUE,
  label = list(agegp ~ "Grade III")
)

# Match variables
# Replace the `variable` column of each regression table's body with the
# outcome name, so it agrees with the variable name in the summary tables.
glm1$table_body <- mutate(glm1$table_body, variable = "gradeI")
glm2$table_body <- mutate(glm2$table_body, variable = "gradeII")
glm3$table_body <- mutate(glm3$table_body, variable = "gradeIII")

# Stack the tables of each kind first, then merge the two stacks side by side
# so the spanning headers are supplied only once.
summary_stack <- tbl_stack(list(t1, t2, t3))
regression_stack <- tbl_stack(list(glm1, glm2, glm3))

tbl_merge(
  list(summary_stack, regression_stack),
  tab_spanner = c(
    "**Tumor grade frequencies<br>by age group**",
    "**<br>Univariate regression**"
  )
)