Search code examples
r function gtsummary

Create a function for tbl_summary in R


I have an example dataset, shown below, along with the code that generates the table I want. However, I have many more variables that I want to add to the table, and repeating the same code for each variable would make the script very long. I tried to wrap the tbl_summary call in a function, but it did not work and I don't know how to fix it.

library(gtsummary)
library(tidyverse)

# Example data: 10 rows with two categorical columns (Gender, source) and two
# numeric columns (EnergyDF_kJ_total, vegetable_score).
test <- data.frame(
  Gender = c(
    "Female", "Male", "Male", "Female", "Female",
    "Female", "Male", "Female", "Female", "Male"
  ),
  source = c(
    "FFQ", "Foodworks", "FFQ", "FFQ", "FFQ",
    "FFQ", "FFQ", "Foodworks", "Foodworks", "Foodworks"
  ),
  EnergyDF_kJ_total = c(
    8060.61, 16802.2, 10755.57, 8061.82, 8995.44,
    3838.91, 7495.89, 8057.92, 15831.68, 5298.25
  ),
  vegetable_score = c(
    6.47, 5.55, 8.39, 5.17, 10,
    1.82, 3.11, 1.21, 2.76, 1.21
  )
)

# Header row for the outcome: summarise EnergyDF_kJ_total by Gender so that the
# row label and the per-Gender columns exist, then blank the statistics so the
# row acts purely as a heading above the per-source rows stacked beneath it.
tbl_EnergyDF_kJ_total <-
  test %>%
  select(Gender, EnergyDF_kJ_total) %>%
  tbl_summary(
    by = Gender, missing = "no",
    type = EnergyDF_kJ_total ~ "continuous",
    statistic = EnergyDF_kJ_total ~ "{mean} ({sd})"
  ) %>%
  # Show each Gender level in bold as the column header.
  # NOTE(review): `stat_by` appears to be deprecated in later gtsummary
  # releases -- confirm against the installed version before upgrading.
  modify_header(stat_by = "**{level}**")

# Blank the statistics for EnergyDF_kJ_total in the header row.
# `mutate(across(...))` is the current replacement for the superseded
# `mutate_at(vars(...))` and sets the same two columns to NA.
tbl_EnergyDF_kJ_total$table_body <-
  tbl_EnergyDF_kJ_total$table_body %>%
  mutate(across(c(stat_1, stat_2), ~NA_character_))

# Per-source rows: nest the data by `source`, build one Gender-stratified
# summary per nested data frame (labelled with the source value), and stack
# the resulting tables into a single table.
tbl_EnergyDF_kJ_total_by_source <-
  test %>%
  # keep the continuous variable and the two categorical variables
  select(Gender, EnergyDF_kJ_total, source) %>%
  group_nest(source) %>%
  mutate(
    tbl = map2(source, data, function(src, dat) {
      dat %>%
        tbl_summary(
          by = Gender,
          type = EnergyDF_kJ_total ~ "continuous",
          statistic = EnergyDF_kJ_total ~ "{mean} ({sd})",
          # use the source value as the row label
          label = list(EnergyDF_kJ_total = src),
          missing = "no"
        ) %>%
        add_overall(col_label = "**Overall**") %>%
        add_n()
    })
  ) %>%
  pull(tbl) %>%
  tbl_stack()


# Stack the header row on top of the per-source rows, then move the N and
# overall columns so they sit directly after the label column.
tbl_stack(list(tbl_EnergyDF_kJ_total, tbl_EnergyDF_kJ_total_by_source)) %>%
  modify_table_body(dplyr::relocate, c("n", "stat_0"), .after = "label") %>%
  as_gt() %>%
  # Indent the per-source rows: only those rows carry a non-missing `n`,
  # because `add_n()` was never applied to the header table.
  # The column is named with a string instead of `gt::vars()`, which gt has
  # deprecated in favour of tidyselect-style column selection.
  gt::tab_style(
    style = gt::cell_text(indent = gt::px(10), align = "left"),
    locations = gt::cells_body(columns = "label", rows = !is.na(n))
  )

Here is the code I tried in order to turn the overall table into a function, but it didn't work. Any help would be much appreciated.

# Attempted wrapper around the overall-table code (reported as not working).
# `test` is the data frame; `var1` is used as the `by` column and `var2` as the
# column that is typed "continuous" and summarised as mean (SD).
# NOTE(review): the call at the bottom passes bare column names, while the body
# uses `var1`/`var2` as ordinary variables (no `{{ }}`, and no strings with
# `all_of()`). The column names are therefore presumably never forwarded to
# `select()` / `tbl_summary()`, and the formulas refer to the literal name
# `var2` -- this looks like the reason the attempt fails; confirm by running.
x <- function(test, var1, var2) {
test %>%
select(var1, var2) %>%
tbl_summary(by = var1, missing = "no",
type = var2 ~ "continuous",
statistic = var2 ~ "{mean} ({sd})") %>%
modify_header(stat_by = "{level}") # change the column header to the level name
}

# Call the wrapper with unquoted column names.
test1 <- x(test, Gender, EnergyDF_kJ_total)

Solution

  • Here's an example of your tables that has been functionalized. Happy Programming!

    remotes::install_github("ddsjoberg/gtsummary") # installing version 1.3.5.9007
    library(gtsummary)
    library(tidyverse)
    packageVersion("gtsummary")
    
    # Example data: 10 rows with two categorical columns (Gender, source) and
    # two numeric columns (EnergyDF_kJ_total, vegetable_score).
    test <- data.frame(
      Gender = c(
        "Female", "Male", "Male", "Female", "Female",
        "Female", "Male", "Female", "Female", "Male"
      ),
      source = c(
        "FFQ", "Foodworks", "FFQ", "FFQ", "FFQ",
        "FFQ", "FFQ", "Foodworks", "Foodworks", "Foodworks"
      ),
      EnergyDF_kJ_total = c(
        8060.61, 16802.2, 10755.57, 8061.82, 8995.44,
        3838.91, 7495.89, 8057.92, 15831.68, 5298.25
      ),
      vegetable_score = c(
        6.47, 5.55, 8.39, 5.17, 10,
        1.82, 3.11, 1.21, 2.76, 1.21
      )
    )
    
    
    #' Build a stacked summary table for one continuous variable
    #'
    #' Creates a header row for `variable` (with its statistics blanked),
    #' followed by one row per value of `source` showing N, the overall
    #' mean (SD), and the mean (SD) within each `Gender` level.
    #'
    #' @param data A data frame containing the columns `Gender`, `source`,
    #'   and the column named by `variable`.
    #' @param variable A single string: the name of the continuous column to
    #'   summarise. Must not be `"Gender"` or `"source"`.
    #' @return The stacked gtsummary table returned by `tbl_stack()`, with the
    #'   `n` and `stat_0` columns moved directly after `label`.
    my_table <- function(data, variable) {
      # Fail early with a readable message rather than a cryptic subsetting
      # or tidyselect error further down the pipeline.
      if (!is.data.frame(data)) {
        stop("`data` must be a data frame.", call. = FALSE)
      }
      if (!is.character(variable) || length(variable) != 1L ||
          is.na(variable)) {
        stop("`variable` must be a single column name given as a string.",
             call. = FALSE)
      }
      if (variable %in% c("Gender", "source")) {
        stop("`variable` must not be \"Gender\" or \"source\".",
             call. = FALSE)
      }
      missing_cols <- setdiff(c("Gender", "source", variable), names(data))
      if (length(missing_cols) > 0) {
        stop("Column(s) not found in `data`: ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
      }

      # Keep only the columns this table needs.
      data <- data[c("Gender", "source", variable)]

      # Header row: summarise `variable` by Gender so the row label and the
      # per-Gender columns exist; the statistics are blanked just below.
      tbl_header_row <-
        data %>%
        select(all_of(c("Gender", variable))) %>%
        tbl_summary(
          by = Gender, missing = "no",
          type = everything() ~ "continuous",
          statistic = everything() ~ "{mean} ({sd})"
        ) %>%
        # Show each Gender level in bold as the column header.
        # NOTE(review): `stat_by` appears to be deprecated in later gtsummary
        # releases -- confirm against the installed version before upgrading.
        modify_header(stat_by = "**{level}**")

      # Blank every per-level statistic column (stat_1, stat_2, ...) so the
      # row acts purely as a heading. Matching on the column-name pattern,
      # rather than hard-coding stat_1/stat_2, keeps this working when Gender
      # has a different number of levels; `across()` replaces the superseded
      # `mutate_at()`.
      tbl_header_row$table_body <-
        tbl_header_row$table_body %>%
        mutate(across(matches("^stat_\\d+$"), ~NA_character_))

      # Per-source rows: one Gender-stratified summary per value of `source`,
      # each labelled with that source value, stacked into a single table.
      tbl_variable_by_source <-
        data %>%
        select(all_of(c("Gender", variable, "source"))) %>%
        group_nest(source) %>%
        mutate(
          # `data` here is the list-column created by group_nest(), which
          # takes precedence over the function argument of the same name.
          tbl = map2(source, data, function(src, dat) {
            dat %>%
              tbl_summary(
                by = Gender,
                type = everything() ~ "continuous",
                statistic = everything() ~ "{mean} ({sd})",
                label = everything() ~ src,
                missing = "no"
              ) %>%
              add_overall(col_label = "**Overall**") %>%
              add_n()
          })
        ) %>%
        pull(tbl) %>%
        tbl_stack()

      # Stack header + per-source rows, and place N and the overall column
      # directly after the label column.
      tbl_stack(list(tbl_header_row, tbl_variable_by_source)) %>%
        modify_table_body(dplyr::relocate, c("n", "stat_0"), .after = "label")
    }
    
    # Build one table per continuous variable.
    tbl1 <- my_table(test, "EnergyDF_kJ_total")
    tbl2 <- my_table(test, "vegetable_score")

    # Stack all tables and convert to a gt table for styling.
    list(tbl1, tbl2) %>%
      tbl_stack() %>%
      as_gt() %>%
      # Indent the per-source rows, i.e. the rows whose `n` is not missing.
      # The column is named with a string instead of `gt::vars()`, which gt
      # has deprecated in favour of tidyselect-style column selection.
      gt::tab_style(
        style = gt::cell_text(indent = gt::px(10), align = "left"),
        locations = gt::cells_body(columns = "label", rows = !is.na(n))
      )
    

    enter image description here