Search code examples
Tags: r, function, summary, smd

How to calculate Standardized Mean Difference for Table1 Package in R?


I am using the package "table1" to create a "Table 1" (baseline characteristics table) with an extra column containing the standardized mean difference (SMD) of the continuous variables in my dataset.

For each variable in the table, the SMD should compare the treatment and control groups, i.e. the strata defined by the treatment variable.

I am struggling to figure out a good way of doing this and would love some help creating the function to calculate SMD.

Here is some sample code:

#' Draw a random factor by sampling with replacement.
#'
#' @param x Vector of values to sample from; also used as the factor levels.
#' @param n Number of draws.
#' @param ... Further arguments forwarded to sample(), e.g. `prob`.
#' @return A factor of length `n` whose levels are `x`.
f <- function(x, n, ...) {
  # Spell out TRUE: the shorthand `T` is an ordinary variable that can be
  # reassigned, which would silently turn this into sampling without replacement.
  factor(sample(x, n, replace = TRUE, ...), levels = x)
}
# Fix the RNG seed so the simulated data set is reproducible
set.seed(427)

n <- 146
dat <- data.frame(id = seq_len(n))

# Simulated columns; the draws are made in this order so the random stream
# is consumed exactly as before.
dat[["treat"]] <- f(c("Placebo", "Treated"), n, prob = c(1, 2))  # weights 1:2
dat[["age"]] <- sample(18:65, n, replace = TRUE)
dat[["sex"]] <- f(c("Female", "Male"), n, prob = c(.6, .4))  # weights 0.6:0.4
dat[["wt"]] <- round(exp(rnorm(n, log(70), 0.23)), 1)

# Blank out five randomly chosen weights
dat[["wt"]][sample.int(n, 5)] <- NA

# Display labels for the table
label(dat[["age"]]) <- "Age"
label(dat[["sex"]]) <- "Sex"
label(dat[["wt"]]) <- "Weight"
label(dat[["treat"]]) <- "Treatment Group"

# Units for the continuous columns
units(dat[["age"]]) <- "years"
units(dat[["wt"]]) <- "kg"

# Renderer for continuous variables: an empty first cell followed by a
# "Mean (SD)" entry of the form "<mean> (&plusmn; <sd>)".
# x: the values of one variable; they are summarised by stats.default() and
#    the summary is passed through stats.apply.rounding() with digits = 2.
my.render.cont <- function(x) {
    rounded <- stats.apply.rounding(stats.default(x), digits = 2)
    mean_sd <- sprintf("%s (&plusmn; %s)", rounded[["MEAN"]], rounded[["SD"]])
    c("", "Mean (SD)" = mean_sd)
}
# Renderer for categorical variables: an empty first cell followed by one
# "<count> (<percent> %)" entry per element returned by stats.default(x).
my.render.cat <- function(x) {
    fmt_level <- function(y) {
        sprintf("%d (%0.0f %%)", y[["FREQ"]], y[["PCT"]])
    }
    per_level <- sapply(stats.default(x), fmt_level)
    c("", per_level)
}

    #My attempt at an SMD function
     # Attempted `extra.col` function for table1(): meant to return the
     # standardized mean difference (SMD) between two groups of one variable.
     # x:   treated as a list of per-group vectors (it is flattened with
     #      unlist() and measured with sapply(x, length)); its last element
     #      is dropped first.
     # ...: accepted but not used.
     # NOTE(review): this function does not run as written; the inline notes
     # below mark the reasons.
     smd_value <- function(x, ...) {
 x <- x[-length(x)]  # Remove "overall" group
  # Construct vectors of data y, and groups (strata) g
  y <- unlist(x)
  g <- factor(rep(1:length(x), times=sapply(x, length)))
  # NOTE(review): `g==1` has one element per observation, so this condition is
  # a logical vector rather than a single TRUE/FALSE; this is the source of
  # the "the condition has length > 1" error.
  if (is.numeric(y) & g==1) {
    # For numeric variables, calculate SMD
    # NOTE(review): mean(y)/sd(y) is computed over all of y, not only the
    # observations belonging to group 1.
    smd_val1 <- (mean(y)/sd(y))
  } else if (is.numeric(y) & g==2) {
    # For numeric variables, calculate SMD
    # NOTE(review): same expression as above, again over all of y. Because
    # this is an else-if branch, at most one of smd_val1 / smd_val2 is ever
    # assigned in a single call.
    smd_val2 <- (mean(y)/sd(y))
  } else {print("--")
  }
  # NOTE(review): `smdval2` and `smdval1` are never assigned in this function;
  # the variables created above are named `smd_val2` and `smd_val1`.
  smd_val <- smdval2 - smdval1
}

# Build the table of age, sex and weight split by treatment group, using the
# custom renderers and an extra "SMD" column produced by smd_value
table1(
  ~ age + sex + wt | treat,
  data = dat,
  render.continuous = my.render.cont,
  render.categorical = my.render.cat,
  extra.col = list(`SMD` = smd_value)
)

I get the following error:

"Error in if (is.numeric(y) & g == 1) { : the condition has length > 1"

Any insight into a potential solution?

Thanks!


Solution

  • Here you go!

    # Install Packages---------------------------------------------------
    library(stddiff)
    library(cobalt)
    library(table1)
    library(Hmisc)
    
    # Work on a copy of the built-in 'mtcars' data set
    my_data <- mtcars

    # Format variables ------------------------------------------------------------

    # am: transmission (0 = automatic, 1 = manual)
    my_data[["am"]] <- factor(
      my_data[["am"]],
      levels = c(0, 1),
      labels = c("Automatic", "Manual")
    )
    label(my_data[["am"]]) <- "Transmission Type"

    # vs: engine (0 = V-shaped, 1 = straight)
    my_data[["vs"]] <- factor(
      my_data[["vs"]],
      levels = c(0, 1),
      labels = c("V-shaped", "Straight")
    )
    label(my_data[["vs"]]) <- "Engine"

    # Display labels for the numeric variables
    label(my_data[["mpg"]]) <- "Miles per gallon"
    label(my_data[["hp"]]) <- "Horsepower"
    
    # SMD FUNCTION------------------------------------------------------------------
    #' Standardized mean difference cell(s) for a table1 `extra.col` column.
    #'
    #' @param x List of per-stratum value vectors for one variable; it is
    #'   flattened with unlist() and each element's length defines its group.
    #' @param ... Accepted for compatibility with the caller; not used.
    #' @return Character vector: an empty string followed by the formatted
    #'   SMD value(s). If the SMD cannot be computed, a warning is raised and
    #'   the value is shown as "NA".
    SMD_value <- function(x, ...) {
      # Flatten the strata into one data vector `y` and a parallel group factor `g`
      y <- unlist(x)
      g <- factor(rep(seq_along(x), times = lengths(x)))

      # A failure in the SMD helpers used to be swallowed by try(), leaving `smd`
      # undefined so the function then died with "object 'smd' not found".
      # Report the real cause as a warning and fall back to NA instead.
      smd <- tryCatch(
        {
          a <- data.frame(y)
          a$g <- g
          if (is.numeric(y)) {
            # For numeric variables: the `stddiff` column of stddiff.numeric()
            as.data.frame(stddiff.numeric(data = a, gcol = "g", vcol = "y"))$stddiff
          } else {
            # For categorical variables: absolute unadjusted difference (`Diff.Un`)
            # from the balance table returned by bal.tab()
            abs(bal.tab(
              a,
              treat = "g",
              data = a,
              binary = "std",
              continuous = "std",
              s.d.denom = "pooled",
              stats = c("mean.diffs")
            )$Balance$Diff.Un)
          }
        },
        error = function(e) {
          warning("SMD could not be computed: ", conditionMessage(e), call. = FALSE)
          NA_real_
        }
      )

      c("", format(smd, digits = 2)) # Formatting number of digits
    }
    
    # CONTINUOUS VARIABLES FORMATTING-----------------------------------------------
    # Renderer for continuous variables: an empty first cell, then a
    # "Mean (SD)" entry and a "Median (IQR)" entry. Every statistic comes from
    # stats.default(x) and is passed through round_pad(., 1).
    my.render.cont <- function(x) {
      s <- stats.default(x)
      pad1 <- function(value) round_pad(value, 1)

      mean_sd <- sprintf("%s (%s)", pad1(s[["MEAN"]]), pad1(s[["SD"]]))
      median_iqr <- sprintf(
        "%s (%s, %s)",
        pad1(s[["MEDIAN"]]),
        pad1(s[["Q1"]]),
        pad1(s[["Q3"]])
      )

      c("", "Mean (SD)" = mean_sd, "Median (IQR)" = median_iqr)
    }
    
    # Creating the final table-----------------------------------------------------
    # Build the table of vs, mpg and hp split by transmission type (am), with
    # overall = FALSE, the custom continuous renderer and the extra SMD column
    Table1 <- table1(
      ~ vs + mpg + hp | am,
      data = my_data,
      overall = FALSE,
      render.continuous = my.render.cont,
      extra.col = list(`SMD` = SMD_value)
    )

    # Print the finished table
    Table1