Search code examples
Tags: r, modeling, extract, survival-analysis

How to extract the formula from a coxph model summary in R?


I am able to replicate the coxph model and the extraction of components as outlined in the tutorial here: http://www.sthda.com/english/wiki/cox-proportional-hazards-model. However, I am struggling to extract the formula object from the model.

# survival supplies Surv(), coxph() and the lung data set used below.
library("survival")
# NOTE(review): no survminer function is called in the code shown here --
# confirm whether this dependency is actually needed.
library("survminer")
data("lung")
# Preview the first rows of the data.
head(lung)

# Covariates to screen; one univariate Cox model is fitted for each.
covariates <- c("age", "sex", "ph.karno", "ph.ecog", "wt.loss")

# Build one `Surv(time, status) ~ <covariate>` formula per covariate.
# lapply() always returns a list, whereas sapply() returned one here only
# because its simplification step does not apply to formula objects.
# setNames() keeps the covariate names that sapply() used to attach, so the
# resulting list can still be indexed as univ_formulas$age etc.
univ_formulas <- lapply(
  setNames(covariates, covariates),
  function(x) as.formula(paste("Surv(time, status) ~", x))
)

# Fit one univariate Cox model per formula against the lung data; the
# returned list keeps the names of `univ_formulas`.
univ_models <- lapply(
  univ_formulas,
  function(x) {
    coxph(x, data = lung)
  }
)
# Extract data: for every univariate model, collect the rounded coefficient,
# hazard ratio with its 95% confidence interval, Wald statistic and p-value.
univ_results <- lapply(univ_models,
                       function(x) {
                         # `x` is rebound to the model summary here; the fitted
                         # coxph object is no longer reachable below this line.
                         x <- summary(x)
                         # Use the exact component names: `x$wald` and `x$coef`
                         # only resolved through `$` partial matching.
                         p.value <- signif(x$waldtest[["pvalue"]], digits = 2)
                         wald.test <- signif(x$waldtest[["test"]], digits = 2)
                         # Index the coefficient matrix by column name instead
                         # of by linear position.
                         beta <- signif(x$coefficients[1, "coef"], digits = 2) # coefficient beta
                         HR <- signif(x$coefficients[1, "exp(coef)"], digits = 2) # exp(beta)
                         HR.confint.lower <- signif(x$conf.int[, "lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[, "upper .95"], 2)
                         # Render the hazard ratio as "HR (lower-upper)".
                         HR <- paste0(HR, " (",
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         # Mixing numbers with the HR string yields a character
                         # vector; the names become the column headers of `res`.
                         res <- c(beta, HR, wald.test, p.value)
                         names(res) <- c("beta", "HR (95% CI for HR)", "wald.test",
                                         "p.value")
                         res
                       })
# Stack the per-model vectors into a character matrix: one row per covariate.
res <- do.call(rbind, univ_results)
as.data.frame(res)

I have tried adding the following statement to the function(x) without success:

# Attempted extraction: glue the formula's left-hand side ([[2]]), the `~`
# operator ([[1]]) and the right-hand side ([[3]]) into one string.
# NOTE: inside the function above `x` has already been replaced by
# summary(x), so `x$formula` is looked up on the summary, not on the model.
formula_extract <- paste( c(x$formula[[2]],
                                                x$formula[[1]],
                                                x$formula[[3]]), collapse='')

Adding the statement looks like this. The resulting data frame "res" contains the new column, but it is completely empty (not even NA).

# Extract data (version that reproduces the problem: the formula_extract
# column comes back empty).
univ_results <- lapply(univ_models,
                       function(x){ 
                         # From this line on `x` is the summary object; the
                         # fitted coxph model passed in is overwritten.
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);# coefficient beta
                         HR <-signif(x$coef[2], digits=2);# exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (", 
                                      HR.confint.lower, "-", HR.confint.upper, ")")
# Source of the empty column: `x$formula` is read from the summary that
# replaced the model above, not from the coxph fit that stores the formula.
formula_extract <- paste( c(x$formula[[2]],
                                                    x$formula[[1]],
                                                    x$formula[[3]]), collapse='')

                         # Combining numbers and strings yields a character vector.
                         res<-c(beta, HR, wald.test, p.value, formula_extract)
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
                                       "p.value", "formula_extract")
                         return(res)
                         #return(exp(cbind(coef(x),confint(x))))
                       })
# One row per covariate, one column per extracted statistic.
res <- t(as.data.frame(univ_results, check.names = FALSE))
as.data.frame(res)

However, by itself this works:

# Same extraction applied directly to a fitted model: univ_models$age is the
# coxph object itself (not its summary), so `$formula` is available here.
paste( c(univ_models$age$formula[[2]],
                 univ_models$age$formula[[1]],
                 univ_models$age$formula[[3]]), collapse='')

Any help would be appreciated!


Solution

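  • All you need to do is use a different variable name for the summary. Inside the function, `x <- summary(x)` overwrites the fitted model with its summary, and the summary object has no `formula` component, so `x$formula` is `NULL` and the pasted result is an empty string. Keep the model in its own variable, i.e.: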

    # Keep the fitted model (`y`) and its summary (`x`) in separate variables:
    # the statistics come from the summary, the formula from the model.
    univ_results <- lapply(univ_models,
                           function(y){
                             x <- summary(y)
                             p.value<-signif(x$wald["pvalue"], digits=2)
                             wald.test<-signif(x$wald["test"], digits=2)
                             beta<-signif(x$coef[1], digits=2)  # coefficient beta
                             HR <-signif(x$coef[2], digits=2)   # exp(beta)
                             HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                             HR.confint.upper <- signif(x$conf.int[,"upper .95"], 2)
                             HR <- paste0(HR, " (",
                                          HR.confint.lower, "-", HR.confint.upper, ")")
                             # Read the formula from the model, not the summary.
                             # deparse() may split a long formula over several
                             # strings, so collapse them into a single one.
                             formula_extract <- paste(deparse(y$formula), collapse = " ")
                             res <- c(beta, HR, wald.test, p.value, formula_extract)
                             names(res) <- c("beta", "HR (95% CI for HR)", "wald.test",
                                             "p.value", "formula_extract")
                             res
                           })
    # One row per covariate, one column per extracted value.
    res <- t(as.data.frame(univ_results, check.names = FALSE))
    
    
    as.data.frame(res)
               beta HR (95% CI for HR) wald.test p.value               formula_extract
    age       0.019            1 (1-1)       4.1   0.042      Surv(time, status) ~ age
    sex       -0.53   0.59 (0.42-0.82)        10  0.0015      Surv(time, status) ~ sex
    ph.karno -0.016      0.98 (0.97-1)       7.9   0.005 Surv(time, status) ~ ph.karno
    ph.ecog    0.48        1.6 (1.3-2)        18 2.7e-05  Surv(time, status) ~ ph.ecog
    wt.loss  0.0013         1 (0.99-1)      0.05    0.83  Surv(time, status) ~ wt.loss