Search code examples
rggplot2survival

R Extract Data From SurvFit


library(survival)
etime <- with(mgus2, ifelse(pstat==0, futime, ptime))
event <- with(mgus2, ifelse(pstat==0, 2*death, 1))
event <- factor(event, 0:2, labels=c("censor", "pcm", "death"))

mfit2 <- survfit(Surv(etime, event) ~ sex, data=mgus2)

plot(mfit2, col=c(1,2,1,2), lty=c(2,2,1,1),
     mark.time=FALSE, lwd=2, xscale=12,
     xlab="Years post diagnosis", ylab="Probability in State")
legend(240, .6, c("death:female", "death:male", "pcm:female", "pcm:male"),
         col=c(1,2,1,2), lty=c(1,1,2,2), lwd=2, bty='n')

This is a reproducible example here. I wonder, how can it be possible to take out these data from 'mfit2' so it can be plotted in ggplot2?


Solution

  • You can extract the data from the summary of the fitted object using lapply

    sfit <- summary(mfit2)
    str(sfit)
    
    List of 24
     $ n            : int [1:2] 631 753
     $ time         : num [1:359] 1 2 3 4 5 6 7 8 9 10 ...
     $ n.risk       : int [1:359, 1:3] 631 610 599 595 588 587 581 580 573 569 ...
     $ n.event      : int [1:359, 1:3] 0 0 0 0 0 0 0 0 0 0 ...
     $ n.censor     : num [1:359] 1 0 0 0 0 0 0 0 0 1 ...
     $ pstate       : num [1:359, 1:3] 0.968 0.951 0.944 0.933 0.932 ...
     $ p0           : num [1:2, 1:3] 1 1 0 0 0 0
      ..- attr(*, "dimnames")=List of 2
      .. ..$ : chr [1:2] "sex=F" "sex=M"
      .. ..$ : chr [1:3] "(s0)" "pcm" "death"
     $ strata       : Factor w/ 2 levels "sex=F","sex=M": 1 1 1 1 1 1 1 1 1 1 ...
    ...
    

    I think the columns you need are the time, pstate and `strata. But some others, such as the numbers at risk may be useful.

    cols <- lapply(c(2:6, 8, 16, 17), function(x) sfit[x])
    

    Then combine these columns into a data frame with do.call

    data <- do.call(data.frame, cols)
    str(data)
    'data.frame':   359 obs. of  21 variables:
     $ time     : num  1 2 3 4 5 6 7 8 9 10 ...
     $ n.risk.1 : int  631 610 599 595 588 587 581 580 573 569 ...
     $ n.risk.2 : int  0 0 0 0 0 0 0 0 0 0 ...
     $ n.risk.3 : int  0 0 0 0 0 0 0 0 0 0 ...
     $ n.event.1: int  0 0 0 0 0 0 0 0 0 0 ...
     $ n.event.2: int  0 2 0 1 0 1 0 0 2 1 ...
     $ n.event.3: int  20 9 4 6 1 5 1 7 2 2 ...
     $ n.censor : num  1 0 0 0 0 0 0 0 0 1 ...
     $ pstate.1 : num  0.968 0.951 0.944 0.933 0.932 ...
     $ pstate.2 : num  0 0.00317 0.00317 0.00476 0.00476 ...
     $ pstate.3 : num  0.0317 0.046 0.0523 0.0619 0.0634 ...
     $ strata   : Factor w/ 2 levels "sex=F","sex=M": 1 1 1 1 1 1 1 1 1 1 ...
     $ lower.1  : num  0.955 0.934 0.927 0.914 0.912 ...
     $ lower.2  : num  NA 0.000796 0.000796 0.00154 0.00154 ...
     $ lower.3  : num  0.0206 0.0322 0.0375 0.0456 0.047 ...
     $ upper.1  : num  0.982 0.968 0.963 0.953 0.952 ...
     $ upper.2  : num  NA 0.0127 0.0127 0.0147 0.0147 ...
     $ upper.3  : num  0.0488 0.0656 0.0729 0.0838 0.0856 ...
    

    This data is in wide format, best to reshape to long for the graph.

    mgus3 <- data %>%
      pivot_longer(cols=-c(time, strata, n.censor),
                   names_to=c(".value","state"),
                   names_pattern="(.+).(.+)") %>%
      filter(state!=1) %>%  # Exclude the censored state
      mutate(state=factor(state, labels=c("pcm","death")), 
             group=interaction(strata, state)) 
    

    Then plot it.

    library(ggplot)
    
    mgus3 %>%
      ggplot(aes(x=time, y=pstate, col=group)) +
      geom_line(aes(linetype=group)) +
      ylab("Probability in State") +
      theme_bw()
    

    enter image description here

    You should be able to add confidence bands and make it more pretty.