Is there a way to see the formula that R uses for the survfit confidence intervals?

I want to be able to see how the summary of survfit calculates its confidence intervals. Is there a way that I can ask R to show me how it calculated these to show me the formula?

Thanks :)

Solution

You can find the source code like this. First look at survfit:

> getAnywhere("survfit")
A single object matching ‘survfit’ was found
It was found in the following places
  package:survival
  namespace:survival
with value
function (formula, ...) 
{
    UseMethod("survfit", formula)
}
<bytecode: 0x000000000edccc88>
<environment: namespace:survival>
>

This tells us we have to look at survfit.formula, which is what that UseMethod call is saying. So we do that and we get a lot of code:

> getAnywhere("survfit.formula")
A single object matching ‘survfit.formula’ was found
It was found in the following places
  package:survival
  registered S3 method for survfit from namespace survival
  namespace:survival
with value

function (formula, data, weights, subset, na.action, etype, id, 
    istate, ...) 
{
    Call <- match.call()
    Call[[1]] <- as.name("survfit")
    mfnames <- c("formula", "data", "weights", "subset", "na.action", 
        "istate", "id", "etype")
    temp <- Call[c(1, match(mfnames, names(Call), nomatch = 0))]
    temp[[1]] <- as.name("model.frame")
    if (is.R()) 
        m <- eval.parent(temp)
    else m <- eval(temp, sys.parent())
    Terms <- terms(formula, c("strata", "cluster"))
    ord <- attr(Terms, "order")
    if (length(ord) & any(ord != 1)) 
        stop("Interaction terms are not valid for this function")
    n <- nrow(m)
    Y <- model.extract(m, "response")
    if (!is.Surv(Y)) 
        stop("Response must be a survival object")
    casewt <- model.extract(m, "weights")
    if (is.null(casewt)) 
        casewt <- rep(1, n)
    if (!is.null(attr(Terms, "offset"))) 
        warning("Offset term ignored")
    id <- model.extract(m, "id")
    istate <- model.extract(m, "istate")
    temp <- untangle.specials(Terms, "cluster")
    if (length(temp$vars) > 0) {
        if (length(temp$vars) > 1) 
            stop("can not have two cluster terms")
        if (!is.null(id)) 
            stop("can not have both a cluster term and an id variable")
        id <- m[[temp$vars]]
        Terms <- Terms[-temp$terms]
    }
    ll <- attr(Terms, "term.labels")
    if (length(ll) == 0) 
        X <- factor(rep(1, n))
    else X <- strata(m[ll])
    if (!is.Surv(Y)) 
        stop("y must be a Surv object")
    etype <- model.extract(m, "etype")
    if (!is.null(etype)) {
        if (attr(Y, "type") == "mcounting" || attr(Y, "type") == 
            "mright") 
            stop("cannot use both the etype argument and mstate survival type")
        if (length(istate)) 
            stop("cannot use both the etype and istate arguments")
        status <- Y[, ncol(Y)]
        etype <- as.factor(etype)
        temp <- table(etype, status == 0)
        if (all(rowSums(temp == 0) == 1)) {
            newlev <- levels(etype)[order(-temp[, 2])]
        }
        else newlev <- c(" ", levels(etype)[temp[, 1] > 0])
        status <- factor(ifelse(status == 0, 0, as.numeric(etype)), 
            labels = newlev)
        if (attr(Y, "type") == "right") 
            Y <- Surv(Y[, 1], status, type = "mstate")
        else if (attr(Y, "type") == "counting") 
            Y <- Surv(Y[, 1], Y[, 2], status, type = "mstate")
        else stop("etype argument incompatable with survival type")
    }
    if (attr(Y, "type") == "left" || attr(Y, "type") == "interval") 
        temp <- survfitTurnbull(X, Y, casewt, ...)
    else if (attr(Y, "type") == "right" || attr(Y, "type") == 
        "counting") 
        temp <- survfitKM(X, Y, casewt, ...)
    else if (attr(Y, "type") == "mright" || attr(Y, "type") == 
        "mcounting") 
        temp <- survfitCI(X, Y, weights = casewt, id = id, istate = istate, 
            ...)
    else {
        stop("unrecognized survival type")
    }
    if (is.null(temp$states)) 
        class(temp) <- "survfit"
    else class(temp) <- c("survfitms", "survfit")
    if (!is.null(attr(m, "na.action"))) 
        temp$na.action <- attr(m, "na.action")
    temp$call <- Call
    temp
}
<bytecode: 0x000000003f6a8c28>
<environment: namespace:survival>

We scan this and eventually notice a call to survfitCI close to the end. Sounds like what we are looking for. So once again into the breech:

> getAnywhere("survfitCI")
A single object matching ‘survfitCI’ was found
It was found in the following places
  package:survival
  namespace:survival
with value

function (X, Y, weights, id, istate, type = c("kaplan-meier", 
    "fleming-harrington", "fh2"), se.fit = TRUE, conf.int = 0.95, 
    conf.type = c("log", "log-log", "plain", "none"), conf.lower = c("usual", 
        "peto", "modified")) 
{
    method <- match.arg(type)
    conf.type <- match.arg(conf.type)
    conf.lower <- match.arg(conf.lower)
    if (is.logical(conf.int)) {
        if (!conf.int) 
            conf.type <- "none"
        conf.int <- 0.95
    }
    type <- attr(Y, "type")
    if (type != "mright" && type != "mcounting" && type != "right" && 
        type != "counting") 
        stop(paste("Cumulative incidence computation doesn't support \"", 
            type, "\" survival data", sep = ""))
    n <- nrow(Y)
    status <- Y[, ncol(Y)]
    ncurve <- length(levels(X))
    state.names <- attr(Y, "states")
    if (missing(istate) || is.null(istate)) 
        istate <- rep(0L, n)
    else if (is.factor(istate) || is.character(istate)) {
        temp <- as.factor(istate)
        appear <- (levels(istate))[unique(as.numeric(istate))]
        state.names <- unique(c(attr(Y, "states"), appear))
        istate <- as.numeric(factor(as.character(istate), levels = state.names))
    }
    else if (!is.numeric(istate) || any(istate != floor(istate))) 
        stop("istate should be a vector of integers or a factor")
    if (length(id) == 0) 
        id <- 1:n
    if (length(istate) == 1) 
        istate <- rep(istate, n)
    if (length(istate) != n) 
        stop("wrong length for istate")
    states <- sort(unique(c(istate, 1:length(attr(Y, "states")))))
    docurve2 <- function(entry, etime, status, istate, wt, states, 
        id, se.fit) {
        ftime <- factor(c(entry, etime))
        ltime <- levels(ftime)
        ftime <- matrix(as.integer(ftime), ncol = 2)
        timeset <- as.numeric(ltime[sort(unique(ftime[, 2]))])
        nstate <- length(states)
        uid <- sort(unique(id))
        P <- as.vector(tapply(wt, factor(istate, levels = states), 
            sum)/sum(wt))
        P <- ifelse(is.na(P), 0, P)
        cstate <- istate[match(uid, id)]
        storage.mode(wt) <- "double"
        storage.mode(cstate) <- "integer"
        storage.mode(status) <- "integer"
        fit <- .Call(Csurvfitci, ftime, order(ftime[, 1]) - 1L, 
            order(ftime[, 2]) - 1L, length(timeset), status, 
            cstate - 1L, wt, match(id, uid) - 1L, P, as.integer(se.fit))
        prev0 <- table(factor(cstate, levels = states), exclude = NA)/length(cstate)
        if (se.fit) 
            list(time = timeset, pmat = t(fit$p), std = sqrt(t(fit$var)), 
                n.risk = colSums(fit$nrisk), n.event = fit$nevent, 
                n.censor = fit$ncensor, prev0 = prev0, cumhaz = array(fit$cumhaz, 
                  dim = c(nstate, nstate, length(timeset))))
        else list(time = timeset, pmat = t(fit$p), n.risk = colSums(fit$nrisk), 
            n.event = fit$nevent, n.censor = fit$ncensor, prev0 = prev0, 
            cumhaz = array(fit$cumhaz, dim = c(nstate, nstate, 
                length(timeset))))
    }
    if (any(states == 0)) {
        state0 <- TRUE
        states <- states + 1
        istate <- istate + 1
        status <- ifelse(status == 0, 0, status + 1)
    }
    else state0 <- FALSE
    curves <- vector("list", ncurve)
    names(curves) <- levels(X)
    if (ncol(Y) == 2) {
        indx <- which(status == istate & status != 0)
        if (length(indx)) {
            warning("an observation transitions to it's starting state, transition ignored")
            status[indx] <- 0
        }
        if (length(id) && any(duplicated(id))) 
            stop("Cannot have duplicate id values with (time, status) data")
        entry <- rep(min(-1, 2 * min(Y[, 1]) - 1), n)
        for (i in levels(X)) {
            indx <- which(X == i)
            curves[[i]] <- docurve2(entry[indx], Y[indx, 1], 
                status[indx], istate[indx], weights[indx], states, 
                id[indx], se.fit)
        }
    }
    else {
        if (missing(id) || is.null(id)) 
            stop("the id argument is required for start:stop data")
        indx <- order(id, Y[, 2])
        indx1 <- c(NA, indx)
        indx2 <- c(indx, NA)
        same <- (id[indx1] == id[indx2] & !is.na(indx1) & !is.na(indx2))
        if (any(same & X[indx1] != X[indx2])) {
            who <- 1 + min(which(same & X[indx1] != X[indx2]))
            stop("subject is in two different groups, id ", (id[indx1])[who])
        }
        if (any(same & Y[indx1, 2] != Y[indx2, 1])) {
            who <- 1 + min(which(same & Y[indx1, 2] != Y[indx2, 
                1]))
            stop("gap in follow-up, id ", (id[indx1])[who])
        }
        if (any(Y[, 1] == Y[, 2])) 
            stop("cannot have start time == stop time")
        if (any(same & Y[indx1, 3] == Y[indx2, 3] & Y[indx1, 
            3] != 0)) {
            who <- 1 + min(which(same & Y[indx1, 1] != Y[indx2, 
                2]))
            warning("subject changes to the same state, id ", 
                (id[indx1])[who])
        }
        if (any(same & weights[indx1] != weights[indx2])) {
            who <- 1 + min(which(same & weights[indx1] != weights[indx2]))
            stop("subject changes case weights, id ", (id[indx1])[who])
        }
        indx <- order(Y[, 2])
        uid <- unique(id)
        temp <- (istate[indx])[match(uid, id[indx])]
        istate <- temp[match(id, uid)]
        for (i in levels(X)) {
            indx <- which(X == i)
            curves[[i]] <- docurve2(Y[indx, 1], Y[indx, 2], status[indx], 
                istate[indx], weights[indx], states, id[indx], 
                se.fit)
        }
    }
    grabit <- function(clist, element) {
        temp <- (clist[[1]][[element]])
        if (is.matrix(temp)) {
            nc <- ncol(temp)
            matrix(unlist(lapply(clist, function(x) t(x[[element]]))), 
                byrow = T, ncol = nc)
        }
        else {
            xx <- as.vector(unlist(lapply(clist, function(x) x[element])))
            if (class(temp) == "table") 
                matrix(xx, byrow = T, ncol = length(temp))
            else xx
        }
    }
    kfit <- list(n = as.vector(table(X)), time = grabit(curves, 
        "time"), n.risk = grabit(curves, "n.risk"), n.event = grabit(curves, 
        "n.event"), n.censor = grabit(curves, "n.censor"), prev = grabit(curves, 
        "pmat"), prev0 = grabit(curves, "prev0"))
    nstate <- length(states)
    kfit$cumhaz <- array(unlist(lapply(curves, function(x) x$cumhaz)), 
        dim = c(nstate, nstate, length(kfit$time)))
    if (length(curves) > 1) 
        kfit$strata <- unlist(lapply(curves, function(x) length(x$time)))
    if (se.fit) 
        kfit$std.err <- grabit(curves, "std")
    if (state0) {
        kfit$prev <- kfit$prev[, -1]
        if (se.fit) 
            kfit$std.err <- kfit$std.err[, -1]
        kfit$prev0 <- kfit$prev0[, -1]
    }
    if (se.fit) {
        std.err <- kfit$std.err
        zval <- qnorm(1 - (1 - conf.int)/2, 0, 1)
        surv <- 1 - kfit$prev
        if (conf.type == "plain") {
            temp <- zval * std.err
            kfit <- c(kfit, list(lower = pmax(kfit$prev - temp, 
                0), upper = pmin(kfit$prev + temp, 1), conf.type = "plain", 
                conf.int = conf.int))
        }
        if (conf.type == "log") {
            xx <- ifelse(kfit$prev == 1, 1, 1 - kfit$prev)
            temp1 <- ifelse(surv == 0, NA, exp(log(xx) + zval * 
                std.err/xx))
            temp2 <- ifelse(surv == 0, NA, exp(log(xx) - zval * 
                std.err/xx))
            kfit <- c(kfit, list(lower = pmax(1 - temp1, 0), 
                upper = 1 - temp2, conf.type = "log", conf.int = conf.int))
        }
        if (conf.type == "log-log") {
            who <- (surv == 0 | surv == 1)
            temp3 <- ifelse(surv == 0, NA, 1)
            xx <- ifelse(who, 0.1, kfit$surv)
            temp1 <- exp(-exp(log(-log(xx)) + zval * std.err/(xx * 
                log(xx))))
            temp1 <- ifelse(who, temp3, temp1)
            temp2 <- exp(-exp(log(-log(xx)) - zval * std.err/(xx * 
                log(xx))))
            temp2 <- ifelse(who, temp3, temp2)
            kfit <- c(kfit, list(lower = 1 - temp1, upper = 1 - 
                temp2, conf.type = "log-log", conf.int = conf.int))
        }
    }
    kfit$states <- state.names
    kfit$type <- attr(Y, "type")
    kfit
}
<bytecode: 0x000000002ce81838>
<environment: namespace:survival>

Somewhere in there is your answer.