I have created a figure with two Kaplan-Meier survival curves to display the effect of two medications on the survival of patients. The dataset includes 41 patients, 26 (A1-A26) have received an oral medication and 15 (B1-B15) a vaccine. The x-axis shows the days and the y-axis the percentage of the overall patient pool. I am only interested to plot from 0-400 days of the study, which means that two datapoints each for 'oral' (A25,A26) and 'vaccine' (B14,B15) would not be shown. Moreover, I would like to plot Kaplan-Meier curves that drop by 1.45 units upon the death of a patient (as indicated in the data column 'survival'). Based on this, the curve for 'oral' would stop at 62.32% and that of 'vaccine' at 81.16% (that exludes the two datapoints each that are > 400 days), so that the y-axis would start at 60% (rather than 0%). However, at the moment the curve for 'oral' drops by 26/100 units and that of 'vaccine' by 15/100 units based on the assumption that all patients will have deceased by the end of the trial. I would therefore be interested to know:
Below are a reproducible example dataset and the code that I currently use.
Required packages: library(survival), library(ggplot2)
Load reproducible data
structure(list(patient = structure(c(1L, 12L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 27L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 28L, 29L, 30L, 31L, 32L, 33L), .Label = c("A1",
"A10", "A11", "A12", "A13", "A14", "A15", "A16", "A17", "A18",
"A19", "A2", "A20", "A21", "A22", "A23", "A24", "A25", "A26",
"A3", "A4", "A5", "A6", "A7", "A8", "A9", "B1", "B10", "B11",
"B12", "B13", "B14", "B15", "B2", "B3", "B4", "B5", "B6", "B7",
"B8", "B9"), class = "factor"), survival = c(98.55, 97.1, 95.65,
94.2, 92.75, 91.3, 89.85, 88.4, 86.95, 85.5, 84.05, 82.6, 81.15,
79.7, 78.25, 76.8, 75.35, 73.9, 72.45, 71, 69.55, 68.1, 66.65,
65.2, 49.9, 57.97, 98.55, 97.1, 95.65, 94.2, 92.75, 91.3, 89.85,
88.4, 86.95, 85.5, 84.05, 82.6, 81.15, 67.6, 72), days = c(103L,
105L, 110L, 121L, 124L, 126L, 140L, 144L, 152L, 173L, 176L, 181L,
185L, 200L, 206L, 211L, 223L, 247L, 253L, 261L, 276L, 281L, 309L,
334L, 402L, 489L, 148L, 216L, 255L, 257L, 280L, 290L, 306L, 325L,
305L, 307L, 334L, 329L, 343L, 560L, 610L), treatment = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("oral", "vaccine"
), class = "factor"), status = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L)), .Names = c("patient", "survival", "days", "treatment",
"status"), class = "data.frame", row.names = c(NA, -41L))
Create a Surv object and estimate a survivor function for your data
fit.test <- survfit(Surv(days, status == 1) ~ treatment, data=test, conf.int=FALSE)
Run function ggsurv
Plot
ggsurv(fit.test, lty.est = 1) +
geom_text(data = NULL, size=5.0, col = "red", x = 39.0, y = 0.23, label = "oral") +
geom_text(data = NULL, size=5.0, col = "blue", x = 30.5, y = 0.12, label = "vaccine") +
scale_x_continuous(expand=c(0.01,0.01),
limits=c(0,400),
breaks=c(0,50,100,150,200,250,300,350,400),
labels=c("0","50","100","150","200","250","300","350","400")) +
scale_y_continuous(expand=c(0.005,0.01),
limits=c(0,1.0),
breaks=c(0,0.2,0.4,0.6,0.8,1),
labels=c("0","0.2","0.4","0.6","0.8","1.0")) +
xlab("Time") +
ylab("Survival") +
theme_bw() +
theme(legend.position="none") +
theme(axis.title.x = element_text(vjust=0.1,face="bold", size=16),
axis.text.x = element_text(vjust=4, size=14))+
theme(axis.title.y = element_text(angle=90, vjust=0.70, face="bold", size=18),
axis.text.y = element_text(size=14)) +
theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) +
theme(panel.border = element_rect(size=2, colour = "black", fill=NA, linetype=1)) +
theme(plot.margin = unit(c(-0.9,0.4,0.28,0.0),"lines"))
The problem arises by creating the survival
object. The way you present your data it looks like all the patients in the set have experienced the event after 489 days for oral and after 610 days for vaccine. However you know that this is only a part of the data, since you have the percentage remaining patients available. You can add rows for the patients that haven't experienced the event at the final day for the group and give them status 0. Alternatively you just use geom_step
to create the plot without using the ggsurv
function.
round(100/1.45)
test <- test[ ,c(1,3:5)]
extra_patients <-
data.frame(patient = c(paste('A', 27:69, sep = ''),
paste('B', 16:69, sep = '')),
days = rep(c(489, 610), c(43, 54)),
treatment = rep(c('oral', 'vaccine'), c(43, 54)),
status = 0)
full_test <- rbind(test, extra_patients)
library(survival)
fit.test <- survfit(Surv(days, status == 1) ~ treatment, data=full_test, conf.int=FALSE)
library(GGally)
ggsurv(fit.test) + coord_cartesian(xlim = c(0,400))