I am working with two different binary classification models, each having a different number of test samples (there are some mutual samples). The prediction scores from these models are used to split the samples into two groups - group 0 and group 1 - using the median prediction score as a threshold. So model 1 has now the samples grouped into two groups, and so is model 2.
Now, my objective is to take group 0
from both models and plot their survival curves on the same graph. When dealing with groups originating from the same model, plotting survival curves was straightforward. However, I am facing complications when attempting to plot survival curves for groups that come from different models.
This is my code with explenation:
# `survivalCurve` is a function that I made, it plots a survival curve for group 0 and group 1, no problem here.
# tme and nontme are both data frames that the function returns. So the function plots the survival curve, AND it also returns the data frame it used.
tme = survivalCurve(mymodel = model1, model_num = 8, use.TME = TRUE)
nontme = survivalCurve(mymodel = model2, model_num = 29, use.TME = FALSE)
# Now I take group 0 from both of those models
tme_0 = tme %>% dplyr::filter(group == 0)
nontme_0 = nontme %>% dplyr::filter(group == 0)
# I want to plot the two groups of 0 in the same plot. Using this code I was able to plot each one separatly but not together!
tme_0$rowname = rownames(tme_0)
nontme_0$rowname = rownames(nontme_0)
zeros = merge(tme_0, nontme_0, by = "rowname", all = TRUE)
zeros_x = zeros[!is.na(zeros$survival_time.x) & !is.na(zeros$survival_status.x), ]
zeros_y = zeros[!is.na(zeros$survival_time.y) & !is.na(zeros$survival_status.y), ]
# Create survival objects for each group (group 0 from model 1, and group 0 from model 2)
survival_object_x = Surv(time = zeros_x$survival_time.x, event = zeros_x$survival_status.x)
survival_object_y = Surv(time = zeros_y$survival_time.y, event = zeros_y$survival_status.y)
# Fit survival curves
fit_x = survfit(survival_object_x ~ 1)
fit_y = survfit(survival_object_y ~ 1)
ggsurv_x = ggsurvplot(fit_x, data = zeros_x, risk.table = TRUE, pval = TRUE, conf.int = TRUE, legend.title = "Group X", legend.labs = c("Group X"), palette = c("blue"))
ggsurv_y = ggsurvplot(fit_y, data = zeros_y, risk.table = TRUE, pval = TRUE, conf.int = TRUE, legend.title = "Group Y", legend.labs = c("Group Y"), palette = c("red"))
So, ggsurv_x
and ggsurv_y
both give me a survival curve, but each one is alone of course. How can I put them in the same graph ? Here is some of tme
and nontme
which contain the survival time, survival status, prediction score and the group the samples belong to (according to the median as I explained earlier):
> dput(tme[1:20,])
structure(list(survival_time = c(9, 7, 8, 32, 27, 21, 5, 7, 25,
15, 14, 30, 28, 6, 25, 27, 37, 63, 63, 22), survival_status = c(1,
1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1), prediction_scores = c(0.452489739997856,
0.440583099511596, 0.509308651793677, 0.457296723067907, 0.455967522911745,
0.456091537587146, 0.52058697349597, 0.508446710238974, 0.468852537302421,
0.401894923842953, 0.459980058832451, 0.488860355868262, 0.457574356551677,
0.49155383636979, 0.470455235473811, 0.43061685052414, 0.428729855269241,
0.442110813635852, 0.413582392521575, 0.407929513543207), group = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L), levels = c("0", "1"), class = "factor")), row.names = c("Nivolumab_2017_p002_ar_8815",
"Nivolumab_2017_p005_ar_8883", "Nivolumab_2017_p010_ar_8887",
"Nivolumab_2017_p031_ar_8913", "Nivolumab_2017_p034_ar_8929",
"Nivolumab_2017_p037_ar_8900", "Nivolumab_2017_p039_ar_8819",
"Nivolumab_2017_p046_ar_8904", "Nivolumab_2017_p072_ar_8861",
"Nivolumab_2017_p077_ar_8846", "Nivolumab_2017_p082_ar_8822",
"Nivolumab_2017_p085_ar_8829", "Nivolumab_2017_p089_ar_8831",
"Nivolumab_2017_p090_ar_8866", "Nivolumab_2017_p098_ar_8853",
"Nivolumab_2017_p101_ar_8834", "EA595720", "EA632149", "EA632174",
"EA639099"), class = "data.frame")
> dput(nontme[1:20,])
structure(list(survival_time = c(5, 31, 24, 36, 16, 15, 3, 5,
30, 11, 27, 63, 17, 59, 42, 55, 14, 53, 16, 2), survival_status = c(0,
0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1), prediction_scores = c(0.399196720335709,
0.399196712585732, 0.399196718778835, 0.399196729515291, 0.399196719003673,
0.39919672164811, 0.399196722134395, 0.399196714987811, 0.399196722354385,
0.399196713792278, 0.39919672386451, 0.395944775789647, 0.395944769442692,
0.395944782920291, 0.395944768860647, 0.395944773326361, 0.395944782577356,
0.395944769654135, 0.39594477072273, 0.395944782418024), rank = c(144,
112, 139, 182, 141, 150, 151, 125, 154, 117, 159, 52, 35, 77,
32, 46, 75, 38, 40, 74), group = structure(c(1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), levels = c("0", "1"), class = "factor")), row.names = c("Nivolumab_2017_p024_ar_8906",
"Nivolumab_2017_p026_ar_8920", "Nivolumab_2017_p028_ar_8922",
"Nivolumab_2017_p036_ar_8898", "Nivolumab_2017_p062_ar_8856",
"Nivolumab_2017_p077_ar_8846", "Nivolumab_2017_p078_ar_8864",
"Nivolumab_2017_p084_ar_8850", "Nivolumab_2017_p085_ar_8829",
"Nivolumab_2017_p092_ar_8867", "Nivolumab_2017_p101_ar_8834",
"EA632174", "EA632234", "EA632688", "EA632802", "EA639069", "EA639120",
"EA639131", "G109543_RCCBMS_00114_T_v1_RNA_OnPrem", "G109543_RCCBMS_00147_T_v1_RNA_OnPrem"
), class = "data.frame")
You could use ggsurvplot_combine
to combine multiple survfit objects on the same plot.
which requires to pass your survfit objects as well as the underlying data as a list.
library(survminer)
ggsurvplot_combine(
list(fit_x, fit_y),
list(zeros_x, zeros_y),
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
legend.labs = c("Group X", "Group Y"),
palette = c("blue", "red")
)