I have simulated data and I want to get Kaplan Meier estimates.
I have 500 observations, Y = Observed time, Delta= status x_bin = Binary treatment variable and x_cont= 2 continuous variables
Y = c(23.076677,33.856999,0.587694,44.213549,9.027398,6.466811,
13.053572, 2.846332,30.895564 ,4.062382,16.524918 , 6.220079, 17.547434, 4.529800,
30.683129 ,25.443511 ,16.130519, 21.840292, 19.827314, 23.840220,22.518815, 9.873650,
6.585225 ,24.485416 ,5.811711 ,19.230248 , 6.344504 ,27.159498, 8.615084 , 7.899020,
9.224606, 43.429181 ,19.130139, 43.180901, 13.239691, 21.946553,29.469361,
13.792664,1.706786, 11.230684, 12.433856, 9.284416 ,36.884566 ,11.456953, 16.747181 ,21.003923 ,
41.090373, 18.944196, 31.675754 ,34.103413 ,19.433604 ,40.876068, 17.530126, 25.250155,
6.896457, 29.314967 ,6.465073,46.352824,15.591029,19.635961,24.107908,9.227189,14.164096,0.059026,
37.723229, 50.015481, 28.065238,30.262120 ,61.420504, 14.084382, 25.982968,15.213584,15.505326,
0.653056, 13.018299,1.575673, 18.589050, 0.000299,16.982758,10.798754,8.100633 ,5.362828 ,
0.453016 , 3.755654, 21.089715, 57.229954,0.141664, 25.948761, 87.196375 , 6.240832, 39.569735,
79.732973 ,17.317158 ,15.658974, 20.406179,18.944196, 31.675754 ,34.103413 ,19.43360,20.58367)
delta = rbinom(100,1,0.3)
x_cont= matrix(rnorm(200,0.2,0.25),100,2)
x_cont= as.matrix(data.frame(x_cont))
treated= factor(rbinom(100,1,0.5))
km_model=survfit(Surv(Y,delta) ~ treated +x_cont)
Error message:
Error in strata(mf[ll]): all arguments must be the same length
If I take only one continuous variable then it works but if I take more than one it is giving me this error.
TIA
We may need the data
as data.frame
. According to ?survfit.formula
data - a data frame in which to interpret the variables named in the formula, subset and weights arguments.
out <- survfit(Surv(Y,delta) ~ treated +X1 + X2,
data = data.frame(x_cont, treated))
-checking the output
> str(out)
List of 17
$ n : int [1:100] 1 1 1 1 1 1 1 1 1 1 ...
$ time : num [1:100] 15.214 19.23 14.084 0.142 5.812 ...
$ n.risk : num [1:100] 1 1 1 1 1 1 1 1 1 1 ...
$ n.event : num [1:100] 1 1 0 1 0 0 0 0 0 1 ...
$ n.censor : num [1:100] 0 0 1 0 1 1 1 1 1 0 ...
$ surv : num [1:100] 0 0 1 0 1 1 1 1 1 0 ...
$ std.err : num [1:100] Inf Inf 0 Inf 0 ...
$ cumhaz : num [1:100] 1 1 0 1 0 0 0 0 0 1 ...
$ std.chaz : num [1:100] 1 1 0 1 0 0 0 0 0 1 ...
$ strata : Named int [1:100] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "names")= chr [1:100] "treated=0, X1=-0.297081842241384 , X2=0.105548785177934 " "treated=0, X1=-0.296945987489856 , X2=0.180710460866624 " "treated=0, X1=-0.252387685923698 , X2=-0.0657794268622904" "treated=0, X1=-0.242585454493932 , X2=0.69364514503322 " ...
$ type : chr "right"
$ logse : logi TRUE
$ conf.int : num 0.95
$ conf.type: chr "log"
$ lower : num [1:100] NA NA 1 NA 1 1 1 1 1 NA ...
$ upper : num [1:100] NA NA 1 NA 1 1 1 1 1 NA ...
$ call : language survfit(formula = Surv(Y, delta) ~ treated + X1 + X2, data = data.frame(x_cont, treated))
- attr(*, "class")= chr "survfit"
If we have more columns, consider creating the expression with paste
dat1 <- data.frame(x_cont, treated)
expr1 <- as.formula(paste("Surv(Y,delta) ~ ", paste(names(dat1), collapse="+")))
out1 <- survfit(expr1, data = dat1)
out1$call <- deparse(expr1)
-checking the structure
> str(out1)
List of 17
$ n : int [1:100] 1 1 1 1 1 1 1 1 1 1 ...
$ time : num [1:100] 15.214 19.23 21.09 14.084 0.142 ...
$ n.risk : num [1:100] 1 1 1 1 1 1 1 1 1 1 ...
$ n.event : num [1:100] 1 1 0 0 1 1 0 0 1 0 ...
$ n.censor : num [1:100] 0 0 1 1 0 0 1 1 0 1 ...
$ surv : num [1:100] 0 0 1 1 0 0 1 1 0 1 ...
$ std.err : num [1:100] Inf Inf 0 0 Inf ...
$ cumhaz : num [1:100] 1 1 0 0 1 1 0 0 1 0 ...
$ std.chaz : num [1:100] 1 1 0 0 1 1 0 0 1 0 ...
$ strata : Named int [1:100] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "names")= chr [1:100] "X1=-0.297081842241384, X2=0.105548785177934 , treated=0" "X1=-0.296945987489856, X2=0.180710460866624 , treated=0" "X1=-0.292560489804895, X2=0.374632415815526 , treated=1" "X1=-0.252387685923698, X2=-0.0657794268622904, treated=0" ...
$ type : chr "right"
$ logse : logi TRUE
$ conf.int : num 0.95
$ conf.type: chr "log"
$ lower : num [1:100] NA NA 1 1 NA NA 1 1 NA 1 ...
$ upper : num [1:100] NA NA 1 1 NA NA 1 1 NA 1 ...
$ call : chr "Surv(Y, delta) ~ X1 + X2 + treated"
- attr(*, "class")= chr "survfit"