Search code examples
rpredictsurvival-analysis

Error using rms package for predict survival probability in r


I have a dataset of 5 variables: CANT_ANU, RETIRE, SEX, PROF and AGE_INI. Like this:

library(data.table)
library(rms)

dt <- structure(list(CANT_ANU = c(32.1671232876712, 12.1671232876712, 
                                    45, 28.2465753424658, 33, 3, 34.2493150684932, 31.3287671232877, 
                                    40.413698630137, 31, 19.0849315068493, 31.0846994535519, 20.586301369863, 
                                    34, 36.0849315068493, 32.9150684931507, 24.0849315068493, 3.24657534246575, 
                                    17.0792349726776, 15.0849315068493, 25.7486338797814, 19, 31.3287671232877, 
                                    17.1616438356164, 31.0849315068493), RETIRE = c(1, 1, 1, 1, 1, 
                                                                                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1), 
                 SEX = c(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
                          1, 1, 1, 1, 1, 0, 1, 0, 1), PROF = c(0, 1, 0, 0, 0, 
                                                                      0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 
                                                                      0), AGE_INI = c(49.2383561643836, 64.0575342465753, 32.9945205479452, 
                                                                                       44.6767123287671, 40.4383561643836, 68.7890410958904, 38.2459016393443, 
                                                                                       38.7479452054795, 30.5561643835616, 41.0684931506849, 51.1260273972603, 
                                                                                       41.6219178082192, 54.9479452054794, 36.0846994535519, 32.0109589041096, 
                                                                                       36.1698630136986, 44.6794520547945, 66.4630136986301, 54.8493150684931, 
                                                                                       54.4246575342466, 50.8191780821918, 48.6657534246575, 37.0493150684932, 
                                                                                       53.2712328767123, 37.6684931506849)), row.names = c(NA, -25L
                                                                                       ), class = c("data.table", "data.frame"))

I want to get survival probabilities for each subject on the entire dataset from a estimated exponential model , so I tried this:

mod.par.exp <- psm(formula = Surv(CANT_ANU, RETIRE) ~ SEX + PROF + AGE_INI,data = dt,dist = "exponential")
probs.exp.rms <- survest(mod.par.exp,dt,times = seq(10,50,10), conf.int=.9)

But I get the following error:

Error in x %*% fit$var[-last, last, drop = FALSE] : 
  argumentos no compatibles

What I am doing wrong? I previously tried the examples indicated in survest.psm in the package manual and they worked.


Solution

  • the error is called because the x is a 4x4 matrix and fit$var[-last, last, drop = FALSE] is a 3x1 column vector.

    Here is the part of the code: survest.psm

    Attached a reprex that shows you why the error occurs.

    For conf.int = 0 or conf.int = FALSE it works.

    I hope the answer helps you to narrow down the problem. It might also be useful to contact the package author or post an issue on the github site.

    library(data.table)
    library(rms)
    #> Lade nötiges Paket: Hmisc
    #> Lade nötiges Paket: lattice
    #> Lade nötiges Paket: survival
    #> Lade nötiges Paket: Formula
    #> Lade nötiges Paket: ggplot2
    #> 
    #> Attache Paket: 'Hmisc'
    #> The following objects are masked from 'package:base':
    #> 
    #>     format.pval, units
    #> Lade nötiges Paket: SparseM
    #> 
    #> Attache Paket: 'SparseM'
    #> The following object is masked from 'package:base':
    #> 
    #>     backsolve
    
    dt <- structure(list(CANT_ANU = c(32.1671232876712, 12.1671232876712, 
                                      45, 28.2465753424658, 33, 3, 34.2493150684932, 31.3287671232877, 
                                      40.413698630137, 31, 19.0849315068493, 31.0846994535519, 20.586301369863, 
                                      34, 36.0849315068493, 32.9150684931507, 24.0849315068493, 3.24657534246575, 
                                      17.0792349726776, 15.0849315068493, 25.7486338797814, 19, 31.3287671232877, 
                                      17.1616438356164, 31.0849315068493), RETIRE = c(1, 1, 1, 1, 1, 
                                                                                      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1), 
                         SEX = c(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
                                 1, 1, 1, 1, 1, 0, 1, 0, 1), PROF = c(0, 1, 0, 0, 0, 
                                                                      0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 
                                                                      0), AGE_INI = c(49.2383561643836, 64.0575342465753, 32.9945205479452, 
                                                                                      44.6767123287671, 40.4383561643836, 68.7890410958904, 38.2459016393443, 
                                                                                      38.7479452054795, 30.5561643835616, 41.0684931506849, 51.1260273972603, 
                                                                                      41.6219178082192, 54.9479452054794, 36.0846994535519, 32.0109589041096, 
                                                                                      36.1698630136986, 44.6794520547945, 66.4630136986301, 54.8493150684931, 
                                                                                      54.4246575342466, 50.8191780821918, 48.6657534246575, 37.0493150684932, 
                                                                                      53.2712328767123, 37.6684931506849)), row.names = c(NA, -25L
                                                                                      ), class = c("data.table", "data.frame"))
    
    mod.par.exp <- psm(formula = Surv(CANT_ANU, RETIRE) ~ SEX + PROF + AGE_INI,data = dt,dist = "exponential")
    
    # Throws the error: 
    probs.exp.rms <- survest(mod.par.exp,dt,times = seq(10,50,10), conf.int = .9)
    #> Error in x %*% fit$var[-last, last, drop = FALSE]: nicht passende Argumente
    
    # reason for the error: 
    ## setup of nvar Line 38 of linked github site: 
    ## nvar <- length(fit$coef) - num.intercepts(fit)
    
    ## in your case nvar is: 
    nvar_test <- length(mod.par.exp$coef) - num.intercepts(mod.par.exp); nvar_test
    #> [1] 3
    
    ## setup of x since linear.predictors are missing (first if)... and some more conditions: 
    ## x is defined in Line 52 of linked github site. 
    ##  if(missing(x)) x <- cbind(Intercept=1, predict(fit, newdata, type="x"))
    
    ## in your case x is: 
    x_test <- cbind(Intercept=1, predict(mod.par.exp, dt, type="x")); x_test
    #>    Intercept SEX PROF  AGE_INI
    #> 1          1   1    0 49.23836
    #> 2          1   1    1 64.05753
    #> 3          1   1    0 32.99452
    #> 4          1   1    0 44.67671
    #> 5          1   0    0 40.43836
    #> 6          1   0    0 68.78904
    #> 7          1   0    1 38.24590
    #> 8          1   0    1 38.74795
    #> 9          1   1    0 30.55616
    #> 10         1   1    0 41.06849
    #> 11         1   0    0 51.12603
    #> 12         1   1    0 41.62192
    #> 13         1   1    0 54.94795
    #> 14         1   0    0 36.08470
    #> 15         1   1    0 32.01096
    #> 16         1   1    0 36.16986
    #> 17         1   1    0 44.67945
    #> 18         1   1    0 66.46301
    #> 19         1   1    0 54.84932
    #> 20         1   1    0 54.42466
    #> 21         1   1    1 50.81918
    #> 22         1   0    0 48.66575
    #> 23         1   1    0 37.04932
    #> 24         1   0    1 53.27123
    #> 25         1   1    0 37.66849
    
    ## next condition from line 55 to 62: 
    ## g1 in line 56 can be calculated properly
    
    ## last is calculated from 57 - 60
    # last <- {
    #   nscale <- length(fit$icoef) - 1
    #   ncol(fit$var) - (1 : nscale) + 1
    # }
    
    ## in your case last is: 
    last_test <- {
      nscale <- length(mod.par.exp$icoef) - 1
      ncol(mod.par.exp$var) - (1 : nscale) + 1
    }
    last_test
    #> [1] 4
    
    ## computation of g2 in Line 62 throws the error: 
    
    ## g2 <- drop(x %*% fit$var[-last, last, drop=FALSE])
    
    column_vec <- mod.par.exp$var[-last_test, last_test, drop = FALSE]; column_vec
    #>               [,1]
    #> [1,] -0.0222297822
    #> [2,] -0.0003043264
    #> [3,] -0.0022725899
    
    ## error in matrix multiplication: 4x4 %*% 3x1 does not work!! 
    x_test %*% column_vec
    #> Error in x_test %*% column_vec: nicht passende Argumente
    
    # setting conf.int to FALSE or 0 works. 
    probs.exp.rms_noconf <- survest(mod.par.exp, dt, times = seq(10,50,10), conf.int = FALSE)
    probs.exp.rms_noconf
    #>           10         20          30           40           50
    #> 1  0.6109711 0.37328575 0.228066821 0.1393422481 8.513409e-02
    #> 2  0.5169610 0.26724870 0.138157161 0.0714218675 3.692232e-02
    #> 3  0.8116670 0.65880336 0.534728967 0.4340218715 3.522812e-01
    #> 4  0.6790361 0.46109007 0.313096820 0.2126040542 1.443658e-01
    #> 5  0.6610245 0.43695340 0.288836903 0.1909282713 1.262083e-01
    #> 6  0.1565442 0.02450608 0.003836283 0.0006005478 9.401225e-05
    #> 7  0.7981757 0.63708443 0.508505305 0.4058765720 3.239608e-01
    #> 8  0.7933484 0.62940168 0.499334815 0.3961464753 3.142822e-01
    #> 9  0.8324214 0.69292545 0.576806000 0.4801456797 3.996836e-01
    #> 10 0.7262735 0.52747323 0.383089845 0.2782280123 2.020696e-01
    #> 11 0.4825998 0.23290259 0.112398750 0.0542436172 2.617796e-02
    #> 12 0.7194063 0.51754539 0.372325405 0.2678532341 1.926953e-01
    #> 13 0.5135481 0.26373161 0.135438854 0.0695543604 3.571951e-02
    #> 14 0.7197743 0.51807509 0.372897156 0.2684018024 1.931887e-01
    #> 15 0.8202983 0.67288936 0.551970025 0.4527800935 3.714148e-01
    #> 16 0.7812767 0.61039332 0.476886090 0.3725800018 2.910881e-01
    #> 17 0.6789980 0.46103834 0.313044133 0.2125563541 1.443253e-01
    #> 18 0.2936587 0.08623542 0.025323782 0.0074365485 2.183807e-03
    #> 19 0.5153319 0.26556694 0.136855109 0.0705257999 3.634419e-02
    #> 20 0.5229761 0.27350397 0.143036034 0.0748044237 3.912092e-02
    #> 21 0.7206720 0.51936820 0.374294139 0.2697433227 1.943965e-01
    #> 22 0.5274674 0.27822187 0.146752970 0.0774074094 4.082989e-02
    #> 23 0.7721483 0.59621304 0.460364901 0.3554699878 2.744756e-01
    #> 24 0.6071013 0.36857201 0.223760552 0.1358453261 8.247188e-02
    #> 25 0.7655297 0.58603572 0.448627746 0.3434378625 2.629119e-01
    

    Created on 2020-07-06 by the reprex package (v0.3.0)