Search code examples
r, machine-learning, random-forest, r-caret, roc

All the ROC metric values are missing


I know there have been several topics about this question already, but none of the answers solved my problem. I'm trying to run a random forest model using caret, and I'm getting this error:

Something is wrong; all the ROC metric values are missing:
      ROC           Sens          Spec    
 Min.   : NA   Min.   : NA   Min.   : NA  
 1st Qu.: NA   1st Qu.: NA   1st Qu.: NA  
 Median : NA   Median : NA   Median : NA  
 Mean   :NaN   Mean   :NaN   Mean   :NaN  
 3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA  
 Max.   : NA   Max.   : NA   Max.   : NA  
 NA's   :10    NA's   :10    NA's   :10   
Error: Stopping
In addition: There were 50 or more warnings (use warnings() to see the first 50)

What is this error and how do I fix it? I should note that the data I provide here is abbreviated: my real data set is much bigger, with more predictive features, and I got the same error there as well. That is why I tried to build the model on simpler data.

I have no NA or Inf values in the data. I tried redownloading packages like caret and pROC, I tried to change the parameters, but nothing helped.

This is the code:

# Resampling setup: 10-fold cross-validation that keeps class probabilities and
# the hold-out predictions; twoClassSummary is the summary function (the
# ROC / Sens / Spec columns shown in the error output above).
ctrlCV = trainControl(method = 'cv', number = 10 , classProbs = TRUE , savePredictions = TRUE, summaryFunction = twoClassSummary )
# Tuning grid: candidate mtry values 1 through 10.
# NOTE(review): the sample data below has only two predictor columns, so mtry
# values above 2 presumably exceed the number of predictors -- confirm against
# the full data set.
rfGRID <- expand.grid(.mtry = c(1 : 10))

# Random-forest fit tuned on the ROC metric with the control and grid above.
rfFit <- train(response~., data = dimdum,
               method = "rf",
               metric="ROC",
               importance = TRUE,
               trControl = ctrlCV,
               tuneGrid = rfGRID,
               # NOTE(review): ntree is given as a vector of candidate values
               # here; the solution below fits one ntree value per train() call
               # instead, which runs without the error.
               ntree = c(50,100,150,200,300,400,500),
               # NOTE(review): nodesize is also given as a vector (1..10);
               # presumably a single value is expected -- confirm.
               nodesize = c(1:10)
               
)

# ROC curve computed on the same data the model was trained on, using the
# first column of the predicted class-probability table.
rfROC = roc(dimdum$response,predict(rfFit,dimdum, type='prob')[,1])
plot(rfROC)

Some of the data:

structure(list(response = c("NoResponse", "Response", "NoResponse", 
"NoResponse", "NoResponse", "Response", "NoResponse", "NoResponse", 
"NoResponse", "NoResponse", "NoResponse", "Response", "NoResponse", 
"Response", "NoResponse", "NoResponse", "NoResponse", "NoResponse", 
"Response", "Response", "NoResponse", "NoResponse", "NoResponse", 
"NoResponse", "NoResponse", "NoResponse", "NoResponse", "Response", 
"NoResponse", "NoResponse", "NoResponse", "NoResponse", "NoResponse", 
"NoResponse", "NoResponse", "Response", "NoResponse", "NoResponse", 
"NoResponse", "Response", "Response", "NoResponse", "Response", 
"Response", "NoResponse", "Response", "Response", "NoResponse", 
"Response", "Response"), CD4..Tem = c(0.206146305909711, 0.38344530718027, 
0.111171710498514, -0.024620418652091, -0.024620418652091, 0.241356282324198, 
0.190401963339481, 0.0381564797828905, 0.00208479038732372, -0.024620418652091, 
0.0345146685774692, 0.0047448955916752, 0.0954402244646442, 0.216170609750478, 
0.0679837609588422, 0.1087338604344, 0.0307783462567513, -0.024620418652091, 
0.00930882669937516, 0.228984175232275, 0.198029266287967, 0.00849741399216577, 
0.167122425878708, -0.0185389752646852, 0.0349285293854749, -0.015164453751509, 
0.0530002007752186, -0.00464766527016771, 0.229228539194469, 
-0.024620418652091, -0.024620418652091, 0.00909241866793368, 
0.00959645779130966, -0.0169517988930254, 0.130416251320013, 
-0.0226155780862924, -0.0226155780862924, -0.00708354014661853, 
-0.0226155780862924, 0.0236200668251617, -0.0226155780862924, 
-0.0226155780862924, -0.0226155780862924, -0.0226155780862924, 
-0.0226155780862924, -0.00226780446329141, -0.00703727203694584, 
-0.0226155780862924, 0.040447933249888, -0.0226155780862924), 
    Epithelial.cells = c(0.213818759771441, 0.224884228557244, 
    0.213818759771441, 0.564636116181376, 0.213818759771441, 
    0.213818759771441, 0.256571259511661, 0.213818759771441, 
    0.225551386999972, 0.213818759771441, 0.320498217450289, 
    0.213818759771441, 0.213818759771441, 0.213818759771441, 
    0.250752952186148, 0.54432086478806, 0.213818759771441, 0.213818759771441, 
    0.213818759771441, 0.224579338204213, 0.244604368723937, 
    0.239048638424405, 0.213818759771441, 0.213818759771441, 
    0.213818759771441, 0.213818759771441, 0.755246080444261, 
    0.213818759771441, 0.224789200187943, 0.213818759771441, 
    0.213818759771441, 0.213818759771441, 0.602551670320415, 
    0.221211285726714, 0.528154858032774, 0.191393513022707, 
    0.204473730554233, 0.199140565064947, 0.191393513022707, 
    0.191393513022707, 0.208364515830724, 0.199044034497245, 
    0.218231966624601, 0.191393513022707, 0.199037583564646, 
    0.233310726880044, 0.257245920265987, 0.2245306029313, 0.30356359401388, 
    0.202283902795669)), row.names = c("Pt1", "Pt101", "Pt106", 
"Pt11", "Pt17", "Pt18", "Pt24", "Pt26", "Pt27", "Pt28", "Pt29", 
"Pt3", "Pt31", "Pt34", "Pt36", "Pt37", "Pt38", "Pt39", "Pt44", 
"Pt49", "Pt5", "Pt52", "Pt59", "Pt62", "Pt65", "Pt66", "Pt67", 
"Pt72", "Pt77", "Pt78", "Pt84", "Pt85", "Pt89", "Pt9", "Pt90", 
"EA595454", "EA595500", "EA595522", "EA595529", "EA595597", "EA595624", 
"EA595635", "EA595647", "EA595654", "EA595719", "EA595720", "EA632133", 
"EA632171", "EA632174", "EA632234"), class = "data.frame")

Solution

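  • The `ntree` argument is passed through to `randomForest()`, which expects a single number rather than a vector of candidate values, so every resample fit fails and all the metrics come back as NA. If you test the `ntree` values one at a time in a loop, it works as expected: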

    library(randomForest)
    #> randomForest 4.7-1.1
    #> Type rfNews() to see new features/changes/bug fixes.
    #library(mlbench)
    library(caret)
    #> Loading required package: ggplot2
    #> 
    #> Attaching package: 'ggplot2'
    #> The following object is masked from 'package:randomForest':
    #> 
    #>     margin
    #> Loading required package: lattice
    library(pROC)
    #> Type 'citation("pROC")' for a citation.
    #> 
    #> Attaching package: 'pROC'
    #> The following objects are masked from 'package:stats':
    #> 
    #>     cov, smooth, var
    
    df <- structure(list(response = c("NoResponse", "Response", "NoResponse", 
                                      "NoResponse", "NoResponse", "Response", "NoResponse", "NoResponse", 
                                      "NoResponse", "NoResponse", "NoResponse", "Response", "NoResponse", 
                                      "Response", "NoResponse", "NoResponse", "NoResponse", "NoResponse", 
                                      "Response", "Response", "NoResponse", "NoResponse", "NoResponse", 
                                      "NoResponse", "NoResponse", "NoResponse", "NoResponse", "Response", 
                                      "NoResponse", "NoResponse", "NoResponse", "NoResponse", "NoResponse", 
                                      "NoResponse", "NoResponse", "Response", "NoResponse", "NoResponse", 
                                      "NoResponse", "Response", "Response", "NoResponse", "Response", 
                                      "Response", "NoResponse", "Response", "Response", "NoResponse", 
                                      "Response", "Response"), 
                         CD4..Tem = c(0.206146305909711, 0.38344530718027, 
                                      0.111171710498514, -0.024620418652091, -0.024620418652091, 0.241356282324198, 
                                      0.190401963339481, 0.0381564797828905, 0.00208479038732372, -0.024620418652091, 
                                      0.0345146685774692, 0.0047448955916752, 0.0954402244646442, 0.216170609750478, 
                                      0.0679837609588422, 0.1087338604344, 0.0307783462567513, -0.024620418652091, 
                                      0.00930882669937516, 0.228984175232275, 0.198029266287967, 0.00849741399216577, 
                                      0.167122425878708, -0.0185389752646852, 0.0349285293854749, -0.015164453751509, 
                                      0.0530002007752186, -0.00464766527016771, 0.229228539194469, 
                                      -0.024620418652091, -0.024620418652091, 0.00909241866793368, 
                                      0.00959645779130966, -0.0169517988930254, 0.130416251320013, 
                                      -0.0226155780862924, -0.0226155780862924, -0.00708354014661853, 
                                      -0.0226155780862924, 0.0236200668251617, -0.0226155780862924, 
                                      -0.0226155780862924, -0.0226155780862924, -0.0226155780862924, 
                                      -0.0226155780862924, -0.00226780446329141, -0.00703727203694584, 
                                      -0.0226155780862924, 0.040447933249888, -0.0226155780862924), 
                         Epithelial.cells = c(0.213818759771441, 0.224884228557244, 
                                              0.213818759771441, 0.564636116181376, 0.213818759771441, 
                                              0.213818759771441, 0.256571259511661, 0.213818759771441, 
                                              0.225551386999972, 0.213818759771441, 0.320498217450289, 
                                              0.213818759771441, 0.213818759771441, 0.213818759771441, 
                                              0.250752952186148, 0.54432086478806, 0.213818759771441, 0.213818759771441, 
                                              0.213818759771441, 0.224579338204213, 0.244604368723937, 
                                              0.239048638424405, 0.213818759771441, 0.213818759771441, 
                                              0.213818759771441, 0.213818759771441, 0.755246080444261, 
                                              0.213818759771441, 0.224789200187943, 0.213818759771441, 
                                              0.213818759771441, 0.213818759771441, 0.602551670320415, 
                                              0.221211285726714, 0.528154858032774, 0.191393513022707, 
                                              0.204473730554233, 0.199140565064947, 0.191393513022707, 
                                              0.191393513022707, 0.208364515830724, 0.199044034497245, 
                                              0.218231966624601, 0.191393513022707, 0.199037583564646, 
                                              0.233310726880044, 0.257245920265987, 0.2245306029313, 0.30356359401388, 
                                              0.202283902795669)), 
                    row.names = c("Pt1", "Pt101", "Pt106", 
                                  "Pt11", "Pt17", "Pt18", "Pt24", "Pt26", "Pt27", "Pt28", "Pt29", 
                                  "Pt3", "Pt31", "Pt34", "Pt36", "Pt37", "Pt38", "Pt39", "Pt44", 
                                  "Pt49", "Pt5", "Pt52", "Pt59", "Pt62", "Pt65", "Pt66", "Pt67", 
                                  "Pt72", "Pt77", "Pt78", "Pt84", "Pt85", "Pt89", "Pt9", "Pt90", 
                                  "EA595454", "EA595500", "EA595522", "EA595529", "EA595597", "EA595624", 
                                  "EA595635", "EA595647", "EA595654", "EA595719", "EA595720", "EA632133", 
                                  "EA632171", "EA632174", "EA632234"), class = "data.frame")
    
    # Resampling scheme: 10-fold cross-validation that keeps class probabilities
    # and hold-out predictions, summarised with twoClassSummary (ROC/Sens/Spec).
    ctrlCV <- trainControl(
      method = "cv",
      number = 10,
      classProbs = TRUE,
      savePredictions = TRUE,
      summaryFunction = twoClassSummary
    )
    # One-row tuning grid: mtry fixed at sqrt(number of predictor columns),
    # i.e. every column of df except the first (the response).
    rfGRID <- expand.grid(.mtry = sqrt(ncol(df[-1])))
    
    # Single fit with one scalar ntree value.
    rfFit <- train(
      response ~ .,
      data = df,
      method = "rf",
      metric = "ROC",
      trControl = ctrlCV,
      tuneGrid = rfGRID,
      importance = TRUE,
      ntree = 10,
      # NOTE(review): nodesize is still given as a vector (1..10); presumably
      # only a single value takes effect -- confirm against the randomForest
      # documentation.
      nodesize = 1:10
    )
    
    print(rfFit)
    #> Random Forest 
    #> 
    #> 50 samples
    #>  2 predictor
    #>  2 classes: 'NoResponse', 'Response' 
    #> 
    #> No pre-processing
    #> Resampling: Cross-Validated (10 fold) 
    #> Summary of sample sizes: 45, 45, 44, 46, 45, 44, ... 
    #> Resampling results:
    #> 
    #>   ROC        Sens   Spec
    #>   0.6104167  0.825  0.35
    #> 
    #> Tuning parameter 'mtry' was held constant at a value of 1.414214
    
    # ROC curve on the training data, built from the first column of the
    # predicted class-probability table.
    class_probs <- predict(rfFit, df, type = "prob")
    rfROC <- roc(df$response, class_probs[, 1])
    #> Setting levels: control = NoResponse, case = Response
    #> Setting direction: controls > cases
    plot(rfROC)
    

    # Fit one model per candidate forest size (small ntree values keep this
    # example fast). The seed is reset before every fit, as in a per-iteration
    # loop.
    ntree_values <- c(10, 20, 50, 75, 100)
    modellist <- lapply(ntree_values, function(n_trees) {
      set.seed(123)
      train(
        response ~ .,
        data = df,
        method = "rf",
        metric = "ROC",
        importance = TRUE,
        trControl = ctrlCV,
        tuneGrid = rfGRID,
        ntree = n_trees
      )
    })
    # Key each fit by its ntree value; these names are the model labels in the
    # resampling summary below.
    names(modellist) <- as.character(ntree_values)
    
    results <- resamples(modellist)
    summary(results)
    #> 
    #> Call:
    #> summary.resamples(object = results)
    #> 
    #> Models: 10, 20, 50, 75, 100 
    #> Number of resamples: 10 
    #> 
    #> ROC 
    #>          Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
    #> 10  0.1666667 0.5208333 0.6458333 0.5958333 0.7500000 0.8333333    0
    #> 20  0.0000000 0.2916667 0.5000000 0.4916667 0.6666667 0.9166667    0
    #> 50  0.0000000 0.3437500 0.5833333 0.4875000 0.6666667 0.7500000    0
    #> 75  0.0000000 0.3750000 0.5000000 0.5166667 0.7291667 0.8333333    0
    #> 100 0.0000000 0.3593750 0.5000000 0.5104167 0.7291667 0.8333333    0
    #> 
    #> Sens 
    #>          Min. 1st Qu. Median      Mean 3rd Qu. Max. NA's
    #> 10  0.3333333  0.6875   0.75 0.7750000       1    1    0
    #> 20  0.3333333  0.6875   0.75 0.7750000       1    1    0
    #> 50  0.3333333  0.7500   0.75 0.8000000       1    1    0
    #> 75  0.6666667  0.7500   0.75 0.8333333       1    1    0
    #> 100 0.6666667  0.7500   0.75 0.8333333       1    1    0
    #> 
    #> Spec 
    #>     Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
    #> 10     0       0      0 0.05   0.000  0.5    0
    #> 20     0       0      0 0.15   0.375  0.5    0
    #> 50     0       0      0 0.10   0.000  0.5    0
    #> 75     0       0      0 0.20   0.375  1.0    0
    #> 100    0       0      0 0.20   0.375  1.0    0
    dotplot(results)
    

    Created on 2022-09-02 by the reprex package (v2.0.1)