
mlr3 confidence interval for AUC and cvAUC


I would like to calculate a confidence interval for the area under the curve (AUC) and for the cross-validated AUC (cvAUC) using mlr3.

I learned that for a regression task this can be done with predict_type = "se".
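
For example, this is what I mean for regression (a minimal sketch; it uses the mtcars example task shipped with mlr3 and the regr.lm learner from mlr3learners, both available after loading mlr3verse):

library(mlr3verse)

# regression learner that predicts a standard error alongside the response
task_reg <- tsk("mtcars")
learner_reg <- lrn("regr.lm", predict_type = "se")
learner_reg$train(task_reg)
learner_reg$predict(task_reg)  # the prediction now contains an 'se' column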

I wonder how this could be done for AUC/cvAUC within mlr3.

(A solution outside of mlr3 for cvAUC is proposed in the update below.)

Example data:

# library
library(mlr3verse)
library(mlbench)

# get example data
data(PimaIndiansDiabetes, package="mlbench")
data <- PimaIndiansDiabetes

# make task
all.task <- TaskClassif$new("all.data", data, target = "diabetes")

# make a learner
learner <- lrn("classif.log_reg", predict_type = "prob")

# resample 
rr = resample(all.task, learner, rsmp("cv"))
#> INFO  [12:19:45.662] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 5/10) 
#> INFO  [12:19:45.741] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 8/10) 
#> INFO  [12:19:45.780] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 10/10) 
#> INFO  [12:19:45.805] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 2/10) 
#> INFO  [12:19:45.831] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 6/10) 
#> INFO  [12:19:45.859] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 1/10) 
#> INFO  [12:19:45.899] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 9/10) 
#> INFO  [12:19:45.926] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 7/10) 
#> INFO  [12:19:45.954] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 3/10) 
#> INFO  [12:19:45.995] [mlr3]  Applying learner 'classif.log_reg' on task 'all.data' (iter 4/10)

# get AUC
rr$aggregate(msr("classif.auc"))
#> classif.auc 
#>   0.8297186

Created on 2021-04-02 by the reprex package (v1.0.0)

Update:

Outside of mlr3 I would do it with the cvAUC package:

library(cvAUC)
library(tidyverse)

# extract predictions
rr$predictions() -> cv_pred_model

# prepare data for cv ci
cv_pred_model %>%
  map_df(as.data.table, .id = "fold") -> go

# calculate the cv AUC and its 95% confidence interval
# (the positive-class probability column is prob.pos for this task)
ci.cvAUC(predictions = go$prob.pos, labels = go$truth, folds = go$fold, confidence = 0.95)
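
ci.cvAUC() returns a list with the pooled point estimate ($cvAUC), its standard error ($se), the confidence interval ($ci), and the confidence level ($confidence); see the output in the solution below.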

Solution

  • There is currently no built-in way in mlr3 to calculate AUC uncertainty that is as convenient as calculating the measure itself (i.e. nothing that works via $aggregate()). Instead you can call cvAUC::ci.cvAUC() and give it the required data:

    The ResampleResult object rr has the method $predictions(), which gives you the true values as well as the predicted scores for each resampling fold. You can use data.table::rbindlist() with its idcol argument to get a single table of all ground truths, all predictions, and an indicator denoting the resampling fold (you have to convert the Prediction objects to data.tables for this). That is all the information ci.cvAUC needs.

    print(rr$predictions())
    #> [[1]]
    #> <PredictionClassif> for 77 observations:
    #>     row_ids truth response   prob.neg   prob.pos   
    #>           2   neg      neg 0.94955791 0.05044209
    #>           6   neg      neg 0.85101781 0.14898219
    #>          13   neg      pos 0.22516526 0.77483474
    #> ---
    #>         744   pos      pos 0.33871290 0.66128710
    #>         745   neg      pos 0.06836943 0.93163057
    #>         755   pos      pos 0.27998597 0.72001403
    #>
    #> [[2]]
    #> <PredictionClassif> for 77 observations:
    #>     row_ids truth response  prob.neg  prob.pos
    #>          18   pos      neg 0.8050657 0.1949343               
    #> [....]
    
    predictiontables <- lapply(rr$predictions(), data.table::as.data.table)
    allpred <- data.table::rbindlist(predictiontables, idcol = "fold")
    print(allpred)
    #>      fold row_ids truth response  prob.neg   prob.pos
    #>   1:    1       2   neg      neg 0.9495579 0.05044209
    #>   2:    1       6   neg      neg 0.8510178 0.14898219
    #>   3:    1      13   neg      pos 0.2251653 0.77483474
    #>   4:    1      37   neg      pos 0.3366958 0.66330422
    #>   5:    1      41   neg      pos 0.2578118 0.74218818
    #>  ---
    #> 764:   10     739   neg      neg 0.8232726 0.17672735
    #> 765:   10     746   neg      neg 0.6842442 0.31575585
    #> 766:   10     749   pos      pos 0.1735568 0.82644319
    #> 767:   10     759   neg      neg 0.8184856 0.18151445
    #> 768:   10     763   neg      neg 0.9075691 0.09243093
    
    cvAUC::ci.cvAUC(predictions = allpred$prob.pos,
      labels = allpred$truth, folds = allpred$fold)
    #> $cvAUC
    #> [1] 0.8315585
    #> 
    #> $se
    #> [1] 0.01511107
    #> 
    #> $ci
    #> [1] 0.8019414 0.8611757
    #> 
    #> $confidence
    #> [1] 0.95
    #> 
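
    The reported interval appears to be the usual normal approximation around the pooled estimate, cvAUC ± qnorm(0.975) * se, which you can reproduce by hand (up to rounding of the printed values):

    0.8315585 + c(-1, 1) * qnorm(0.975) * 0.01511107
    #> [1] 0.8019413 0.8611757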
    

    If you like terse magrittr code, the equivalent is:

    library("data.table")
    library("magrittr")
    
    rr$predictions() %>%
      lapply(as.data.table) %>%
      rbindlist(idcol = "fold") %$%
      cvAUC::ci.cvAUC(predictions = prob.pos, labels = truth, folds = fold)
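
    The %$% exposition pipe from magrittr exposes the columns of the combined table, so prob.pos, truth, and fold can be referenced by name inside the ci.cvAUC() call.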
    

    Note that the AUC value I get differs from the OP's because of the random CV split. rr$aggregate() agrees with cvAUC here:

    rr$aggregate(msr("classif.auc"))
    #> classif.auc
    #>   0.8315585
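
    As a rough sanity check (a sketch using the rr object from above, not part of the cvAUC interface), you can also inspect the spread of the per-fold AUCs that mlr3 computes itself; note that this fold-to-fold spread is not the same quantity as the standard error reported by ci.cvAUC:

    # one AUC per resampling fold
    rr$score(msr("classif.auc"))$classif.auc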