mlr3 resample autotuner - not showing tuned parameters?


I'm fairly new to mlr3 and have had trouble retrieving both the tuned hyperparameters from each of the cross-validation folds and the optimised hyperparameters found via the AutoTuner method (used here for nested resampling). My understanding is that after applying the resample function to the AutoTuner, we should be able to see the individual tuned hyperparameters from each outer iteration.

To demonstrate this, I use the mlr3 example provided on the website (https://mlr3gallery.mlr-org.com/house-prices-in-king-county/), tested on both a Linux server and a Windows machine. More specifically, I'm looking at the xgboost parameter-tuning section; code below:

library(mlr3)
library(mlr3learners)
library(mlr3tuning)
library(paradox)

# load data
data("kc_housing", package = "mlr3data")

tsk = TaskRegr$new("sales", kc_housing[-1], target = "price")
set.seed(4411)
train.idx = sample(seq_len(tsk$nrow), 0.7 * tsk$nrow)
test.idx = setdiff(seq_len(tsk$nrow), train.idx)
task_train = tsk$clone()$filter(train.idx)
task_test  = tsk$clone()$filter(test.idx)

set.seed(444L)
lrn_xgb = lrn("regr.xgboost")

# Define the ParamSet
ps = paradox::ParamSet$new(
  params = list(
    ParamDbl$new(id = "eta", lower = 0.2, upper = 0.4),
    ParamDbl$new(id = "min_child_weight", lower = 1, upper = 20),
    ParamDbl$new(id = "subsample", lower = 0.7, upper = 0.8),
    ParamDbl$new(id = "colsample_bytree",  lower = 0.9, upper = 1),
    ParamDbl$new(id = "colsample_bylevel", lower = 0.5, upper = 0.7),
    ParamInt$new(id = "nrounds", lower = 1L, upper = 25L)
  ))

# Define the outer cross-validation (for nested resampling)
cv3 = rsmp("cv", folds = 3)
# Define the terminator
terminator = term("evals", n_evals = 5)
at = AutoTuner$new(learner = lrn_xgb,
                   resampling = rsmp("holdout"), # inner resampling for tuning
                   measures = msr("regr.mse"),
                   tune_ps = ps,
                   terminator = terminator,
                   tuner = tnr("random_search"))

res = resample(task = task_train, at, cv3)
sapply(res$learners, function(x) x$param_set$values)

Below is the output from the sapply call:

        [,1] [,2] [,3]
nrounds 1    1    1   
verbose 0    0    0  
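
These appear to be just the defaults preset on the learner, not tuned values. You can check this directly (a quick sketch; the exact defaults assume mlr3learners 0.1.6 as in the session info below):

lrn("regr.xgboost")$param_set$values  # values preset on a fresh learner
#> $nrounds
#> [1] 1
#>
#> $verbose
#> [1] 0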

Session environment (for the Windows machine):

> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale:
[1] LC_COLLATE=English_Australia.1252  LC_CTYPE=English_Australia.1252   
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C                      
[5] LC_TIME=English_Australia.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] paradox_0.1.0      mlr3tuning_0.1.2   mlr3learners_0.1.6 mlr3_0.1.8        

loaded via a namespace (and not attached):
 [1] lgr_0.3.3          lattice_0.20-38    mlr3misc_0.1.8     digest_0.6.20      crayon_1.3.4      
 [6] grid_3.6.3         R6_2.4.0           mlr3measures_0.1.2 backports_1.1.4    magrittr_1.5      
[11] stringi_1.4.3      uuid_0.1-4         data.table_1.12.6  rstudioapi_0.10    Matrix_1.2-18     
[16] checkmate_2.0.0    xgboost_0.90.0.2   tools_3.6.3        compiler_3.6.3    

Solution

  • Currently, you need to explicitly set store_models = TRUE in the resample() call to store the tuning results in the AutoTuner.

    We might simplify this behavior in the future, as it is confusing that the $tuning_result slot is empty even though store_tuning_instance = TRUE was set in the AutoTuner.

    library(mlr3)
    library(paradox)
    library(mlr3tuning)
    lgr::get_logger("mlr3")$set_threshold("warn")
    task = tsk("iris")
    learner = lrn("classif.rpart")
    resampling = rsmp("holdout")
    measures = msr("classif.ce")
    param_set = ParamSet$new(
      params = list(ParamDbl$new("cp", lower = 0.001, upper = 0.1)))
    
    terminator = term("evals", n_evals = 5)
    tuner = tnr("grid_search")
    at = AutoTuner$new(learner, resampling, measures, param_set, terminator, tuner)
    
    at$train(task)
    # tuning result exists
    at$tuning_result
    #> $tune_x
    #> $tune_x$cp
    #> [1] 0.067
    #> 
    #> 
    #> $params
    #> $params$xval
    #> [1] 0
    #> 
    #> $params$cp
    #> [1] 0.067
    #> 
    #> 
    #> $perf
    #> classif.ce 
    #>       0.08
    
    res = resample(task = task, at, rsmp("cv", folds = 3), store_models = TRUE)
    lapply(res$learners, function(x) x$tuning_result)
    #> [[1]]
    #> [[1]]$tune_x
    #> [[1]]$tune_x$cp
    #> [1] 0.012
    #> 
    #> 
    #> [[1]]$params
    #> [[1]]$params$xval
    #> [1] 0
    #> 
    #> [[1]]$params$cp
    #> [1] 0.012
    #> 
    #> 
    #> [[1]]$perf
    #> classif.ce 
    #> 0.09090909 
    #> 
    #> 
    #> [[2]]
    #> [[2]]$tune_x
    #> [[2]]$tune_x$cp
    #> [1] 0.078
    #> 
    #> 
    #> [[2]]$params
    #> [[2]]$params$xval
    #> [1] 0
    #> 
    #> [[2]]$params$cp
    #> [1] 0.078
    #> 
    #> 
    #> [[2]]$perf
    #> classif.ce 
    #> 0.09090909 
    #> 
    #> 
    #> [[3]]
    #> [[3]]$tune_x
    #> [[3]]$tune_x$cp
    #> [1] 0.045
    #> 
    #> 
    #> [[3]]$params
    #> [[3]]$params$xval
    #> [1] 0
    #> 
    #> [[3]]$params$cp
    #> [1] 0.045
    #> 
    #> 
    #> [[3]]$perf
    #> classif.ce 
    #> 0.06060606
    

    Created on 2020-03-20 by the reprex package (v0.3.0)
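
    Applied to the original xgboost example, the same fix works: pass store_models = TRUE to resample() and read each outer fold's $tuning_result. A sketch using the same (older) mlr3tuning API as above; the compact sapply line assumes the $tune_x layout shown in the output:

    res = resample(task = task_train, at, cv3, store_models = TRUE)
    # full tuning result per outer fold
    lapply(res$learners, function(x) x$tuning_result)
    # or just one tuned hyperparameter across the outer folds, e.g. eta
    sapply(res$learners, function(x) x$tuning_result$tune_x$eta)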