Tags: r, bayesian, hyperparameters, mlr3

How to make Bayesian hyperparameter optimization reproducible using the "mbo" tuner?


I would like to use R's mlr3* packages to build ML models in a reproducible manner. I have tried the regr.glmboost learner with the mbo tuner and the run_time terminator. I have played around with the HPO part, but I have not been able to make it reproducible with longer runtime budgets. Where did I go wrong?

Here is a reprex demonstrating the phenomenon:

library(mlr3verse)
library(mlr3mbo)
library(mlr3misc)
library(magrittr)
library(nycflights13)

dt <- as.data.table(weather)
dt <- dt[
  order(time_hour),
  .(origin = as.factor(origin), month = as.factor(month), hour = as.factor(hour),
    temp, dewp, humid, wind_dir, wind_speed, precip, visib, pressure,
    time_hour = as.numeric(time_hour))
]
dt <- na.omit(dt)

best_ones <- map_dtr(
  1L:3L,
  function(i) {
    my_learner <- lrn("regr.glmboost",
      family = to_tune(p_fct(levels = c("Gaussian", "Laplace", "Huber"))),
      nuirange = to_tune(p_dbl(lower = 0, upper = 1000, logscale = FALSE)),
      mstop = to_tune(p_int(lower = 1, upper = 3, trafo = function(x) 10**x)),
      nu = to_tune(p_dbl(lower = 0.01, upper = 0.3, logscale = TRUE)),
      risk = to_tune(p_fct(levels = c("inbag", "oobag", "none"))),
      trace = to_tune(c(TRUE, FALSE)),
      stopintern = to_tune(c(TRUE, FALSE))
    )

    my_task <- as_task_regr(
      x = dt,
      target = "pressure",
      id = "weather_data"
    )

    my_instance <- ti(
      task = my_task,
      learner = my_learner,
      resampling = rsmp("cv", folds = 3),
      measure = msr("regr.mae"),
      terminator = trm("run_time", secs = 300)
    )

    my_tuner <- tnr("mbo")

    set.seed(1234L, kind = "L'Ecuyer-CMRG")
    my_tuner$optimize(my_instance)

    my_instance$archive$best()
  }
)

best_ones[]

These are the somewhat divergent hyperparameters that I got:

   family nuirange mstop        nu  risk trace stopintern regr.mae warnings errors
1:  Huber 841.3256     3 -2.794395 inbag FALSE      FALSE 5.090834        0      0
2:  Huber 849.4117     3 -2.774291 oobag FALSE      FALSE 5.094204        0      0
3:  Huber 855.7414     3 -2.878846 oobag FALSE      FALSE 5.096876        0      0
   runtime_learners                                uhash           timestamp batch_nr
1:            9.656 01cf38ab-3dc6-4490-b36e-1c14325e42ad 2023-01-10 17:08:15       26
2:            9.646 6579c965-9184-4fe3-8e01-c1b10df21782 2023-01-10 17:11:56       18
3:            9.497 458122cc-f51c-4d81-a6d2-93dc024baa58 2023-01-10 17:16:22       15
      acq_ei .already_evaluated
1: 0.0010821              FALSE
2: 0.0021940              FALSE
3: 0.0090615              FALSE

I guess the issue is around seeding, but I do not know how to do it properly in this case. Any help would be appreciated!


Solution

  • So I think there are two possible sources of error in your code:

    1. The seed should be set at the very beginning of the function passed to map_dtr(...), before any code that could consume random numbers, just to be sure that every repetition starts from an identical RNG state (see the sketch right after this list).
    2. I would not rely on the run-time terminator: depending on system load, different repetitions can squeeze more or less actual computation into the same wall-clock budget, so they stop after different numbers of evaluations and can settle on different configurations. (You can even see this in your output: the three repetitions report slightly different runtimes and stop at different batch numbers.) A deterministic budget such as trm("evals", ...) removes this source of variation.
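
    To see point 1 in isolation: anything that consumes random numbers between set.seed() and a stochastic step changes that step's outcome. The toy check below is my own sketch (using the built-in mtcars task rather than your weather data), not part of the adjusted reprex further down:

    library(mlr3)

    task <- tsk("mtcars")

    # Same seed immediately before instantiating -> identical CV splits.
    set.seed(1234L, kind = "L'Ecuyer-CMRG")
    r1 <- rsmp("cv", folds = 3)$instantiate(task)
    set.seed(1234L, kind = "L'Ecuyer-CMRG")
    r2 <- rsmp("cv", folds = 3)$instantiate(task)
    identical(r1$instance$fold, r2$instance$fold)  # TRUE

    # Consuming any random numbers in between changes the splits.
    set.seed(1234L, kind = "L'Ecuyer-CMRG")
    tmp <- runif(1)  # uses up part of the RNG stream
    r3 <- rsmp("cv", folds = 3)$instantiate(task)
    identical(r1$instance$fold, r3$instance$fold)  # FALSE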

    Addressing both seems to yield reproducible results (some of the code is slightly adjusted to keep the runtime lower).

    I hope this also works for you.

    library(mlr3verse)
    #> Loading required package: mlr3
    library(mlr3mbo)
    #> Loading required package: mlr3tuning
    #> Loading required package: paradox
    library(mlr3misc)
    library(magrittr)
    #> 
    #> Attaching package: 'magrittr'
    #> The following objects are masked from 'package:mlr3misc':
    #> 
    #>     set_class, set_names
    library(nycflights13)
    
    dt <- as.data.table(weather)
    dt <- dt[
      order(time_hour),
      .(origin = as.factor(origin), month = as.factor(month), hour = as.factor(hour),
        temp, dewp, humid, wind_dir, wind_speed, precip, visib, pressure,
        time_hour = as.numeric(time_hour))
    ]
    dt <- na.omit(dt)
    
    best_ones <- map_dtr(
      1L:3L,
      function(i) {
        set.seed(1234L, kind = "L'Ecuyer-CMRG")
        my_learner <- lrn("regr.glmboost",
          family = to_tune(p_fct(levels = c("Gaussian", "Laplace", "Huber"))),
          nuirange = to_tune(p_dbl(lower = 0, upper = 1000, logscale = FALSE)),
          mstop = to_tune(p_int(lower = 1, upper = 3, trafo = function(x) 10**x)),
          nu = to_tune(p_dbl(lower = 0.01, upper = 0.3, logscale = TRUE)),
          risk = to_tune(p_fct(levels = c("inbag", "oobag", "none"))),
          trace = to_tune(c(TRUE, FALSE)),
          stopintern = to_tune(c(TRUE, FALSE))
        )
    
        my_task <- as_task_regr(
          x = dt,
          target = "pressure",
          id = "weather_data"
        )
    
        my_instance <- ti(
          task = my_task,
          learner = my_learner,
          resampling = rsmp("holdout"),
          measure = msr("regr.mae"),
          terminator = trm("evals", n_evals = 2)
        )
    
        my_tuner <- tnr("mbo")
    
        my_tuner$optimize(my_instance)
    
        my_instance$archive$best()
      }
    )
    #> INFO  [22:33:53.565] [bbotk] Starting to optimize 7 parameter(s) with '<OptimizerMbo>' and '<TerminatorEvals> [n_evals=2, k=0]'
    #> ... (A LOT OF LOG OUTPUT THAT IS OMITTED)
    
    best_ones[]
    #>    family nuirange mstop        nu risk trace stopintern regr.mae warnings
    #> 1:  Huber 85.45087     3 -2.572761 none FALSE      FALSE  4.74076        0
    #> 2:  Huber 85.45087     3 -2.572761 none FALSE      FALSE  4.74076        0
    #> 3:  Huber 85.45087     3 -2.572761 none FALSE      FALSE  4.74076        0
    #>    errors runtime_learners                                uhash  x_domain
    #> 1:      0            3.406 653adf83-6fbc-4ef5-b6dd-7e12e97b49d6 <list[7]>
    #> 2:      0            3.773 f70dec6a-073a-45c8-b795-29ef5b662625 <list[7]>
    #> 3:      0            3.673 dc18e13f-2e7d-4fe9-9845-187ebf0db3b3 <list[7]>
    #>              timestamp batch_nr
    #> 1: 2023-01-10 22:34:17        1
    #> 2: 2023-01-10 22:34:41        1
    #> 3: 2023-01-10 22:35:05        1
    

    Created on 2023-01-10 by the reprex package (v2.0.1)
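
    As a quick programmatic check (my own addition, assuming the best_ones object from the run above), you can confirm that the three repetitions agree on everything except run-specific metadata such as uhash, timestamp, and runtime_learners:

    library(data.table)

    # All three repetitions should collapse to one unique configuration.
    stopifnot(uniqueN(best_ones[, .(family, nuirange, mstop, nu, risk,
                                    trace, stopintern, regr.mae)]) == 1L)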