Search code examples
lightgbm mlr3

How to save mlr3 lightgbm model correctly?


I have the following code. I get an error when I predict with a trained model after saving it and reading it back from disk. The error only occurs when I use lightgbm.

library(mlr3)
library(mlr3pipelines)
library(mlr3extralearners)

# Build a small classification task from a few german_credit columns.
credit <- tsk("german_credit")$data()
credit <- credit[, c("credit_risk", "amount", "purpose", "age")]
credit_task <- TaskClassif$new(
  "boston",
  backend = credit,
  target = "credit_risk"
)

# Chain the pipeline operators in front of the lightgbm learner.
pipeline <- po("imputemedian") %>>%
  po("imputeoor") %>>%
  po("fixfactors") %>>%
  po("encodeimpact") %>>%
  lrn("classif.lightgbm")

learner <- GraphLearner$new(pipeline)
learner$train(credit_task)

# Predict with the freshly trained learner.
first_row <- credit[1, ]
learner$predict_newdata(first_row)

# Round-trip the trained learner through an RDS file ----
saveRDS(learner, "gl.rds")
restored <- readRDS("gl.rds")

# Predicting with the reloaded learner is where the error shows up ----
restored$predict_newdata(first_row)

Solution

  • lightgbm uses special functions to save and read models. You have to extract the model before saving and add it back to the graph learner after loading. However, this might not be practical for benchmarks. We will look into it.

    library(mlr3)
    library(mlr3pipelines)
    library(mlr3extralearners)
    library(lightgbm)
    
    # Build a small classification task from a few german_credit columns.
    credit <- tsk("german_credit")$data()
    credit <- credit[, c("credit_risk", "amount", "purpose", "age")]
    credit_task <- TaskClassif$new(
      "boston",
      backend = credit,
      target = "credit_risk"
    )

    # Chain the pipeline operators in front of the lightgbm learner.
    pipeline <- po("imputemedian") %>>%
      po("imputeoor") %>>%
      po("fixfactors") %>>%
      po("encodeimpact") %>>%
      lrn("classif.lightgbm")

    learner <- GraphLearner$new(pipeline)
    learner$train(credit_task)

    # Write the booster with lightgbm's own helper, then the graph learner
    # with plain saveRDS().
    # NOTE(review): newer lightgbm releases may deprecate the *.lgb.Booster
    # helpers in favour of plain saveRDS()/readRDS() -- confirm against the
    # installed version.
    saveRDS.lgb.Booster(learner$model$classif.lightgbm$model, "model.rda")
    saveRDS(learner, "gl.rda")

    # Read both files back: booster first, then the graph learner.
    booster <- readRDS.lgb.Booster("model.rda")
    restored <- readRDS("gl.rda")

    # Put the separately loaded booster back into the learner's state.
    restored$state$model$classif.lightgbm$model <- booster

    # Predict with the restored learner.
    restored$predict_newdata(credit[1, ])