Search code examples
r cross-validation multilabel-classification mlr

MLR resampling creates oneclass problems for multilabel classification


I am trying to measure the performance of multilabel classification for some MLR classifiers using cross-validation.

I tried both the MLR resample method and passing my own subset to train; in both situations an error is thrown. From what I have found out, it happens when the subset used for training contains only a single value for some label.

Below is a small example where this problem occurs:

# Base learner; per the error reported below, it does not support
# one-class problems.
learner <- mlr::makeLearner("classif.logreg")

# Wrap the base learner for multilabel classification (classifier chains).
learner <- makeMultilabelClassifierChainsWrapper(learner)

# Toy data set: each label is TRUE in exactly one of the seven rows
# (lab1 in row 3, lab2 in row 2), so a training subset that misses that row
# sees only FALSE for the label.
data <- data.frame(
    attr1 = c(1, 2, 2, 1, 2, 1, 2),
    attr2 = c(2, 1, 2, 2, 1, 2, 1),
    lab1 = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
    lab2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)
)

task <- mlr::makeMultilabelTask(data = data, target = c("lab1", "lab2"))

Here are two ways to get an error:

1.

# 3-fold cross-validation over the seven-row task.
rDesc <- makeResampleDesc("CV", iters = 3)

resample(learner = learner, task = task, resampling = rDesc)

2.

# The subset excludes rows 2 and 3, the only rows where lab2 / lab1 are TRUE.
model <- mlr::train(
    learner, task,
    subset = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
)

The error message:

Error in checkLearnerBeforeTrain(task, learner, weights): Task 'lab1' is a one-class-problem, but learner 'classif.logreg' does not support that!


Solution

  • As there are no learners in MLR that support one-class classification ( https://mlr.mlr-org.com/articles/tutorial/integrated_learners.html ) and splitting the data may require too much fuss (especially for datasets like reutersk500), I have created a wrapper for two-class learners: if it is given a task with a single target class, it always predicts that class; if the task has more classes, it uses the wrapped learner:

    (This code will be a part of repository https://github.com/lychanl/ChainsOfClassification )

    # Wrap a classification learner in a "OneClassWrapper".
    #
    # learner: the learner to wrap; it is validated with
    #          checkLearner(type = "classif").
    # Returns the object built by mlr::makeBaseWrapper() with learner subclass
    # "OneClassWrapper" and model subclass "OneClassWrapperModel". Its type is
    # set to "classif" and "oneclass" is appended to the wrapped learner's
    # properties.
    makeOneClassWrapper <- function(learner) {
        learner <- checkLearner(learner, type = "classif")

        wrapper_id <- paste("classif.oneClassWrapper", getLearnerId(learner), sep = ".")
        required_packages <- getLearnerPackages(learner)
        learner_type <- getLearnerType(learner)

        wrapper <- mlr::makeBaseWrapper(
            wrapper_id, learner_type, learner, required_packages, makeParamSet(),
            learner.subclass = "OneClassWrapper",
            model.subclass = "OneClassWrapperModel"
        )

        wrapper$type <- "classif"
        # Advertise one-class support on top of whatever the wrapped learner has.
        wrapper$properties <- c(learner$properties, "oneclass")
        wrapper
    }
    
    # Train method for OneClassWrapper learners.
    #
    # If the task description lists at most one class level, no model is
    # fitted: the task's `positive` value is stored and flagged with
    # oneclass = TRUE. Otherwise the wrapped learner is trained on the task
    # (with .subset and .weights passed through) and the result is stored
    # with oneclass = FALSE.
    #
    # Either way the payload gets class "OneClassWrapperModel" and is wrapped
    # with makeChainModel() using the learner's model subclass.
    trainLearner.OneClassWrapper <- function(.learner, .task, .subset = NULL, .weights = NULL, ...) {
        n_levels <- length(getTaskDesc(.task)$class.levels)

        if (n_levels <= 1) {
            payload <- list(oneclass = TRUE, value = .task$task.desc$positive)
        } else {
            fitted_model <- train(.learner$next.learner, .task, .subset, .weights)
            payload <- list(oneclass = FALSE, model = fitted_model)
        }

        payload <- structure(payload, class = "OneClassWrapperModel")
        makeChainModel(next.model = payload, cl = .learner$model.subclass)
    }
    
    # Predict method for OneClassWrapper learners.
    #
    # For a one-class model the stored value is repeated once per row of
    # .newdata, converted to logical and returned as a factor. Otherwise the
    # wrapped model predicts on .newdata and either the response (predict.type
    # "response") or the probabilities for class "TRUE" are returned, also
    # converted to a factor.
    predictLearner.OneClassWrapper <- function(.learner, .model, .newdata, ...) {
        stored <- mlr::getLearnerModel(.model, more.unwrap = FALSE)

        # Degenerate case first: nothing was fitted, so just repeat the value.
        if (stored$oneclass) {
            constant <- as.logical(rep(stored$value, nrow(.newdata)))
            return(as.factor(constant))
        }

        pred <- predict(stored$model, newdata = .newdata)

        out <- if (.learner$predict.type == "response") {
            getPredictionResponse(pred)
        } else {
            # NOTE(review): the probabilities are passed through as.factor()
            # below as well - confirm this is what callers expect.
            getPredictionProbabilities(pred, cl = "TRUE")
        }

        as.factor(out)
    }
    
    # Properties of a OneClassWrapper: exactly what is stored in its
    # `properties` field.
    getLearnerProperties.OneClassWrapper <- function(.learner) {
        .learner$properties
    }
    
    # A OneClassWrapperModel counts as a failure only when it holds a fitted
    # inner model (oneclass is FALSE) and that inner model is itself a failure
    # model.
    isFailureModel.OneClassWrapperModel <- function(model) {
        stored <- mlr::getLearnerModel(model, more.unwrap = FALSE)

        has_inner_model <- !stored$oneclass
        # && short-circuits, so the inner model is only inspected when present.
        has_inner_model && isFailureModel(stored$model)
    }
    
    # Failure message of a OneClassWrapperModel: delegated to the inner model
    # when one was fitted, otherwise the empty string.
    getFailureModelMsg.OneClassWrapperModel <- function(model) {
        stored <- mlr::getLearnerModel(model, more.unwrap = FALSE)

        if (!stored$oneclass) {
            return(getFailureModelMsg(stored$model))
        }
        ""
    }
    
    # Failure dump of a OneClassWrapperModel: delegated to the inner model
    # when one was fitted, otherwise the empty string.
    getFailureModelDump.OneClassWrapperModel <- function(model) {
        stored <- mlr::getLearnerModel(model, more.unwrap = FALSE)

        if (!stored$oneclass) {
            return(getFailureModelDump(stored$model))
        }
        ""
    }
    
    # Register the wrapper's S3 methods with the corresponding generics.
    #
    # The class argument must be the bare class name as it appears in class(x).
    # Registering "<OneClassWrapper>" (with the template's angle brackets)
    # attaches the method to a class no object has, so it would never be
    # dispatched through the registry. predictLearner is registered as well so
    # that all learner methods are handled the same way.
    registerS3method("trainLearner", "OneClassWrapper",
        trainLearner.OneClassWrapper)
    registerS3method("predictLearner", "OneClassWrapper",
        predictLearner.OneClassWrapper)
    registerS3method("getLearnerProperties", "OneClassWrapper",
        getLearnerProperties.OneClassWrapper)
    registerS3method("isFailureModel", "OneClassWrapperModel",
        isFailureModel.OneClassWrapperModel)
    registerS3method("getFailureModelMsg", "OneClassWrapperModel",
        getFailureModelMsg.OneClassWrapperModel)
    registerS3method("getFailureModelDump", "OneClassWrapperModel",
        getFailureModelDump.OneClassWrapperModel)