I tried to save the output of XGBoost's xgb.train to a log file with the logging module, but I could not capture it. How can I record it? I looked at the existing Stack Overflow questions on this, but none of them worked for me. I would appreciate a concrete sample.
import sys
import logging
# ---------------------------------------------- #
# Some logging settings
# ---------------------------------------------- #
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_digits

rng = np.random.RandomState(31337)

print("Zeros and Ones from the Digits dataset: binary classification")
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    param = {'max_depth': 2, 'eta': 0.3, 'silent': 1, 'objective': 'binary:logistic'}
    dtrain = xgb.DMatrix(X[train_index], y[train_index])
    dtest = xgb.DMatrix(X[test_index], y[test_index])
    # specify validation sets to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    bst = xgb.train(param, dtrain, num_round, watchlist)
# I want to record this output.
# Zeros and Ones from the Digits dataset: binary classification
# [0] eval-error:0.011111 train-error:0.011111
# [1] eval-error:0.011111 train-error:0.005556
# [0] eval-error:0.016667 train-error:0.005556
# [1] eval-error:0.005556 train-error:0
The accepted solution does not work with XGBoost version 1.3 and above (tested on 1.6.1), because the callback mechanism changed:

"In XGBoost 1.3, a new callback interface is designed for Python package."

(Source: https://xgboost.readthedocs.io/en/latest/python/callbacks.html)
You can route the evaluation output of xgboost.train through Python's logging by defining a custom logging callback and passing it to xgb.train via the callbacks argument, as shown below:
import logging
logger = logging.getLogger(__name__)

import xgboost

class XGBLogging(xgboost.callback.TrainingCallback):
    """Log evaluation results to a Python logger every epoch_log_interval rounds."""
    def __init__(self, epoch_log_interval=100):
        self.epoch_log_interval = epoch_log_interval

    def after_iteration(self, model, epoch, evals_log):
        if epoch % self.epoch_log_interval == 0:
            # evals_log maps each eval set name to {metric_name: [value per round]}
            for data, metric in evals_log.items():
                metrics_str = " ".join(
                    f"{m_key}: {m_val[-1]}" for m_key, m_val in metric.items()
                )
                logger.info(f"Epoch: {epoch}, {data}: {metrics_str}")
        # Return False to indicate training should not stop.
        return False
model = xgboost.train(
    xgboost_params,
    dtrain=dtrain,
    evals=[(dtrain, "train"), (dvalid, "valid")],
    callbacks=[XGBLogging(epoch_log_interval=100)],
)
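
The callback only emits records through the logger; to land them in an actual file (the original goal), the logger still has to be configured with a file destination before training. A minimal sketch, where the file name train.log and the format string are placeholders you would choose yourself:

import logging

# Hypothetical configuration: send INFO-level records from all loggers
# (including the XGBLogging callback's module logger) to train.log.
logging.basicConfig(
    filename="train.log",
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
)

With this in place, every logger.info(...) call in after_iteration is appended to train.log. If you also want to suppress XGBoost's own per-round printing to stdout, you can pass verbose_eval=False to xgboost.train.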