From 9c9353170990c9224dee1715cacfca4b0773170a Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Wed, 5 Aug 2020 12:27:19 +0800
Subject: [PATCH] Update Python custom objective demo. (#5981)

---
 demo/guide-python/custom_objective.py | 49 ++++++++++++++++-----------
 tests/python/test_basic_models.py     | 11 +++---
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py
index 4830ae9fe1e6..ee4ac5b80fba 100644
--- a/demo/guide-python/custom_objective.py
+++ b/demo/guide-python/custom_objective.py
@@ -1,28 +1,28 @@
-import os
-import numpy as np
-import xgboost as xgb
 ###
 # advanced: customized loss function
 #
+import os
+import numpy as np
+import xgboost as xgb
+
 print('start running example to used customized objective function')
 
 CURRENT_DIR = os.path.dirname(__file__)
 dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
 dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
 
-# note: for customized objective function, we leave objective as default
-# note: what we are getting is margin value in prediction
-# you must know what you are doing
-param = {'max_depth': 2, 'eta': 1}
+# note: what we are getting is the margin value in prediction; you must know
+# what you are doing
+param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-num_round = 2
+num_round = 10
 
 
 # user define objective function, given prediction, return gradient and second
 # order gradient this is log likelihood loss
 def logregobj(preds, dtrain):
     labels = dtrain.get_label()
-    preds = 1.0 / (1.0 + np.exp(-preds))
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
     grad = preds - labels
     hess = preds * (1.0 - preds)
     return grad, hess
@@ -31,20 +31,31 @@ def logregobj(preds, dtrain):
 
 # user defined evaluation function, return a pair metric_name, result
 # NOTE: when you do customized loss function, the default prediction value is
-# margin. this may make builtin evaluation metric not function properly for
-# example, we are doing logistic loss, the prediction is score before logistic
-# transformation the builtin evaluation error assumes input is after logistic
-# transformation Take this in mind when you use the customization, and maybe
-# you need write customized evaluation function
+# margin, i.e. the score before the logistic transformation.
 def evalerror(preds, dtrain):
     labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
     # return a pair metric_name, result. The metric name must not contain a
-    # colon (:) or a space since preds are margin(before logistic
-    # transformation, cutoff at 0)
-    return 'my-error', float(sum(labels != (preds > 0.0))) / len(labels)
+    # colon (:) or a space
+    return 'my-error', float(sum(labels != (preds > 0.5))) / len(labels)
+
+py_evals_result = {}
 
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
-bst = xgb.train(param, dtrain, num_round, watchlist, obj=logregobj,
-                feval=evalerror)
+py_params = param.copy()
+py_params.update({'disable_default_eval_metric': True})
+py_logreg = xgb.train(py_params, dtrain, num_round, watchlist, obj=logregobj,
+                      feval=evalerror, evals_result=py_evals_result)
+
+evals_result = {}
+params = param.copy()
+params.update({'eval_metric': 'error'})
+logreg = xgb.train(params, dtrain, num_boost_round=num_round, evals=watchlist,
+                   evals_result=evals_result)
+
+
+for i in range(len(py_evals_result['train']['my-error'])):
+    np.testing.assert_almost_equal(py_evals_result['train']['my-error'],
+                                   evals_result['train']['error'])
diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py
index 7a1bc8c1f8f3..c1802caad877 100644
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -197,9 +197,9 @@ def test_boost_from_prediction(self):
         assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
 
     def test_custom_objective(self):
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0}
+        param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'}
         watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-        num_round = 2
+        num_round = 10
 
         def logregobj(preds, dtrain):
             labels = dtrain.get_label()
@@ -210,10 +210,12 @@ def logregobj(preds, dtrain):
 
         def evalerror(preds, dtrain):
             labels = dtrain.get_label()
+            preds = 1.0 / (1.0 + np.exp(-preds))
             return 'error', float(sum(labels != (preds > 0.5))) / len(labels)
 
         # test custom_objective in training
-        bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+        bst = xgb.train(param, dtrain, num_round, watchlist, obj=logregobj,
+                        feval=evalerror)
         assert isinstance(bst, xgb.core.Booster)
         preds = bst.predict(dtest)
         labels = dtest.get_label()
@@ -230,7 +232,8 @@ def neg_evalerror(preds, dtrain):
             labels = dtrain.get_label()
             return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
 
-        bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
+        bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj,
+                         neg_evalerror, maximize=True)
         preds2 = bst2.predict(dtest)
         err2 = sum(1 for i in range(len(preds2))
                    if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2))
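
A note on the pattern this patch establishes: the demo now trains the same
model twice, once with the Python implementation of the logistic objective
(builtin metric disabled, custom metric supplied) and once with the builtin
'reg:logistic' objective plus the builtin 'error' metric, then asserts that
the two evaluation histories agree. The following is a minimal self-contained
sketch of that equivalence check; the synthetic data, variable names such as
custom_hist/builtin_hist, and parameter values are illustrative assumptions,
not part of the patch:

    import numpy as np
    import xgboost as xgb

    # illustrative synthetic binary classification data
    rng = np.random.RandomState(1994)
    X = rng.randn(100, 10)
    y = (rng.rand(100) > 0.5).astype(np.float32)
    dtrain = xgb.DMatrix(X, label=y)

    def logregobj(preds, dtrain):
        # preds are raw margins; apply the sigmoid before computing gradients
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))
        return preds - labels, preds * (1.0 - preds)

    def evalerror(preds, dtrain):
        # mirror the builtin 'error' metric on sigmoid-transformed margins
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))
        return 'my-error', float(np.sum(labels != (preds > 0.5))) / len(labels)

    base = {'max_depth': 2, 'eta': 1, 'objective': 'reg:logistic'}

    # custom objective: disable the default metric and supply our own
    custom_hist = {}
    custom_params = base.copy()
    custom_params.update({'disable_default_eval_metric': True})
    xgb.train(custom_params, dtrain, 10, evals=[(dtrain, 'train')],
              obj=logregobj, feval=evalerror, evals_result=custom_hist)

    # builtin objective with the builtin 'error' metric
    builtin_hist = {}
    builtin_params = base.copy()
    builtin_params.update({'eval_metric': 'error'})
    xgb.train(builtin_params, dtrain, 10, evals=[(dtrain, 'train')],
              evals_result=builtin_hist)

    np.testing.assert_almost_equal(custom_hist['train']['my-error'],
                                   builtin_hist['train']['error'])

Keeping 'objective': 'reg:logistic' in the parameters even when a custom obj
is passed mirrors the demo: it makes base_score handling identical in both
runs, which is what lets the two histories match.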