Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADD: early stopping in xgboost/lightgbm #928

Merged
merged 2 commits into from
Aug 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions recbole/properties/model/xgboost.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ xgb_params:
eta: 1
seed: 2020
# nthread: -1
xgb_num_boost_round: 500
xgb_num_boost_round: 100
xgb_early_stopping_rounds: ~
xgb_verbose_eval: 100
xgb_verbose_eval: 50

59 changes: 49 additions & 10 deletions recbole/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,9 +662,18 @@ def __init__(self, config, model):
# model saved
self.checkpoint_dir = config['checkpoint_dir']
ensure_dir(self.checkpoint_dir)
temp_file = '{}-{}-temp.pth'.format(self.config['model'], get_local_time())
self.temp_file = os.path.join(self.checkpoint_dir, temp_file)

saved_model_file = '{}-{}.pth'.format(self.config['model'], get_local_time())
self.saved_model_file = os.path.join(self.checkpoint_dir, saved_model_file)

self.stopping_step = config['stopping_step']
self.valid_metric_bigger = config['valid_metric_bigger']
self.cur_step = 0
self.best_valid_score = -np.inf if self.valid_metric_bigger else np.inf
self.best_valid_result = None

def _interaction_to_sparse(self, dataloader):
r"""Convert data format from interaction to sparse or numpy

Expand Down Expand Up @@ -725,7 +734,7 @@ def _valid_epoch(self, valid_data):
Args:
valid_data (DecisionTreeDataLoader): DecisionTreeDataLoader, which is the same with GeneralDataLoader.
"""
valid_result = self.evaluate(valid_data)
valid_result = self.evaluate(valid_data, load_best_model=False)
valid_score = calculate_valid_score(valid_result, self.valid_metric)
return valid_result, valid_score

Expand All @@ -734,16 +743,22 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
if self.boost_model is not None:
self.model.load_model(self.boost_model)

self.best_valid_score = 0.
self.best_valid_result = 0.

for epoch_idx in range(self.epochs):
self._train_at_once(train_data, valid_data)

if (epoch_idx + 1) % self.eval_step == 0:
# evaluate
valid_start_time = time()
valid_result, valid_score = self._valid_epoch(valid_data)

self.best_valid_score, self.cur_step, stop_flag, update_flag = early_stopping(
valid_score,
self.best_valid_score,
self.cur_step,
max_step=self.stopping_step,
bigger=self.valid_metric_bigger
)

valid_end_time = time()
valid_score_output = (set_color("epoch %d evaluating", 'green') + " [" + set_color("time", 'blue')
+ ": %.2fs, " + set_color("valid_score", 'blue') + ": %f]") % \
Expand All @@ -754,8 +769,20 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
self.logger.info(valid_result_output)
self.tensorboard.add_scalar('Valid_score', valid_score, epoch_idx)

self.best_valid_score = valid_score
self.best_valid_result = valid_result
if update_flag:
if saved:
self.model.save_model(self.saved_model_file)
update_output = set_color('Saving current best', 'blue') + ': %s' % self.saved_model_file
if verbose:
self.logger.info(update_output)
self.best_valid_result = valid_result

if stop_flag:
stop_output = 'Finished training, best eval result in epoch %d' % \
(epoch_idx - self.cur_step * self.eval_step)
if verbose:
self.logger.info(stop_output)
break

return self.best_valid_score, self.best_valid_result

Expand Down Expand Up @@ -818,10 +845,16 @@ def _train_at_once(self, train_data, valid_data):
callbacks=self.callbacks
)

self.model.save_model(self.saved_model_file)
self.boost_model = self.saved_model_file
self.model.save_model(self.temp_file)
self.boost_model = self.temp_file

def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progress=False):
if load_best_model:
if model_file:
checkpoint_file = model_file
else:
checkpoint_file = self.saved_model_file
self.model.load_model(checkpoint_file)
self.eval_pred = torch.Tensor()
self.eval_true = torch.Tensor()

Expand Down Expand Up @@ -953,10 +986,16 @@ def _train_at_once(self, train_data, valid_data):
callbacks=self.callbacks
)

self.model.save_model(self.saved_model_file)
self.boost_model = self.saved_model_file
self.model.save_model(self.temp_file)
self.boost_model = self.temp_file

def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progress=False):
if load_best_model:
if model_file:
checkpoint_file = model_file
else:
checkpoint_file = self.saved_model_file
self.model.load_model(checkpoint_file)
self.eval_pred = torch.Tensor()
self.eval_true = torch.Tensor()

Expand Down