Calculate loss support (#1075)
* Calculate loss support

* Relaxed log loss test for individual models

* Feedback from #1075

* Missing loss in comment

* Revert back test as well
franchuterivera authored Feb 16, 2021
1 parent a275763 commit cf27323
Showing 8 changed files with 329 additions and 212 deletions.
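The common thread across these files is replacing hand-rolled "optimum minus score" arithmetic with a single calculate_loss helper, so that ensemble selection, the evaluators, and the tests all minimize a loss instead of maximizing a score. A minimal sketch of the conversion rule (score_to_loss is an illustrative name, not part of the auto-sklearn API; optimum and sign mirror the Scorer._optimum and Scorer._sign attributes used in the diffs below):

def score_to_loss(score: float, optimum: float, sign: int) -> float:
    # greater-is-better metrics (sign=+1): loss = optimum - score
    # smaller-is-better metrics (sign=-1): loss = optimum + score
    return optimum - sign * score

# accuracy: optimum=1, sign=+1  ->  1.0 - 0.9  = 0.1
print(score_to_loss(0.9, optimum=1.0, sign=1))
# log loss: optimum=0, sign=-1  ->  0.0 + 0.35 = 0.35
print(score_to_loss(0.35, optimum=0.0, sign=-1))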
220 changes: 113 additions & 107 deletions autosklearn/ensemble_builder.py

Large diffs are not rendered by default.

32 changes: 15 additions & 17 deletions autosklearn/ensembles/ensemble_selection.py
@@ -6,7 +6,7 @@

from autosklearn.constants import TASK_TYPES
from autosklearn.ensembles.abstract_ensemble import AbstractEnsemble
from autosklearn.metrics import Scorer, calculate_score
from autosklearn.metrics import Scorer, calculate_loss
from autosklearn.pipeline.base import BasePipeline


@@ -100,7 +100,7 @@ def _fast(
dtype=np.float64,
)
for i in range(ensemble_size):
scores = np.zeros(
losses = np.zeros(
(len(predictions)),
dtype=np.float64,
)
@@ -129,24 +129,23 @@ def _fast(
out=fant_ensemble_prediction
)

# Calculate score is versatile and can return a dict of score
# calculate_loss is versatile and can return a dict of losses
# when scoring_functions=None, we know it will be a float
calculated_score = cast(
losses[j] = cast(
float,
calculate_score(
calculate_loss(
solution=labels,
prediction=fant_ensemble_prediction,
task_type=self.task_type,
metric=self.metric,
scoring_functions=None
)
)
scores[j] = self.metric._optimum - calculated_score

all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
all_best = np.argwhere(losses == np.nanmin(losses)).flatten()
best = self.random_state.choice(all_best)
ensemble.append(predictions[best])
trajectory.append(scores[best])
trajectory.append(losses[best])
order.append(best)

# Handle special case
@@ -155,7 +154,7 @@ def _fast(

self.indices_ = order
self.trajectory_ = trajectory
self.train_score_ = trajectory[-1]
self.train_loss_ = trajectory[-1]

def _slow(
self,
@@ -172,30 +171,29 @@ def _slow(
ensemble_size = self.ensemble_size

for i in range(ensemble_size):
scores = np.zeros(
losses = np.zeros(
[np.shape(predictions)[0]],
dtype=np.float64,
)
for j, pred in enumerate(predictions):
ensemble.append(pred)
ensemble_prediction = np.mean(np.array(ensemble), axis=0)
# Calculate score is versatile and can return a dict of score
# calculate_loss is versatile and can return a dict of losses
# when scoring_functions=None, we know it will be a float
calculated_score = cast(
losses[j] = cast(
float,
calculate_score(
calculate_loss(
solution=labels,
prediction=ensemble_prediction,
task_type=self.task_type,
metric=self.metric,
scoring_functions=None
)
)
scores[j] = self.metric._optimum - calculated_score
ensemble.pop()
best = np.nanargmin(scores)
best = np.nanargmin(losses)
ensemble.append(predictions[best])
trajectory.append(scores[best])
trajectory.append(losses[best])
order.append(best)

# Handle special case
@@ -210,7 +208,7 @@ def _slow(
trajectory,
dtype=np.float64,
)
self.train_score_ = trajectory[-1]
self.train_loss_ = trajectory[-1]

def _calculate_weights(self) -> None:
ensemble_members = Counter(self.indices_).most_common()
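Both _fast and _slow now follow the same pattern: form a candidate mean prediction per model, turn it into a loss with calculate_loss, and keep the argmin. A hedged sketch of a single greedy step under those assumptions (greedy_step is an illustrative name; it omits the pre-allocated weighted-average buffer the real _fast uses and is not the verbatim implementation):

import numpy as np
from autosklearn.metrics import calculate_loss

def greedy_step(ensemble, predictions, labels, task_type, metric, rng):
    losses = np.zeros(len(predictions), dtype=np.float64)
    for j, pred in enumerate(predictions):
        # candidate ensemble prediction if model j were added next
        candidate = np.mean(np.array(ensemble + [pred]), axis=0)
        losses[j] = calculate_loss(
            solution=labels,
            prediction=candidate,
            task_type=task_type,
            metric=metric,
            scoring_functions=None,  # guarantees a float, not a dict
        )
    # break ties at random, as _fast does via self.random_state
    all_best = np.argwhere(losses == np.nanmin(losses)).flatten()
    best = rng.choice(all_best)
    return best, losses[best]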
40 changes: 16 additions & 24 deletions autosklearn/evaluation/abstract_evaluator.py
@@ -19,7 +19,7 @@
from autosklearn.pipeline.implementations.util import (
convert_multioutput_multiclass_to_multilabel
)
from autosklearn.metrics import calculate_score
from autosklearn.metrics import calculate_loss, Scorer
from autosklearn.util.logging_ import get_named_client_logger

from ConfigSpace import Configuration
@@ -236,17 +236,18 @@ def _get_model(self):
init_params=self._init_params)
return model

def _loss(self, y_true, y_hat, scoring_functions=None):
"""Auto-sklearn follows a minimization goal, so the make_scorer
sign is used as a guide to obtain the value to reduce.
On this regard, to optimize a metric:
1- the score is calculated with calculate_score, with the caveat that if
greater is not better for the metric, a negative score is returned.
2- the err (the optimization goal) is then:
optimum - (metric.sign * actual_score)
For accuracy for example: optimum(1) - (+1 * actual score)
For logloss for example: optimum(0) - (-1 * actual score)
def _loss(self, y_true: np.ndarray, y_hat: np.ndarray,
scoring_functions: typing.Optional[typing.List[Scorer]] = None
) -> typing.Union[float, typing.Dict[str, float]]:
"""Auto-sklearn follows a minimization goal.
The calculate_loss internally translate a score function to
a minimization problem.
For a dummy prediction, the worst result is assumed.
Parameters
----------
y_true : np.ndarray
    The ground truth of the targets
y_hat : np.ndarray
    The model's predictions for the targets
scoring_functions : Optional[List[Scorer]]
    Additional metrics for which losses are also computed
"""
scoring_functions = (
self.scoring_functions
@@ -255,23 +256,14 @@ def _loss(self, y_true, y_hat, scoring_functions=None):
)
if not isinstance(self.configuration, Configuration):
if scoring_functions:
return {self.metric.name: 1.0}
return {self.metric.name: self.metric._worst_possible_result}
else:
return 1.0
return self.metric._worst_possible_result

score = calculate_score(
return calculate_loss(
y_true, y_hat, self.task_type, self.metric,
scoring_functions=scoring_functions)

if hasattr(score, '__len__'):
err = {metric.name: metric._optimum - score[metric.name]
for metric in scoring_functions}
err[self.metric.name] = self.metric._optimum - score[self.metric.name]
else:
err = self.metric._optimum - score

return err

def finish_up(self, loss, train_loss, opt_pred, valid_pred, test_pred,
additional_run_info, file_output, final_call, status):
"""This function does everything necessary after the fitting is done:
17 changes: 3 additions & 14 deletions autosklearn/evaluation/test_evaluator.py
@@ -5,8 +5,7 @@
AbstractEvaluator,
_fit_and_suppress_warnings,
)
from autosklearn.metrics import calculate_score, CLASSIFICATION_METRICS, REGRESSION_METRICS
from autosklearn.constants import CLASSIFICATION_TASKS
from autosklearn.metrics import calculate_loss


__all__ = [
@@ -71,7 +70,7 @@ def predict_and_loss(self, train=False):
if train:
Y_pred = self.predict_function(self.X_train, self.model,
self.task_type, self.Y_train)
score = calculate_score(
err = calculate_loss(
solution=self.Y_train,
prediction=Y_pred,
task_type=self.task_type,
@@ -80,23 +79,13 @@ def predict_and_loss(self, train=False):
else:
Y_pred = self.predict_function(self.X_test, self.model,
self.task_type, self.Y_train)
score = calculate_score(
err = calculate_loss(
solution=self.Y_test,
prediction=Y_pred,
task_type=self.task_type,
metric=self.metric,
scoring_functions=self.scoring_functions)

if hasattr(score, '__len__'):
if self.task_type in CLASSIFICATION_TASKS:
err = {key: metric._optimum - score[key] for key, metric in
CLASSIFICATION_METRICS.items() if key in score}
else:
err = {key: metric._optimum - score[key] for key, metric in
REGRESSION_METRICS.items() if key in score}
else:
err = self.metric._optimum - score

return err, Y_pred, None, None


73 changes: 68 additions & 5 deletions autosklearn/metrics/__init__.py
@@ -1,6 +1,6 @@
from abc import ABCMeta, abstractmethod
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Union
from typing import Any, Callable, Dict, List, Optional, Union, cast

import numpy as np

@@ -353,7 +353,7 @@ def calculate_score(
for metric_ in scoring_functions:

try:
score_dict[metric_.name] = metric_(solution, cprediction)
score_dict[metric_.name] = metric_._sign * metric_(solution, cprediction)
except ValueError as e:
print(e, e.args[0])
if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \
@@ -369,7 +369,7 @@
# handle?

try:
score_dict[metric_.name] = metric_(solution, prediction)
score_dict[metric_.name] = metric_._sign * metric_(solution, prediction)
except ValueError as e:
if e.args[0] == 'multiclass format is not supported':
continue
@@ -397,10 +397,73 @@ def get_metric_score(
solution: np.ndarray,
task_type: int
) -> float:
# We match the behaviour of GridSearchCV:
# in scikit-learn, the exact value of the score_func
# is returned (not that of the 'Scorer', which might be
# negative for functions like mse, because scikit-learn
# maximizes). A user who wants this behaviour in GridSearchCV
# is expected to pass neg_mean_squared_error.
# For this reason we multiply back by metric_._sign
if task_type in REGRESSION_TASKS:
# TODO put this into the regression metric itself
cprediction = sanitize_array(prediction)
score = metric_(solution, cprediction)
score = metric_._sign * metric_(solution, cprediction)
else:
score = metric_(solution, prediction)
score = metric_._sign * metric_(solution, prediction)
return score
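The sign handling is easiest to see with a smaller-is-better metric. The sketch below assumes autosklearn.metrics.mean_squared_error is available as a built-in Scorer and that the private _sign attribute keeps the meaning used in this diff; it shows why multiplying back by metric_._sign recovers the plain scikit-learn value, mirroring the neg_mean_squared_error convention mentioned above:

import numpy as np
from autosklearn.metrics import mean_squared_error  # built-in Scorer, greater_is_better=False

y_true = np.array([1.0, 2.5, 3.0])
y_pred = np.array([1.0, 2.0, 4.0])

# Scorer.__call__ already applies _sign, so the value comes back negative
# (like sklearn's neg_mean_squared_error under GridSearchCV).
signed = mean_squared_error(y_true, y_pred)

# Multiplying back by _sign yields the raw score_func value, which is what
# calculate_score / get_metric_score now return.
raw = mean_squared_error._sign * signed
print(signed, raw)  # roughly -0.4167 and 0.4167 for these inputs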


def calculate_loss(
solution: np.ndarray,
prediction: np.ndarray,
task_type: int,
metric: Scorer,
scoring_functions: Optional[List[Scorer]] = None
) -> Union[float, Dict[str, float]]:
"""
Returns a loss (a magnitude that casts the optimization
problem as a minimization one) for the given
Auto-sklearn Scorer object
Parameters
----------
solution: np.ndarray
    The ground truth of the targets
prediction: np.ndarray
    The model's best estimate of the given targets
task_type: int
    Indicates whether the problem task is classification
    or regression
metric: Scorer
    Object that hosts a function to calculate how good the
    prediction is with respect to the solution.
scoring_functions: List[Scorer]
    A list of additional metrics for which to calculate losses
Returns
-------
float or Dict[str, float]
    A loss for each of the provided scorer objects
"""
score = calculate_score(
solution=solution,
prediction=prediction,
task_type=task_type,
metric=metric,
scoring_functions=scoring_functions,
)

if scoring_functions:
score = cast(Dict, score)
# we expect a dict() object for which we should calculate the loss
loss_dict = dict()
for metric_ in scoring_functions + [metric]:
# TODO: When metrics are annotated with type_of_target support
# we can remove this check
if metric_.name not in score:
continue
# the metric argument might not be in scoring_functions,
# so it is appended to the list. Rather than checking whether
# that is the case, redefining loss_dict[metric] is less expensive
loss_dict[metric_.name] = metric_._optimum - metric_._sign * score[metric_.name]
return loss_dict
else:
return metric._optimum - metric._sign * cast(float, score)
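A hedged usage sketch of the two return shapes of calculate_loss (a float when only metric is given, a dict keyed by metric name when scoring_functions is passed). The imports below are assumed to exist in the installed auto-sklearn; exact values depend on the version:

import numpy as np
from autosklearn.constants import BINARY_CLASSIFICATION
from autosklearn.metrics import accuracy, log_loss, calculate_loss

y_true = np.array([0, 1, 1, 0])
# class-probability predictions, shape (n_samples, n_classes)
y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6], [0.7, 0.3]])

# Only `metric` and scoring_functions=None -> a single float:
# accuracy here is 1.0, so the loss is 1.0 - 1.0 = 0.0.
print(calculate_loss(solution=y_true, prediction=y_hat,
                     task_type=BINARY_CLASSIFICATION, metric=accuracy))

# With scoring_functions -> a dict that also contains `metric` itself.
losses = calculate_loss(solution=y_true, prediction=y_hat,
                        task_type=BINARY_CLASSIFICATION, metric=accuracy,
                        scoring_functions=[log_loss])
print(losses)  # e.g. {'accuracy': 0.0, 'log_loss': ...}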
5 changes: 3 additions & 2 deletions test/test_automl/test_automl.py
@@ -308,7 +308,7 @@ def test_automl_outputs(backend, dask_client):
'start_time_100',
'datamanager.pkl',
'ensemble_read_preds.pkl',
'ensemble_read_scores.pkl',
'ensemble_read_losses.pkl',
'runs',
'ensembles',
'ensemble_history.json',
@@ -625,7 +625,8 @@ def test_load_best_individual_model(metric, backend, dask_client):
if metric.name == 'balanced_accuracy':
assert automl.score(X_test, Y_test) > 0.9
elif metric.name == 'log_loss':
assert automl.score(X_test, Y_test) <= 0.2
# Values of 0.6978304740364537 have been observed in GitHub Actions
assert automl.score(X_test, Y_test) <= 0.72
else:
raise ValueError(metric.name)

(diffs for the remaining changed files are not shown)
