
Fix calculate loss (#1123)
* FIX an issue with calculate loss

* simplify code

* fix unit test
mfeurer authored Apr 12, 2021
1 parent f6d6be4 commit c2b0e73
Showing 5 changed files with 160 additions and 66 deletions.
11 changes: 5 additions & 6 deletions autosklearn/automl.py
@@ -40,7 +40,7 @@
from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings
from autosklearn.evaluation.train_evaluator import _fit_with_budget
from autosklearn.metrics import calculate_score
from autosklearn.metrics import calculate_metric
from autosklearn.util.backend import Backend
from autosklearn.util.stopwatch import StopWatch
from autosklearn.util.logging_ import (
@@ -1153,11 +1153,10 @@ def score(self, X, y):
# same representation domain
prediction = self.InputValidator.target_validator.transform(prediction)

return calculate_score(solution=y,
prediction=prediction,
task_type=self._task,
metric=self._metric,
scoring_functions=None)
return calculate_metric(solution=y,
prediction=prediction,
task_type=self._task,
metric=self._metric, )

@property
def cv_results_(self):
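The score() method now delegates to calculate_metric, which reports the metric in its natural direction (the same convention the removed get_metric_score helper followed). Below is a minimal sketch, not part of the diff, of what the method now effectively computes; `automl`, `X`, and `y` are hypothetical placeholders.

# Sketch only: mirrors the new body of score() shown in the hunk above.
from autosklearn.metrics import calculate_metric

def score_like(automl, X, y):
    prediction = automl.predict(X)
    # Bring predictions into the same representation domain as y,
    # as the hunk above does before scoring.
    prediction = automl.InputValidator.target_validator.transform(prediction)
    # calculate_metric reports the metric in its natural direction
    # (accuracy in [0, 1], a plain positive MSE or log loss, ...).
    return calculate_metric(
        solution=y,
        prediction=prediction,
        task_type=automl._task,
        metric=automl._metric,
    )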
7 changes: 3 additions & 4 deletions autosklearn/ensemble_builder.py
@@ -420,7 +420,7 @@ def __init__(
performance_range_threshold: float
Keep only models that are better than:
dummy + (best - dummy)*performance_range_threshold
E.g dummy=2, best=4, thresh=0.5 --> only consider models with score > 3
E.g dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3
Will at most return the minimum between ensemble_nbest models,
and max_models_on_disc. Might return less
seed: int
@@ -978,7 +978,7 @@ def get_n_best_preds(self):
# no model left; try to use dummy loss (num_run==0)
# log warning when there are other models but not better than dummy model
if num_keys > num_dummy:
self.logger.warning("No models better than random - using Dummy Score!"
self.logger.warning("No models better than random - using Dummy loss!"
"Number of models besides current dummy model: %d. "
"Number of dummy models: %d",
num_keys - 1,
@@ -1105,8 +1105,7 @@ def get_n_best_preds(self):
# only if the model ends up in the ensemble
self.read_losses[k]['loaded'] = 1

# return best scored keys of self.read_losses
# That is, the one with the lowest loss
# return keys of self.read_losses with lowest losses
return sorted_keys[:ensemble_n_best]

def get_valid_test_preds(self, selected_keys: List[str]) -> Tuple[List[str], List[str]]:
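The performance_range_threshold docstring above defines a cutoff between the dummy and the best model; a short illustration of that arithmetic (plain Python, variable names are illustrative only):

# Worked example for the docstring's numbers: dummy=2, best=4, thresh=0.5.
dummy, best, threshold = 2.0, 4.0, 0.5
cutoff = dummy + (best - dummy) * threshold  # 2 + (4 - 2) * 0.5 == 3.0
# Models on the wrong side of this cutoff are dropped; at most
# min(ensemble_nbest, max_models_on_disc) models are kept, as the docstring notes.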
170 changes: 122 additions & 48 deletions autosklearn/metrics/__init__.py
@@ -342,18 +342,40 @@ def calculate_score(
metric: Scorer,
scoring_functions: Optional[List[Scorer]] = None
) -> Union[float, Dict[str, float]]:
"""
Returns a score (a magnitude that allows casting the
optimization problem as a maximization one) for the
given Auto-Sklearn Scorer object
Parameters
----------
solution: np.ndarray
The ground truth of the targets
prediction: np.ndarray
The best estimate from the model, of the given targets
task_type: int
To understand if the problem task is classification
or regression
metric: Scorer
Object that hosts a function to calculate how good the
prediction is according to the solution.
scoring_functions: List[Scorer]
A list of metrics to calculate multiple losses
Returns
-------
float or Dict[str, float]
"""
if task_type not in TASK_TYPES:
raise NotImplementedError(task_type)

if scoring_functions:
score_dict = dict()
if task_type in REGRESSION_TASKS:
# TODO put this into the regression metric itself
cprediction = sanitize_array(prediction)
for metric_ in scoring_functions:
for metric_ in scoring_functions + [metric]:

try:
score_dict[metric_.name] = metric_._sign * metric_(solution, cprediction)
score_dict[metric_.name] = _compute_scorer(
metric_, prediction, solution, task_type)
except ValueError as e:
print(e, e.args[0])
if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \
@@ -363,13 +385,14 @@ def calculate_score(
raise e

else:
for metric_ in scoring_functions:
for metric_ in scoring_functions + [metric]:

# TODO maybe annotate metrics to define which cases they can
# handle?

try:
score_dict[metric_.name] = metric_._sign * metric_(solution, prediction)
score_dict[metric_.name] = _compute_scorer(
metric_, prediction, solution, task_type)
except ValueError as e:
if e.args[0] == 'multiclass format is not supported':
continue
@@ -383,34 +406,10 @@ def calculate_score(
else:
raise e

if metric.name not in score_dict.keys():
score_dict[metric.name] = get_metric_score(metric, prediction, solution, task_type)
return score_dict

else:
return get_metric_score(metric, prediction, solution, task_type)


def get_metric_score(
metric_: Scorer,
prediction: np.ndarray,
solution: np.ndarray,
task_type: int
) -> float:
# We match the behaviour of GridSearchCV
# In scikit learn, the exact value of the score_func
# is returned (not that of the 'Scorer' which might be
# negative in functions like mse, as scikit learn
# maximizes.) If an user wants to use GridSearchCV
# They are expected to pass neg_mean_squared_error
# For this reason we multiply back by metric_._sign
if task_type in REGRESSION_TASKS:
# TODO put this into the regression metric itself
cprediction = sanitize_array(prediction)
score = metric_._sign * metric_(solution, cprediction)
else:
score = metric_._sign * metric_(solution, prediction)
return score
return _compute_scorer(metric, prediction, solution, task_type)


def calculate_loss(
@@ -422,26 +421,28 @@ def calculate_loss(
) -> Union[float, Dict[str, float]]:
"""
Returns a loss (a magnitude that allows casting the
optimization problem, as a minimization one) for the
optimization problem as a minimization one) for the
given Auto-Sklearn Scorer object
Parameters
----------
solution: np.ndarray
The ground truth of the targets
prediction: np.ndarray
The best estimate from the model, of the given targets
task_type: int
To understand if the problem task is classification
or regression
metric: Scorer
Object that hosts a function to calculate how good the
prediction is according to the solution.
scoring_functions: List[Scorer]
A list of metrics to calculate multiple losses
Returns
-------
float or Dict[str, float]
A loss function for each of the provided scorer objects
"""
score = calculate_score(
solution=solution,
@@ -463,7 +464,80 @@ def calculate_loss(
# maybe metric argument is not in scoring_functions
# so append it to the list. Rather than check if such
# is the case, redefining loss_dict[metric] is less expensive
loss_dict[metric_.name] = metric_._optimum - metric_._sign * score[metric_.name]
loss_dict[metric_.name] = metric_._optimum - score[metric_.name]
return loss_dict
else:
return metric._optimum - metric._sign * cast(float, score)
rval = metric._optimum - cast(float, score)
return rval


def calculate_metric(
metric: Scorer,
prediction: np.ndarray,
solution: np.ndarray,
task_type: int
) -> float:
"""
Returns a metric for the given Auto-Sklearn Scorer object.
Its direction is determined by the metric itself.
Parameters
----------
solution: np.ndarray
The ground truth of the targets
prediction: np.ndarray
The best estimate from the model, of the given targets
task_type: int
To understand if the problem task is classification
or regression
metric: Scorer
Object that hosts a function to calculate how good the
prediction is according to the solution.
Returns
-------
float
"""
score = _compute_scorer(
solution=solution,
prediction=prediction,
metric=metric,
task_type=task_type,
)
return metric._sign * score


def _compute_scorer(
metric: Scorer,
prediction: np.ndarray,
solution: np.ndarray,
task_type: int
) -> float:
"""
Returns a score (a magnitude that allows casting the
optimization problem as a maximization one) for the
given Auto-Sklearn Scorer object
Parameters
----------
solution: np.ndarray
The ground truth of the targets
prediction: np.ndarray
The best estimate from the model, of the given targets
task_type: int
To understand if the problem task is classification
or regression
metric: Scorer
Object that hosts a function to calculate how good the
prediction is according to the solution.
Returns
-------
float
"""
if task_type in REGRESSION_TASKS:
# TODO put this into the regression metric itself
cprediction = sanitize_array(prediction)
score = metric(solution, cprediction)
else:
score = metric(solution, prediction)
return score
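Taken together, the refactor routes every computation through _compute_scorer: calculate_score returns the signed, maximization-oriented value (and, when scoring_functions is passed, now always includes the requested metric in the returned dict), calculate_loss subtracts that value from the metric's optimum, and calculate_metric flips the sign back into the metric's natural direction. A hedged sketch of how the three relate for an error metric; it assumes auto-sklearn's mean_squared_error scorer has _optimum == 0 and _sign == -1, and that mean_absolute_error is available as an extra scorer.

import numpy as np

from autosklearn.constants import REGRESSION
from autosklearn.metrics import (
    calculate_loss,
    calculate_metric,
    calculate_score,
    mean_absolute_error,
    mean_squared_error,
)

y_true = np.array([0.1, 0.2, 0.3])
y_pred = np.array([0.15, 0.25, 0.35])

# Maximization convention: roughly -MSE, a negative number close to zero.
score = calculate_score(y_true, y_pred, REGRESSION, mean_squared_error)

# optimum - score == 0 - (-MSE) == MSE: non-negative, lower is better.
loss = calculate_loss(y_true, y_pred, REGRESSION, mean_squared_error)

# Natural direction of the metric: the plain MSE, identical to `loss` here.
value = calculate_metric(mean_squared_error, y_pred, y_true, REGRESSION)
assert np.isclose(loss, value)

# With scoring_functions, the requested metric is now always part of the result.
score_dict = calculate_score(
    y_true, y_pred, REGRESSION, mean_squared_error,
    scoring_functions=[mean_absolute_error],
)
assert "mean_squared_error" in score_dict and "mean_absolute_error" in score_dict

These are the same identities the updated assertions in test_metrics.py below exercise.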
2 changes: 1 addition & 1 deletion test/test_automl/test_automl.py
@@ -637,7 +637,7 @@ def test_load_best_individual_model(metric, backend, dask_client):
assert automl.score(X_test, Y_test) > 0.9
elif metric.name == 'log_loss':
# Seen values in github actions of 0.6978304740364537
assert automl.score(X_test, Y_test) <= 0.72
assert automl.score(X_test, Y_test) < 0.7
else:
raise ValueError(metric.name)

36 changes: 29 additions & 7 deletions test/test_metric/test_metrics.py
@@ -7,7 +7,7 @@

import autosklearn.metrics

from autosklearn.metrics import calculate_score, calculate_loss
from autosklearn.metrics import calculate_score, calculate_loss, calculate_metric
from autosklearn.constants import BINARY_CLASSIFICATION, REGRESSION

from smac.utils.constants import MAXINT
@@ -536,7 +536,7 @@ def test_regression_only_metric(self):

def test_calculate_loss():
# In a 0-1 ranged scorer, make sure that the loss
# has a expected positive value
# has an expected positive value
y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
score = sklearn.metrics.accuracy_score(y_true, y_pred)
@@ -546,8 +546,7 @@ def test_calculate_loss():
task_type=BINARY_CLASSIFICATION,
metric=autosklearn.metrics.accuracy,
)
loss = 1.0 - score
assert pytest.approx(loss) == calculate_loss(
assert pytest.approx(1.0 - score) == calculate_loss(
solution=y_true,
prediction=y_pred,
task_type=BINARY_CLASSIFICATION,
@@ -582,14 +581,37 @@ def test_calculate_loss():
y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
score = sklearn.metrics.mean_squared_error(y_true, y_pred)
assert pytest.approx(score) == calculate_score(
assert pytest.approx(0 - score) == calculate_score(
solution=y_true,
prediction=y_pred,
task_type=REGRESSION,
metric=autosklearn.metrics.mean_squared_error,
)
assert pytest.approx(score) == calculate_loss(
solution=y_true,
prediction=y_pred,
task_type=REGRESSION,
metric=autosklearn.metrics.mean_squared_error,
)
loss = score
assert pytest.approx(loss) == calculate_loss(


def test_calculate_metric():
# metric to be maximized
y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
score = sklearn.metrics.accuracy_score(y_true, y_pred)
assert pytest.approx(score) == calculate_metric(
solution=y_true,
prediction=y_pred,
task_type=BINARY_CLASSIFICATION,
metric=autosklearn.metrics.accuracy,
)

# metric to be minimized
y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
score = sklearn.metrics.mean_squared_error(y_true, y_pred)
assert pytest.approx(score) == calculate_metric(
solution=y_true,
prediction=y_pred,
task_type=REGRESSION,
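For a bounded, maximized metric the same identities reduce to loss == 1 - accuracy. A companion sketch, not part of the diff, assuming auto-sklearn's accuracy scorer has _optimum == 1 and _sign == +1:

import numpy as np
import sklearn.metrics

import autosklearn.metrics
from autosklearn.constants import BINARY_CLASSIFICATION
from autosklearn.metrics import calculate_loss, calculate_metric

y_true = np.array([0, 1, 0, 1])
y_pred = np.array([0, 1, 1, 1])

acc = sklearn.metrics.accuracy_score(y_true, y_pred)  # 0.75

# calculate_metric reports accuracy unchanged (the sign is +1 for maximized metrics).
assert np.isclose(
    calculate_metric(autosklearn.metrics.accuracy, y_pred, y_true, BINARY_CLASSIFICATION),
    acc,
)

# calculate_loss returns optimum - score, i.e. 1 - accuracy here.
assert np.isclose(
    calculate_loss(y_true, y_pred, BINARY_CLASSIFICATION, autosklearn.metrics.accuracy),
    1.0 - acc,
)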
