Fix moo things #1501

Merged
13 commits merged on Jun 14, 2022
90 changes: 67 additions & 23 deletions autosklearn/ensemble_building/builder.py
@@ -169,6 +169,8 @@ def __init__(
self.validation_performance_ = np.inf

# Data we may need
# TODO: The test data is needlessly loaded but automl_common has no concept of
# these and is perhaps too rigid
datamanager: XYDataManager = self.backend.load_datamanager()
self._X_test: SUPPORTED_FEAT_TYPES | None = datamanager.data.get("X_test", None)
self._y_test: np.ndarray | None = datamanager.data.get("Y_test", None)
@@ -442,6 +444,17 @@ def main(
self.logger.debug("Found no runs")
raise RuntimeError("Found no runs")

# We load in `X_data` if we need it
if any(m._needs_X for m in self.metrics):
ensemble_X_data = self.X_data("ensemble")

if ensemble_X_data is None:
msg = "No `X_data` for 'ensemble' which was required by metrics"
self.logger.debug(msg)
raise RuntimeError(msg)
else:
ensemble_X_data = None

# Calculate the loss for those that require it
requires_update = self.requires_loss_update(runs)
if self.read_at_most is not None:
@@ -450,9 +463,7 @@
for run in requires_update:
run.record_modified_times() # So we don't count as modified next time
run.losses = {
-                    metric.name: self.loss(
-                        run, metric=metric, X_data=self.X_data("ensemble")
-                    )
+                    metric.name: self.loss(run, metric=metric, X_data=ensemble_X_data)
for metric in self.metrics
}

@@ -549,15 +560,14 @@ def main(
return self.ensemble_history, self.ensemble_nbest

targets = cast(np.ndarray, self.targets("ensemble")) # Sure they exist
X_data = self.X_data("ensemble")

ensemble = self.fit_ensemble(
candidates=candidates,
-            X_data=X_data,
targets=targets,
runs=runs,
ensemble_class=self.ensemble_class,
ensemble_kwargs=self.ensemble_kwargs,
+            X_data=ensemble_X_data,
task=self.task_type,
metrics=self.metrics,
precision=self.precision,
@@ -587,7 +597,15 @@ def main(

run_preds = [r.predictions(kind, precision=self.precision) for r in models]
pred = ensemble.predict(run_preds)
-            X_data = self.X_data(kind)

if any(m._needs_X for m in self.metrics):
X_data = self.X_data(kind)
if X_data is None:
msg = f"No `X` data for '{kind}' which was required by metrics"
self.logger.debug(msg)
raise RuntimeError(msg)
else:
X_data = None

scores = calculate_scores(
solution=pred_targets,
@@ -597,10 +615,11 @@
X_data=X_data,
scoring_functions=None,
)
performance_stamp[f"ensemble_{score_name}_score"] = scores[
self.metrics[0].name
]
self.ensemble_history.append(performance_stamp)

performance_stamp[f"ensemble_{score_name}_score"] = scores

# Add the performance stamp to the history
self.ensemble_history.append(performance_stamp)
Comment on lines +630 to +632
Contributor Author

BUGFIX: Marking this to match the PR description; this used to be inside the for loop but should have been outside it.
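A minimal sketch of what a single history entry looks like after this fix, assuming the two metrics defined in the test fixtures; the two score keys mirror the new test at the bottom of this diff and the numeric values are illustrative only:

# Hypothetical shape of one performance stamp after this change
performance_stamp = {
    "ensemble_optimization_score": {"accuracy": 0.93, "acc_with_X_data": 0.93},
    "ensemble_test_score": {"accuracy": 0.91, "acc_with_X_data": 0.91},
}

# Appended to the history once per call to `main()`, outside the per-kind loop
ensemble_history = []
ensemble_history.append(performance_stamp)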


# Lastly, delete any runs that need to be deleted. We save this as the last step
# so that we have an ensemble saved that is up to date. If we do not do so,
@@ -805,13 +824,13 @@ def candidate_selection(

def fit_ensemble(
self,
-        candidates: list[Run],
-        X_data: SUPPORTED_FEAT_TYPES,
-        targets: np.ndarray,
+        candidates: Sequence[Run],
+        runs: Sequence[Run],
*,
-        runs: list[Run],
+        targets: np.ndarray | None = None,
ensemble_class: Type[AbstractEnsemble] = EnsembleSelection,
ensemble_kwargs: Dict[str, Any] | None = None,
+        X_data: SUPPORTED_FEAT_TYPES | None = None,
task: int | None = None,
metrics: Sequence[Scorer] | None = None,
precision: int | None = None,
@@ -825,24 +844,24 @@

Parameters
----------
-        candidates: list[Run]
+        candidates: Sequence[Run]
List of runs to build an ensemble from

-        X_data: SUPPORTED_FEAT_TYPES
-            The base level data.
+        runs: Sequence[Run]
+            List of all runs (also pruned ones and dummy runs)

-        targets: np.ndarray
+        targets: np.ndarray | None = None
The targets to build the ensemble with

-        runs: list[Run]
-            List of all runs (also pruned ones and dummy runs)

ensemble_class: AbstractEnsemble
Implementation of the ensemble algorithm.

-        ensemble_kwargs: Dict[str, Any]
+        ensemble_kwargs: Dict[str, Any] | None
Arguments passed to the constructor of the ensemble algorithm.

+        X_data: SUPPORTED_FEAT_TYPES | None = None
+            The base level data.

task: int | None = None
The kind of task performed
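A minimal usage sketch of the reworked keyword-only signature, assuming `builder`, `candidates` and `runs` already exist; arguments left out fall back to the builder's own attributes or are loaded from the backend on demand:

# Sketch only: `targets` and `X_data` are omitted and are loaded from the
# backend if the configured metrics actually require them.
ensemble = builder.fit_ensemble(
    candidates=candidates,
    runs=runs,
)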

@@ -870,6 +889,27 @@
metrics = metrics if metrics is not None else self.metrics
rs = random_state if random_state is not None else self.random_state

# Validate that kwargs doesn't have duplicates
params = {"task_type", "metrics", "random_state", "backend"}
duplicates = ensemble_kwargs.keys() & params
if any(duplicates):
raise ValueError(f"Can't provide {duplicates} in `ensemble_kwargs`")

# Validate we have targets if None specified
if targets is None:
targets = self.targets("ensemble")
if targets is None:
path = self.backend._get_targets_ensemble_filename()
raise ValueError(f"`fit_ensemble` could not find any targets at {path}")

# Validate when we have no X_data that we can load it if we need
if X_data is None and any(m._needs_X for m in metrics):
X_data = self.X_data("ensemble")
if X_data is None:
msg = "No `X_data` for `fit_ensemble` which was required by metrics"
self.logger.debug(msg)
raise RuntimeError(msg)

ensemble = ensemble_class(
task_type=task,
metrics=metrics,
@@ -995,7 +1035,8 @@ def loss(
self,
run: Run,
metric: Scorer,
-        X_data: SUPPORTED_FEAT_TYPES,
+        *,
+        X_data: SUPPORTED_FEAT_TYPES | None = None,
kind: str = "ensemble",
) -> float:
"""Calculate the loss for a run
Expand All @@ -1008,6 +1049,9 @@ def loss(
metric: Scorer
The metric to calculate the loss of

X_data: SUPPORTED_FEAT_TYPES | None = None
Any X_data required to be passed to the metric

kind: str = "ensemble"
The kind of targets to use for the run

4 changes: 4 additions & 0 deletions test/fixtures/ensemble_building.py
@@ -164,6 +164,10 @@ def _make(
backend.save_additional_data(
datamanager.data["Y_train"], what="targets_ensemble"
)
if "X_train" in datamanager.data:
backend.save_additional_data(
datamanager.data["X_train"], what="input_ensemble"
)

builder = EnsembleBuilder(
backend=backend,
26 changes: 26 additions & 0 deletions test/fixtures/metrics.py
@@ -0,0 +1,26 @@
from typing import Any

import numpy as np

from autosklearn.metrics import accuracy, make_scorer


def _accuracy_requiring_X_data(
y_true: np.ndarray,
y_pred: np.ndarray,
X_data: Any,
) -> float:
"""Dummy metric that needs X Data"""
if X_data is None:
raise ValueError()
return accuracy(y_true, y_pred)


acc_with_X_data = make_scorer(
name="acc_with_X_data",
score_func=_accuracy_requiring_X_data,
needs_X=True,
optimum=1,
worst_possible_result=0,
greater_is_better=True,
)
99 changes: 94 additions & 5 deletions test/test_ensemble_builder/test_ensemble_builder.py
@@ -10,14 +10,15 @@

from autosklearn.automl_common.common.utils.backend import Backend
from autosklearn.ensemble_building import EnsembleBuilder, Run
- from autosklearn.metrics import make_scorer
+ from autosklearn.metrics import Scorer, accuracy, make_scorer
from autosklearn.util.functional import bound, pairs

import pytest
from pytest_cases import fixture, parametrize
from unittest.mock import patch

from test.conftest import DEFAULT_SEED
from test.fixtures.metrics import acc_with_X_data


@fixture
@@ -673,6 +674,49 @@ def test_delete_runs_does_not_delete_dummy(
assert set(loaded.values()) == set(dummy_runs)


@parametrize("key", ["task_type", "metrics", "random_state", "backend"])
def test_fit_ensemble_with_duplicate_params_in_ensemble_kwargs(
builder: EnsembleBuilder,
make_run: Callable[..., Run],
key: str,
) -> None:
"""
Expects
-------
* Should raise an error if user has parameters in ensemble_kwargs that
are meant to be explicit params
"""
kwargs = {key: None}
candidates = [make_run(backend=builder.backend) for _ in range(5)]
with pytest.raises(ValueError, match="Can't provide .*"):
builder.fit_ensemble(
candidates=candidates,
runs=candidates,
ensemble_kwargs=kwargs,
)


def test_fit_ensemble_with_no_targets_raises(
builder: EnsembleBuilder,
make_run: Callable[..., Run],
) -> None:
"""
Expects
-------
* If no ensemble targets can be found then `fit_ensemble` should fail
"""
# Delete the targets and then try fit ensemble
targets_path = Path(builder.backend._get_targets_ensemble_filename())
targets_path.unlink()

candidates = [make_run(backend=builder.backend) for _ in range(5)]
with pytest.raises(ValueError, match="`fit_ensemble` could not find any .*"):
builder.fit_ensemble(
candidates=candidates,
runs=candidates,
)


def test_fit_ensemble_produces_ensemble(
builder: EnsembleBuilder,
make_run: Callable[..., Run],
@@ -682,16 +726,13 @@ def test_fit_ensemble_produces_ensemble(
-------
* Should produce an ensemble if all runs have predictions
"""
-    X_data = builder.X_data("ensemble")
targets = builder.targets("ensemble")
assert targets is not None

predictions = targets
runs = [make_run(predictions={"ensemble": predictions}) for _ in range(10)]

-    ensemble = builder.fit_ensemble(
-        candidates=runs, X_data=X_data, targets=targets, runs=runs
-    )
+    ensemble = builder.fit_ensemble(candidates=runs, runs=runs)

assert ensemble is not None

Expand Down Expand Up @@ -823,3 +864,51 @@ def test_deletion_will_not_break_current_ensemble(

for run in new_runs:
assert run in available_runs


@parametrize("metrics", [accuracy, acc_with_X_data, [accuracy, acc_with_X_data]])
def test_will_build_ensemble_with_different_metrics(
make_ensemble_builder: Callable[..., EnsembleBuilder],
make_run: Callable[..., Run],
metrics: Scorer | list[Scorer],
) -> None:
"""
Expects
-------
* Should be able to build a valid ensemble with different combinations of metrics
* Should produce a validation score for both "ensemble" and "test" scores
"""
if not isinstance(metrics, list):
metrics = [metrics]

builder = make_ensemble_builder(metrics=metrics)

# Make some runs and stick them in the same backend as the builder
# Dummy just has a terrible loss for all metrics
make_run(
dummy=True,
losses={m.name: 1000 for m in metrics},
backend=builder.backend,
)

# "Proper" runs will have the correct targets and so be better than dummy
run_predictions = {
"ensemble": builder.targets("ensemble"),
"test": builder.targets("test"),
}
for _ in range(5):
make_run(predictions=run_predictions, backend=builder.backend)

history, nbest = builder.main()

# Should only produce one step
assert len(history) == 1
hist = history[0]

# Each of these two keys should be present
for key in ["ensemble_optimization_score", "ensemble_test_score"]:
assert key in hist

# Each of these scores should contain all the metrics
for metric in metrics:
assert metric.name in hist[key]