Multi-objective ensemble API #1485

Merged: 5 commits, May 30, 2022 (diff shows changes from 1 commit)
6 changes: 3 additions & 3 deletions autosklearn/automl.py
```diff
@@ -1779,12 +1779,12 @@ def _get_runhistory_models_performance(self):
                 time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_value.endtime))
             )
             cost = run_value.cost
+            train_loss = run_value.additional_info["train_loss"]
             if len(self._metrics) > 1:
                 cost = cost[0]
+                train_loss = train_loss[0]
             val_score = metric._optimum - (metric._sign * cost)
-            train_score = metric._optimum - (
-                metric._sign * run_value.additional_info["train_loss"]
-            )
+            train_score = metric._optimum - (metric._sign * train_loss)
             scores = {
                 "Timestamp": endtime,
                 "single_best_optimization_score": val_score,
```
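For context, a minimal sketch of why the indexing above is needed (not part of the diff): under multi-objective optimization there is one cost per metric, so `run_value.cost` and the logged `train_loss` become sequences, and only the first metric is reported in the single-best performance table. The variable names mirror the diff; the literal values below are made up for illustration.

```python
# Illustrative sketch only: mirrors the logic in _get_runhistory_models_performance
# with hypothetical values standing in for the real run-history fields.
metrics = ["balanced_accuracy", "log_loss"]  # more than one metric => multi-objective
cost = [0.08, 0.35]                          # one optimization cost per metric
train_loss = [0.05, 0.30]                    # one training loss per metric

if len(metrics) > 1:
    # Only the first metric is used for the "single best" performance report.
    cost = cost[0]
    train_loss = train_loss[0]

optimum, sign = 1.0, 1.0                     # e.g. an accuracy-like metric
val_score = optimum - sign * cost            # 0.92
train_score = optimum - sign * train_loss    # 0.95
print(val_score, train_score)
```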
7 changes: 7 additions & 0 deletions autosklearn/pipeline/util.py
```diff
@@ -50,10 +50,17 @@ def get_dataset(
     train_size_maximum=150,
     make_multilabel=False,
     make_binary=False,
+    return_target_as_string=False,
 ):
     iris = getattr(sklearn.datasets, "load_%s" % dataset)()
     X = iris.data.astype(np.float32)
     Y = iris.target
+
+    if return_target_as_string:
+        if make_binary or make_multilabel or (len(Y.shape) > 1 and Y.shape[1] > 1):
+            raise NotImplementedError()
+        Y = np.array([iris.target_names[y] for y in Y])
+
     rs = np.random.RandomState(42)
     indices = np.arange(X.shape[0])
     train_size = min(int(len(indices) / 3.0 * 2.0), train_size_maximum)
```
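A short usage sketch of the new flag, assuming the helper above is importable from `autosklearn.pipeline.util` and returns `(X_train, Y_train, X_test, Y_test)` as it does elsewhere in the test suite:

```python
# Sketch: fetch iris with string class labels instead of integer-encoded targets.
from autosklearn.pipeline.util import get_dataset

X_train, Y_train, X_test, Y_test = get_dataset(
    dataset="iris",
    return_target_as_string=True,  # targets become e.g. "setosa" rather than 0
)
print(Y_train[:5])  # e.g. ['versicolor' 'setosa' ...]
```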
2 changes: 2 additions & 0 deletions test/fixtures/datasets.py
```diff
@@ -84,6 +84,7 @@ def _make(
         task: Optional[int] = None,
         feat_type: Optional[Dict | str] = None,
         as_datamanager: bool = False,
+        return_target_as_string: bool = False,
     ) -> Any:
         X, y, Xt, yt = get_dataset(
             dataset=name,
@@ -92,6 +93,7 @@
             train_size_maximum=train_size_maximum,
             make_multilabel=make_multilabel,
             make_binary=make_binary,
+            return_target_as_string=return_target_as_string,
         )

         if not as_datamanager:
```
44 changes: 44 additions & 0 deletions test/test_automl/cases.py
```diff
@@ -25,6 +25,7 @@

 import numpy as np

+import autosklearn.metrics
 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor
 from autosklearn.automl_common.common.utils.backend import Backend

@@ -133,6 +134,49 @@ def case_classifier_fitted_cv(
     return model


+@case(tags=["classifier", "fitted", "holdout", "cached", "multiobjective"])
+@parametrize("dataset", ["iris"])
+def case_classifier_fitted_holdout_multiobjective(
+    dataset: str,
+    make_cache: Callable[[str], Cache],
+    make_backend: Callable[..., Backend],
+    make_automl_classifier: Callable[..., AutoMLClassifier],
+    make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]],
+) -> AutoMLClassifier:
+    """Case of a holdout fitted classifier"""
+    resampling_strategy = "holdout"
+
+    key = f"case_classifier_{resampling_strategy}_{dataset}"
+
+    # This locks the cache for this item while we check, required for pytest-xdist
+    with make_cache(key) as cache:
+        if "model" not in cache:
+            # Make the model in the cache
+            model = make_automl_classifier(
+                temporary_directory=cache.path("backend"),
+                delete_tmp_folder_after_terminate=False,
+                resampling_strategy=resampling_strategy,
+                metrics=[
+                    autosklearn.metrics.balanced_accuracy,
+                    autosklearn.metrics.log_loss,
+                ],
+            )
+
+            X, y, Xt, yt = make_sklearn_dataset(
+                name=dataset, return_target_as_string=True
+            )
+            model.fit(X, y, dataset_name=dataset)
+
+            # Save the model
+            cache.save(model, "model")
+
+    # Try the model from the cache
+    model = cache.load("model")
+    model._backend = copy_backend(old=model._backend, new=make_backend())
+
+    return model
+
+
 @case(tags=["regressor", "fitted", "holdout", "cached"])
 @parametrize("dataset", ["boston"])
 def case_regressor_fitted_holdout(
```
23 changes: 13 additions & 10 deletions test/test_automl/test_dummy_predictions.py
```diff
@@ -1,4 +1,6 @@
-from typing import Callable, Tuple
+from __future__ import annotations
+
+from typing import Callable, Sequence, Tuple

 from pathlib import Path

@@ -12,7 +14,7 @@
     REGRESSION,
 )
 from autosklearn.data.xy_data_manager import XYDataManager
-from autosklearn.metrics import Scorer, accuracy, precision, r2
+from autosklearn.metrics import Scorer, accuracy, log_loss, precision, r2
 from autosklearn.util.logging_ import PicklableClientLogger

 import pytest
@@ -21,17 +23,18 @@


 @parametrize(
-    "dataset, metric, task",
+    "dataset, metrics, task",
     [
-        ("breast_cancer", accuracy, BINARY_CLASSIFICATION),
-        ("wine", accuracy, MULTICLASS_CLASSIFICATION),
-        ("diabetes", r2, REGRESSION),
+        ("breast_cancer", [accuracy], BINARY_CLASSIFICATION),
+        ("breast_cancer", [accuracy, log_loss], BINARY_CLASSIFICATION),
+        ("wine", [accuracy], MULTICLASS_CLASSIFICATION),
+        ("diabetes", [r2], REGRESSION),
     ],
 )
 def test_produces_correct_output(
     dataset: str,
     task: int,
-    metric: Scorer,
+    metrics: Sequence[Scorer],
     mock_logger: PicklableClientLogger,
     make_automl: Callable[..., AutoML],
     make_sklearn_dataset: Callable[..., XYDataManager],
@@ -45,8 +48,8 @@ def test_produces_correct_output(
     task : int
         The task type of the dataset

-    metric: Scorer
-        Metric to use, required as fit usually determines the metric to use
+    metrics: Sequence[Scorer]
+        Metric(s) to use, required as fit usually determines the metric to use

     Fixtures
     --------
@@ -66,7 +69,7 @@ def test_produces_correct_output(
     * It should produce predictions "predictions_ensemble_1337_1_0.0.npy"
     """
     seed = 1337
-    automl = make_automl(metrics=[metric], seed=seed)
+    automl = make_automl(metrics=metrics, seed=seed)
     automl._logger = mock_logger

     datamanager = make_sklearn_dataset(
```
32 changes: 32 additions & 0 deletions test/test_automl/test_post_fit.py
```diff
@@ -1,3 +1,6 @@
+import numpy as np
+from sklearn.ensemble import VotingClassifier, VotingRegressor
+
 from autosklearn.automl import AutoML

 from pytest_cases import parametrize_with_cases
@@ -66,3 +69,32 @@ def test_no_ensemble(automl: AutoML) -> None:
     assert automl.ensemble_ is None
     assert len(automl.models_) == 0
     assert len(automl.cv_models_) == 0
+
+
+@parametrize_with_cases("automl", cases, has_tag=["multiobjective"])
+def test__load_pareto_front(automl: AutoML) -> None:
+    """
+    Parameters
+    ----------
+    automl : AutoML
+        An AutoML object fitted with multiple objective metrics
+
+    Expects
+    -------
+    * Auto-sklearn can predict and has a model
+    * _load_pareto_front returns one scikit-learn ensemble
+    """
+    # Check that the predict function works
+    X = np.array([[1.0, 1.0, 1.0, 1.0]])
+    print(automl.predict(X))
+    assert automl.predict_proba(X).shape == (1, 3)
+    assert automl.predict(X).shape == (1,)
+
+    pareto_front = automl._load_pareto_front()
+    assert len(pareto_front) == 1
+    for ensemble in pareto_front:
+        assert isinstance(ensemble, (VotingClassifier, VotingRegressor))
+        y_pred = ensemble.predict_proba(X)
+        assert y_pred.shape == (1, 3)
+        y_pred = ensemble.predict(X)
+        assert y_pred in ["setosa", "versicolor", "virginica"]
```
Review comment from eddiebergman (Contributor), May 24, 2022:
It's not very clear why only one scikit-learn ensemble should be expected here, but I assume it's because of the default parameter for ensemble selection.

It also seems this test is very specific to this single case (fitted multiobjective iris classifier).

I had the same problem when considering cases and my solution was just to have general tests. We can just push this through for now, knowing it will break if we add any other cases with the "multiobjective" tag.

For a longer-term solution, I have a few ideas:

- We just use make_automl and make_dataset and construct the specific automl instance inside this test, so the specifics being tested are directly evident. Same as the old way of doing things and it loses caching, but at least all relevant setup assumptions are stated clearly in the test.
- We encode these extra specifics somehow:
  - The case just returns extra info:

    ```python
    def case_classifier_fitted_holdout_multiobjective(...):
        ...
        return (model, extra_info)
    ```

  - The extra specifics are saved directly on, and accessed from, the model object. This does add a lot more introspection capabilities to the model, which may be helpful for future additions.

Happy to hear any other ideas on this though; I admit the caching solution as-is is not perfect for this reason, but it does allow the tests to be a lot more modular.
Reply from the pull request author (Contributor):

> It's not very clear why only one scikit-learn ensemble should be expected here, but I assume it's because of the default parameter for ensemble selection.

Correct.

> It also seems this test is very specific to this single case (fitted multiobjective iris classifier).

Correct as well.

> I had the same problem when considering cases and my solution was just to have general tests. We can just push this through for now, knowing it will break if we add any other cases with the "multiobjective" tag.

Very glad you see it this way.

> Happy to hear any other ideas on this though

For the second idea, would we check whether the AutoML was built on iris and then use it? Besides that, could we maybe add a filter on which dataset(s) were used to build the AutoML system?

Reply from eddiebergman (Contributor):
> For the second idea, would we check whether the AutoML was built on iris and then use it? Besides that, could we maybe add a filter on which dataset(s) were used to build the AutoML system?

Yup, it's definitely possible; the easiest way is to just do so in the test itself, i.e. `if extra_info["dataset"] != "iris": pass`, but I'm not the biggest fan of that solution.

The overarching problem is that you can't use `@parametrize` and `@tags` together, i.e. you can't associate a parameter with a tag.

I guess my preferred solution is to include more general things in the extra_info, or encode it on the model, meaning the tests don't have to do any filtering.

```python
extra_info = {
    "X_shape": X.shape,
    "y_shape": y.shape,
    "labels": ...
}
return (automl, extra_info)
```

It's not the cleanest, but at least it means this test could theoretically work for any other "multiobjective"-tagged case, as long as it provides the necessary extra_info.
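To make the idea concrete, a hedged sketch of what a test consuming such an `(automl, extra_info)` tuple could look like; the test name, import path, and extra_info keys below are illustrative and not part of the PR:

```python
# Sketch only: a case returns (automl, extra_info) and the test uses extra_info
# instead of hard-coding iris-specific expectations. Names are hypothetical.
from pytest_cases import parametrize_with_cases

import test.test_automl.cases as cases  # assumption: same cases module as in this PR


@parametrize_with_cases("automl_and_info", cases=cases, has_tag=["multiobjective"])
def test_pareto_front_predictions(automl_and_info) -> None:
    automl, extra_info = automl_and_info

    # Build a prediction input of the right width from the recorded shape
    # rather than assuming the four iris features.
    n_features = extra_info["X_shape"][1]
    X = [[1.0] * n_features]

    for ensemble in automl._load_pareto_front():
        prediction = ensemble.predict(X)
        # Predicted labels must come from the labels recorded by the case.
        assert prediction[0] in extra_info["labels"]
```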