First draft of multi-objective optimization #1455

Merged · 24 commits · May 12, 2022
Changes from 4 commits
57 changes: 39 additions & 18 deletions autosklearn/automl.py
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Callable, Iterable, Mapping, Optional, Tuple
from typing import Any, Callable, Iterable, Mapping, Optional, Sequence, Tuple

import copy
import io
@@ -210,7 +210,7 @@ def __init__(
get_smac_object_callback: Optional[Callable] = None,
smac_scenario_args: Optional[Mapping] = None,
logging_config: Optional[Mapping] = None,
metric: Optional[Scorer] = None,
metric: Optional[Scorer | Sequence[Scorer]] = None,
Contributor
Not necessary, just something to know: Optional[X] == Union[X, None] == X | None
i.e. you could write Scorer | Sequence[Scorer] | None = None

Contributor Author
That looks nice, will do.
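
For illustration (not part of the diff, and assuming auto-sklearn is installed for the Scorer import), a minimal sketch of the equivalence the reviewer describes; the function names are hypothetical, and the __future__ import is what lets the | annotation syntax work on Python versions older than 3.10, which is also why the PR adds it to the touched modules:

from __future__ import annotations  # lets the | annotation syntax work on Python < 3.10

from typing import Optional, Sequence, Union

from autosklearn.metrics import Scorer

# Hypothetical signatures, not taken from the PR: all three annotations are equivalent.
def fit_a(metric: Optional[Union[Scorer, Sequence[Scorer]]] = None) -> None: ...
def fit_b(metric: Union[Scorer, Sequence[Scorer], None] = None) -> None: ...
def fit_c(metric: Scorer | Sequence[Scorer] | None = None) -> None: ...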

scoring_functions: Optional[list[Scorer]] = None,
get_trials_callback: Optional[IncorporateRunResultCallback] = None,
dataset_compression: bool | Mapping[str, Any] = True,
@@ -265,7 +265,7 @@ def __init__(
initial_configurations_via_metalearning
)

self._scoring_functions = scoring_functions or {}
self._scoring_functions = scoring_functions or []
self._resampling_strategy_arguments = resampling_strategy_arguments or {}

# Single core, local runs should use fork to prevent the __main__ requirements
@@ -447,7 +447,9 @@ def _do_dummy_prediction(self) -> None:
resampling_strategy=self._resampling_strategy,
initial_num_run=dummy_run_num,
stats=stats,
metric=self._metric,
metrics=(
[self._metric] if isinstance(self._metric, Scorer) else self._metric
),
memory_limit=memory_limit,
disable_file_output=self._disable_evaluator_output,
abort_on_first_run_crash=False,
@@ -692,10 +694,14 @@ def fit(
# defined in the estimator fit call
if self._metric is None:
raise ValueError("No metric given.")
if not isinstance(self._metric, Scorer):
raise ValueError(
"Metric must be instance of " "autosklearn.metrics.Scorer."
)
if isinstance(self._metric, Sequence):
for entry in self._metric:
if not isinstance(entry, Scorer):
raise ValueError(
"Metric must be instance of autosklearn.metrics.Scorer."
)
elif not isinstance(self._metric, Scorer):
raise ValueError("Metric must be instance of autosklearn.metrics.Scorer.")

# If no dask client was provided, we create one, so that we can
# start a ensemble process in parallel to smbo optimize
@@ -790,7 +796,11 @@ def fit(
backend=copy.deepcopy(self._backend),
dataset_name=dataset_name,
task=self._task,
metric=self._metric,
metric=(
self._metric[0]
if isinstance(self._metric, Sequence)
else self._metric
),
ensemble_size=self._ensemble_size,
ensemble_nbest=self._ensemble_nbest,
max_models_on_disc=self._max_models_on_disc,
@@ -1289,7 +1299,13 @@ def fit_pipeline(
if "resampling_strategy" not in kwargs:
kwargs["resampling_strategy"] = self._resampling_strategy
if "metric" not in kwargs:
kwargs["metric"] = self._metric
kwargs["metric"] = (
[self._metric] if isinstance(self._metric, Scorer) else self._metric
)
elif "metric" in kwargs and isinstance(kwargs["metric"], Scorer):
kwargs["metric"] = [kwargs["metric"]]
kwargs["metrics"] = kwargs["metric"]
del kwargs["metric"]
if "disable_file_output" not in kwargs:
kwargs["disable_file_output"] = self._disable_evaluator_output
if "pynisher_context" not in kwargs:
@@ -1307,7 +1323,7 @@ def fit_pipeline(
autosklearn_seed=self._seed,
abort_on_first_run_crash=False,
multi_objectives=["cost"],
cost_for_crash=get_cost_of_crash(kwargs["metric"]),
cost_for_crash=get_cost_of_crash(kwargs["metrics"]),
port=self._logger_port,
**kwargs,
**self._resampling_strategy_arguments,
@@ -1492,7 +1508,9 @@ def fit_ensemble(
backend=copy.deepcopy(self._backend),
dataset_name=dataset_name if dataset_name else self._dataset_name,
task=task if task else self._task,
metric=self._metric,
metric=(
self._metric[0] if isinstance(self._metric, Sequence) else self._metric
),
ensemble_size=ensemble_size if ensemble_size else self._ensemble_size,
ensemble_nbest=ensemble_nbest if ensemble_nbest else self._ensemble_nbest,
max_models_on_disc=self._max_models_on_disc,
@@ -1632,7 +1650,7 @@ def score(self, X, y):
)

def _get_runhistory_models_performance(self):
metric = self._metric
metric = self._metric if isinstance(self._metric, Scorer) else self._metric[0]
data = self.runhistory_.data
performance_list = []
for run_key, run_value in data.items():
@@ -1644,7 +1662,10 @@ def _get_runhistory_models_performance(self):
endtime = pd.Timestamp(
time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_value.endtime))
)
val_score = metric._optimum - (metric._sign * run_value.cost)
cost = run_value.cost
if not isinstance(self._metric, Scorer):
cost = cost[0]
Contributor
I assume this is a point of API conflict? It would be good to know about all the metrics for a model, but at the end of the day we currently only support one, so we choose the first?

Contributor Author
Yes, it would be good to know about all the metrics. I will look into returning multiple metrics here (should be possible).

Contributor Author
Please see my comment wrt this in the PR comment at the top.
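
A standalone sketch of the fallback discussed in this thread (the helper is hypothetical, not the PR's code): when a run carries costs for several objectives, single-metric reporting currently falls back to the first objective.

# Hypothetical helper mirroring the fallback above; not part of the PR.
def first_objective_cost(cost):
    """Return the first objective's cost, whether `cost` is a scalar or a sequence."""
    if isinstance(cost, (list, tuple)):
        return cost[0]
    return cost

print(first_objective_cost(0.12))          # single-objective run -> 0.12
print(first_objective_cost([0.12, 34.5]))  # multi-objective run  -> 0.12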

val_score = metric._optimum - (metric._sign * cost)
train_score = metric._optimum - (
metric._sign * run_value.additional_info["train_loss"]
)
@@ -1656,9 +1677,10 @@ def _get_runhistory_models_performance(self):
# Append test-scores, if data for test_loss are available.
# This is the case, if X_test and y_test where provided.
if "test_loss" in run_value.additional_info:
test_score = metric._optimum - (
metric._sign * run_value.additional_info["test_loss"]
)
test_loss = run_value.additional_info["test_loss"]
if not isinstance(self._metric, Scorer):
test_loss = test_loss[0]
test_score = metric._optimum - (metric._sign * test_loss)
scores["single_best_test_score"] = test_score

performance_list.append(scores)
@@ -1912,7 +1934,6 @@ def show_models(self) -> dict[int, Any]:
.. code-block:: python

import sklearn.datasets
import sklearn.metrics
import autosklearn.regression

X, y = sklearn.datasets.load_diabetes(return_X_y=True)
16 changes: 8 additions & 8 deletions autosklearn/ensemble_builder.py
@@ -1003,9 +1003,9 @@ def compute_loss_per_model(self):
solution=self.y_true_ensemble,
prediction=y_ensemble,
task_type=self.task_type,
metric=self.metric,
metrics=[self.metric],
scoring_functions=None,
)
)[self.metric.name]

if np.isfinite(self.read_losses[y_ens_fn]["ens_loss"]):
self.logger.debug(
@@ -1515,9 +1515,9 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
solution=self.y_true_ensemble,
prediction=train_pred,
task_type=self.task_type,
metric=self.metric,
metrics=[self.metric],
scoring_functions=None,
),
)[self.metric.name],
}
if valid_pred is not None:
# TODO: valid_pred are a legacy from competition manager
@@ -1526,19 +1526,19 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
solution=self.y_valid,
prediction=valid_pred,
task_type=self.task_type,
metric=self.metric,
metrics=[self.metric],
scoring_functions=None,
)
)[self.metric.name]

# In case test_pred was provided
if test_pred is not None:
performance_stamp["ensemble_test_score"] = calculate_score(
solution=self.y_test,
prediction=test_pred,
task_type=self.task_type,
metric=self.metric,
metrics=[self.metric],
scoring_functions=None,
)
)[self.metric.name]

self.ensemble_history.append(performance_stamp)

19 changes: 7 additions & 12 deletions autosklearn/ensembles/ensemble_selection.py
@@ -164,18 +164,13 @@ def _fast(
out=fant_ensemble_prediction,
)

# calculate_loss is versatile and can return a dict of losses
# when scoring_functions=None, we know it will be a float
losses[j] = cast(
float,
calculate_loss(
solution=labels,
prediction=fant_ensemble_prediction,
task_type=self.task_type,
metric=self.metric,
scoring_functions=None,
),
)
losses[j] = calculate_loss(
solution=labels,
prediction=fant_ensemble_prediction,
task_type=self.task_type,
metrics=[self.metric],
scoring_functions=None,
)[self.metric.name]

all_best = np.argwhere(losses == np.nanmin(losses)).flatten()

11 changes: 8 additions & 3 deletions autosklearn/estimators.py
@@ -1,5 +1,7 @@
# -*- encoding: utf-8 -*-
from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union
from __future__ import annotations

from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union

import dask.distributed
import joblib
@@ -46,7 +48,7 @@ def __init__(
smac_scenario_args=None,
logging_config=None,
metadata_directory=None,
metric=None,
metric: Optional[Scorer | Sequence[Scorer]] = None,
scoring_functions: Optional[List[Scorer]] = None,
load_models: bool = True,
get_trials_callback=None,
@@ -792,6 +794,9 @@ def leaderboard(
What column to sort by. If that column is not present, the
sorting defaults to the ``"model_id"`` index column.

Defaults to the metric optimized. Sorts by the first objective
in the case of a multi-objective optimization problem.

sort_order: "auto" or "ascending" or "descending" = "auto"
Which sort order to apply to the ``sort_by`` column. If left
as ``"auto"``, it will sort by a sensible default where "better" is
@@ -884,7 +889,7 @@ def has_key(rv, key):
"start_time": rval.starttime,
"end_time": rval.endtime,
"status": str(rval.status),
"cost": rval.cost,
"cost": rval.cost if isinstance(rval.cost, float) else rval.cost[0],
"train_loss": rval.additional_info["train_loss"]
if has_key(rval, "train_loss")
else None,
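To make the new estimator signature concrete, a hedged usage sketch (not taken from the PR, and the exact semantics of multiple metrics may still change in this first draft): the metric argument can now be either a single Scorer or a sequence of Scorers.

# Hedged usage sketch assuming auto-sklearn with this PR installed.
import autosklearn.classification
from autosklearn.metrics import balanced_accuracy, log_loss

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    metric=[balanced_accuracy, log_loss],  # a Sequence[Scorer] is now accepted
)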
66 changes: 57 additions & 9 deletions autosklearn/evaluation/__init__.py
@@ -1,5 +1,18 @@
# -*- encoding: utf-8 -*-
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, cast
from __future__ import annotations

from typing import (
Any,
Callable,
Dict,
List,
Optional,
Sequence,
Tuple,
Type,
Union,
cast,
)

import functools
import json
@@ -85,11 +98,17 @@ def fit_predict_try_except_decorator(
queue.close()


def get_cost_of_crash(metric: Scorer) -> float:
def get_cost_of_crash(
metric: Union[Scorer | Sequence[Scorer]],
Contributor
Likewise here, Union[X | Y] == X | Y; the | is essentially just the infix operator for Union, in the same way you write x + y instead of add(x, y).

i.e. metric: Scorer | Sequence[Scorer]

Contributor Author
Thanks for catching that.

) -> Union[float, List[float]]:

# The metric must always be defined to extract optimum/worst
if not isinstance(metric, Scorer):
raise ValueError("The metric must be stricly be an instance of Scorer")
if isinstance(metric, Sequence):
return [cast(float, get_cost_of_crash(metric_)) for metric_ in metric]
elif not isinstance(metric, Scorer):
raise ValueError(
"The metric must be stricly be an instance of Scorer or a sequence of "
"Scorers"
)

# Autosklearn optimizes the err. This function translates
# worst_possible_result to be a minimization problem.
@@ -126,7 +145,7 @@ def __init__(
resampling_strategy: Union[
str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
],
metric: Scorer,
metrics: Sequence[Scorer],
cost_for_crash: float,
abort_on_first_run_crash: bool,
port: int,
@@ -144,7 +163,7 @@ def __init__(
disable_file_output: bool = False,
init_params: Optional[Dict[str, Any]] = None,
budget_type: Optional[str] = None,
ta: Optional[Callable] = None,
ta: Optional[Callable] = None, # Required by SMAC's parent class
**resampling_strategy_args: Any,
):
if resampling_strategy == "holdout":
@@ -186,13 +205,14 @@ def __init__(
par_factor=par_factor,
cost_for_crash=self.worst_possible_result,
abort_on_first_run_crash=abort_on_first_run_crash,
multi_objectives=multi_objectives,
)

self.backend = backend
self.autosklearn_seed = autosklearn_seed
self.resampling_strategy = resampling_strategy
self.initial_num_run = initial_num_run
self.metric = metric
self.metrics = metrics
self.resampling_strategy = resampling_strategy
self.resampling_strategy_args = resampling_strategy_args
self.scoring_functions = scoring_functions
@@ -356,7 +376,7 @@ def run(
config=config,
backend=self.backend,
port=self.port,
metric=self.metric,
metrics=self.metrics,
seed=self.autosklearn_seed,
num_run=num_run,
scoring_functions=self.scoring_functions,
@@ -550,4 +570,32 @@ def run(

autosklearn.evaluation.util.empty_queue(queue)
self.logger.info("Finished evaluating configuration %d" % config_id)

# Do some sanity checking (for multi objective)
if len(self.multi_objectives) > 1:
error = (
f"Returned costs {cost} does not match the number of objectives"
f" {len(self.multi_objectives)}."
)

# If dict convert to array
# Make sure the ordering is correct
if isinstance(cost, dict):
ordered_cost = []
for name in self.multi_objectives:
if name not in cost:
raise RuntimeError(
f"Objective {name} was not found in the returned costs."
)

ordered_cost.append(cost[name])
cost = ordered_cost

if isinstance(cost, list):
if len(cost) != len(self.multi_objectives):
raise RuntimeError(error)

if isinstance(cost, float):
raise RuntimeError(error)

return status, cost, runtime, additional_run_info
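
As a closing illustration, a standalone sketch of the dict handling performed by the sanity check above (the inputs are made up): per-objective costs returned as a dict are re-ordered to follow the scenario's objective list before the length check runs.

# Standalone sketch of the dict-to-list normalization above; inputs are hypothetical.
multi_objectives = ["accuracy", "runtime"]
cost = {"runtime": 3.2, "accuracy": 0.11}  # as a target-algorithm run might return it

ordered_cost = []
for name in multi_objectives:
    if name not in cost:
        raise RuntimeError(f"Objective {name} was not found in the returned costs.")
    ordered_cost.append(cost[name])

assert ordered_cost == [0.11, 3.2]  # order now matches multi_objectives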