From facef6b67e23ab0c5ee15e28a82c0c9e7b8fb983 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 9 Nov 2020 14:30:51 +0100 Subject: [PATCH 1/4] Fix encoding of exit status of the pynisher (#1001) --- autosklearn/evaluation/__init__.py | 53 +++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py index 2c369149fe..541e782f29 100644 --- a/autosklearn/evaluation/__init__.py +++ b/autosklearn/evaluation/__init__.py @@ -1,12 +1,14 @@ # -*- encoding: utf-8 -*- import functools +import json import math import multiprocessing from queue import Empty import time import traceback -from typing import Tuple +from typing import Dict, List, Optional, Tuple, Union +from ConfigSpace import Configuration import numpy as np import pynisher from smac.runhistory.runhistory import RunInfo, RunValue @@ -81,6 +83,14 @@ def get_cost_of_crash(metric): return worst_possible_result +def _encode_exit_status(exit_status): + try: + json.dumps(exit_status) + return exit_status + except (TypeError, OverflowError): + return str(exit_status) + + # TODO potentially log all inputs to this class to pickle them in order to do # easier debugging of potential crashes class ExecuteTaFuncWithQueue(AbstractTAFunc): @@ -224,11 +234,15 @@ def run_wrapper( return super().run_wrapper(run_info=run_info) - def run(self, config, instance=None, - cutoff=None, - seed=12345, - budget=0.0, - instance_specific=None): + def run( + self, + config: Configuration, + instance: Optional[str] = None, + cutoff: Optional[float] = None, + seed: int = 12345, + budget: float = 0.0, + instance_specific: Optional[str] = None, + ) -> Tuple[StatusType, float, float, Dict[str, Union[int, float, str, Dict, List, Tuple]]]: queue = multiprocessing.Queue() @@ -272,11 +286,19 @@ def run(self, config, instance=None, obj_kwargs['resampling_strategy'] = self.resampling_strategy obj_kwargs['resampling_strategy_args'] = 
self.resampling_strategy_args - obj = pynisher.enforce_limits(**arguments)(self.ta) - obj(**obj_kwargs) - - if obj.exit_status in (pynisher.TimeoutException, - pynisher.MemorylimitException): + try: + obj = pynisher.enforce_limits(**arguments)(self.ta) + obj(**obj_kwargs) + except Exception as e: + exception_traceback = traceback.format_exc() + error_message = repr(e) + additional_info = { + 'traceback': exception_traceback, + 'error': error_message + } + return StatusType.CRASHED, self.cost_for_crash, 0.0, additional_info + + if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException): # Even if the pynisher thinks that a timeout or memout occured, # it can be that the target algorithm wrote something into the queue # - then we treat it as a succesful run @@ -309,8 +331,7 @@ def run(self, config, instance=None, elif obj.exit_status is pynisher.MemorylimitException: status = StatusType.MEMOUT additional_run_info = { - 'error': 'Memout (used more than %d MB).' % - self.memory_limit + 'error': 'Memout (used more than %d MB).' % self.memory_limit } else: raise ValueError(obj.exit_status) @@ -322,7 +343,7 @@ def run(self, config, instance=None, cost = self.worst_possible_result additional_run_info = {'error': 'Your configuration of ' 'auto-sklearn does not work!', - 'exit_status': obj.exit_status, + 'exit_status': _encode_exit_status(obj.exit_status), 'subprocess_stdout': obj.stdout, 'subprocess_stderr': obj.stderr, } @@ -343,14 +364,14 @@ def run(self, config, instance=None, 'because the pynisher exit ' \ 'status %s is unknown.' 
% \ str(obj.exit_status) - additional_run_info['exit_status'] = obj.exit_status + additional_run_info['exit_status'] = _encode_exit_status(obj.exit_status) additional_run_info['subprocess_stdout'] = obj.stdout additional_run_info['subprocess_stderr'] = obj.stderr except Empty: info = None additional_run_info = { 'error': 'Result queue is empty', - 'exit_status': obj.exit_status, + 'exit_status': _encode_exit_status(obj.exit_status), 'subprocess_stdout': obj.stdout, 'subprocess_stderr': obj.stderr, 'exitcode': obj.exitcode From 5f5e0db1c5ac8eeddabf692ef94343a745d30efd Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 10 Nov 2020 10:46:50 +0100 Subject: [PATCH 2/4] FIX #989: pass y to data preprocessors --- .../components/data_preprocessing/data_preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py b/autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py index 383b5c47a9..217c2dd361 100644 --- a/autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py +++ b/autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py @@ -102,7 +102,7 @@ def fit(self, X, y=None): transformers=sklearn_transf_spec, sparse_threshold=float(self.sparse_), ) - self.column_transformer.fit(X) + self.column_transformer.fit(X, y) return self def transform(self, X): From 1bb3d8836631bfddc0244c7e0b3d10090c050bc7 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 10 Nov 2020 10:47:29 +0100 Subject: [PATCH 3/4] MAINT #1000: minimal dask.distributed version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6b4a85e4ce..44eea1409a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ joblib scikit-learn>=0.22.0,<0.23 dask -distributed +distributed>=2.2.0 lockfile pyyaml pandas>=1.0 From 8e7b34b7f598c98d34df3c47e5c4f1a46fbbf9fc Mon Sep 17 00:00:00 
2001 From: Matthias Feurer Date: Tue, 10 Nov 2020 11:46:58 +0100 Subject: [PATCH 4/4] Prepare minor release --- autosklearn/__version__.py | 2 +- doc/releases.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py index 85dd3dc532..7fd2e63a53 100644 --- a/autosklearn/__version__.py +++ b/autosklearn/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.11.0" +__version__ = "0.11.1" diff --git a/doc/releases.rst b/doc/releases.rst index 65cd78c2f4..abc338e51a 100644 --- a/doc/releases.rst +++ b/doc/releases.rst @@ -11,6 +11,14 @@ Releases ======== +Version 0.11.1 +============== + +* FIX #989: Fixes a bug where ``y`` was not passed to all data preprocessors, which made 3rd party + category encoders fail. +* FIX #1001: Fixes a bug which could make Auto-sklearn fail at random. +* MAINT #1000: Introduce a minimal version for ``dask.distributed``. + Version 0.11.0 ==============