Skip to content

Commit

Permalink
Fix random state not being used for sampling configurations (#1329)
Browse files Browse the repository at this point in the history
* Added random state to classifiers

* Added some doc strings

* Removed random_state again

* flake'd

* Fix some test issues

* Re-added seed to test

* Updated test doc for unknown test

* flake'd
  • Loading branch information
eddiebergman authored Dec 13, 2021
1 parent 9b39a71 commit 88ad023
Show file tree
Hide file tree
Showing 5 changed files with 558 additions and 229 deletions.
121 changes: 91 additions & 30 deletions autosklearn/util/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# -*- encoding: utf-8 -*-
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union

from ConfigSpace.configuration_space import ConfigurationSpace

from sklearn.pipeline import Pipeline
import numpy as np

from autosklearn.constants import (
BINARY_CLASSIFICATION,
Expand All @@ -16,27 +16,69 @@
from autosklearn.pipeline.regression import SimpleRegressionPipeline


__all__ = [
'get_configuration_space',
'get_class',
]
__all__ = ['get_configuration_space']


def get_configuration_space(info: Dict[str, Any],
include: Optional[Dict[str, List[str]]] = None,
exclude: Optional[Dict[str, List[str]]] = None,
) -> ConfigurationSpace:
def get_configuration_space(
info: Dict[str, Any],
include: Optional[Dict[str, List[str]]] = None,
exclude: Optional[Dict[str, List[str]]] = None,
random_state: Optional[Union[int, np.random.RandomState]] = None
) -> ConfigurationSpace:
"""Get the configuration of a pipeline given some dataset info
Parameters
----------
info: Dict[str, Any]
Information about the dataset
include: Optional[Dict[str, List[str]]] = None
A dictionary of what components to include for each pipeline step
exclude: Optional[Dict[str, List[str]]] = None
A dictionary of what components to exclude for each pipeline step
random_state: Optional[Union[int, np.random.RandomState]] = None
The random state to use for seeding the ConfigSpace
Returns
-------
ConfigurationSpace
The configuration space for the pipeline
"""
if info['task'] in REGRESSION_TASKS:
return _get_regression_configuration_space(info, include, exclude)
return _get_regression_configuration_space(info, include, exclude, random_state)
else:
return _get_classification_configuration_space(info, include, exclude)
return _get_classification_configuration_space(info, include, exclude, random_state)


def _get_regression_configuration_space(
info: Dict[str, Any],
include: Optional[Dict[str, List[str]]],
exclude: Optional[Dict[str, List[str]]],
random_state: Optional[Union[int, np.random.RandomState]] = None
) -> ConfigurationSpace:
"""Get the configuration of a regression pipeline given some dataset info
def _get_regression_configuration_space(info: Dict[str, Any],
include: Optional[Dict[str, List[str]]],
exclude: Optional[Dict[str, List[str]]]
) -> ConfigurationSpace:
Parameters
----------
info: Dict[str, Any]
Information about the dataset
include: Optional[Dict[str, List[str]]] = None
A dictionary of what components to include for each pipeline step
exclude: Optional[Dict[str, List[str]]] = None
A dictionary of what components to exclude for each pipeline step
random_state: Optional[Union[int, np.random.RandomState]] = None
The random state to use for seeding the ConfigSpace
Returns
-------
ConfigurationSpace
The configuration space for the regression pipeline
"""
task_type = info['task']
sparse = False
multioutput = False
Expand All @@ -54,15 +96,39 @@ def _get_regression_configuration_space(info: Dict[str, Any],
configuration_space = SimpleRegressionPipeline(
dataset_properties=dataset_properties,
include=include,
exclude=exclude
exclude=exclude,
random_state=random_state
).get_hyperparameter_search_space()
return configuration_space


def _get_classification_configuration_space(info: Dict[str, Any],
include: Optional[Dict[str, List[str]]],
exclude: Optional[Dict[str, List[str]]]
) -> ConfigurationSpace:
def _get_classification_configuration_space(
info: Dict[str, Any],
include: Optional[Dict[str, List[str]]],
exclude: Optional[Dict[str, List[str]]],
random_state: Optional[Union[int, np.random.RandomState]] = None
) -> ConfigurationSpace:
"""Get the configuration of a classification pipeline given some dataset info
Parameters
----------
info: Dict[str, Any]
Information about the dataset
include: Optional[Dict[str, List[str]]] = None
A dictionary of what components to include for each pipeline step
exclude: Optional[Dict[str, List[str]]] = None
A dictionary of what components to exclude for each pipeline step
random_state: Optional[Union[int, np.random.RandomState]] = None
The random state to use for seeding the ConfigSpace
Returns
-------
ConfigurationSpace
The configuration space for the classification pipeline
"""
task_type = info['task']

multilabel = False
Expand All @@ -87,12 +153,7 @@ def _get_classification_configuration_space(info: Dict[str, Any],

return SimpleClassificationPipeline(
dataset_properties=dataset_properties,
include=include, exclude=exclude).\
get_hyperparameter_search_space()


def get_class(info: Dict[str, Any]) -> Pipeline:
if info['task'] in REGRESSION_TASKS:
return SimpleRegressionPipeline
else:
return SimpleClassificationPipeline
include=include,
exclude=exclude,
random_state=random_state
).get_hyperparameter_search_space()
4 changes: 2 additions & 2 deletions test/test_metalearning/pyMetaLearn/test_meta_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def setUp(self):
data_dir = os.path.join(data_dir, 'test_meta_base_data')
os.chdir(data_dir)

cs = autosklearn.pipeline.classification.SimpleClassificationPipeline()\
.get_hyperparameter_search_space()
pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
cs = pipeline.get_hyperparameter_search_space()

self.logger = logging.getLogger()
self.base = MetaBase(cs, data_dir, logger=self.logger)
Expand Down
4 changes: 2 additions & 2 deletions test/test_metalearning/pyMetaLearn/test_metalearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def setUp(self):
data_dir = os.path.join(data_dir, 'test_meta_base_data')
os.chdir(data_dir)

self.cs = autosklearn.pipeline.classification\
.SimpleClassificationPipeline().get_hyperparameter_search_space()
pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
self.cs = pipeline.get_hyperparameter_search_space()

self.logger = logging.getLogger()
meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
Expand Down
14 changes: 4 additions & 10 deletions test/test_pipeline/components/data_preprocessing/test_balancing.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ def test_weighting_effect(self):
default = cs.get_default_configuration()
default._values['balancing:strategy'] = strategy

classifier = SimpleClassificationPipeline(
config=default, **model_args
)
classifier = SimpleClassificationPipeline(config=default, **model_args)
classifier.fit(X_train, Y_train)

predictions1 = classifier.predict(X_test)
Expand All @@ -126,9 +124,7 @@ def test_weighting_effect(self):
X_test = data_[0][100:]
Y_test = data_[1][100:]

classifier = SimpleClassificationPipeline(
config=default, **model_args
)
classifier = SimpleClassificationPipeline(config=default, **model_args)
Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
classifier.fit_estimator(Xt, Y_train, **fit_params)

Expand Down Expand Up @@ -157,8 +153,7 @@ def test_weighting_effect(self):

include = {'classifier': ['sgd'], 'feature_preprocessor': [name]}

classifier = SimpleClassificationPipeline(
random_state=1, include=include)
classifier = SimpleClassificationPipeline(random_state=1, include=include)
cs = classifier.get_hyperparameter_search_space()
default = cs.get_default_configuration()
default._values['balancing:strategy'] = strategy
Expand All @@ -177,8 +172,7 @@ def test_weighting_effect(self):
Y_test = data_[1][100:]

default._values['balancing:strategy'] = strategy
classifier = SimpleClassificationPipeline(
default, random_state=1, include=include)
classifier = SimpleClassificationPipeline(default, random_state=1, include=include)
Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
classifier.fit_estimator(Xt, Y_train, **fit_params)
predictions = classifier.predict(X_test)
Expand Down
Loading

0 comments on commit 88ad023

Please sign in to comment.