From 1be4dfa0f97a79d9315063e62d6fb91ff3031118 Mon Sep 17 00:00:00 2001 From: halio-g Date: Wed, 27 Apr 2022 13:41:51 -0700 Subject: [PATCH 01/36] Added the Vizier client in the aiplatform folder. --- google/cloud/aiplatform/__init__.py | 1 + .../aiplatform/compat/services/__init__.py | 8 ++++++++ google/cloud/aiplatform/utils/__init__.py | 15 ++++++++++++++ google/cloud/aiplatform/vizier/__init__.py | 20 +++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 google/cloud/aiplatform/vizier/__init__.py diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 796b57e3a1..ce494bbe56 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -50,6 +50,7 @@ CustomJob, HyperparameterTuningJob, ) +from google.cloud.aiplatform.vizier import Study from google.cloud.aiplatform.pipeline_jobs import PipelineJob from google.cloud.aiplatform.tensorboard import ( Tensorboard, diff --git a/google/cloud/aiplatform/compat/services/__init__.py b/google/cloud/aiplatform/compat/services/__init__.py index 68440de4c5..627c77b258 100644 --- a/google/cloud/aiplatform/compat/services/__init__.py +++ b/google/cloud/aiplatform/compat/services/__init__.py @@ -54,6 +54,9 @@ from google.cloud.aiplatform_v1beta1.services.tensorboard_service import ( client as tensorboard_service_client_v1beta1, ) +from google.cloud.aiplatform_v1beta1.services.vizier_service import ( + client as vizier_service_client_v1beta1, +) from google.cloud.aiplatform_v1.services.dataset_service import ( client as dataset_service_client_v1, @@ -94,6 +97,9 @@ from google.cloud.aiplatform_v1.services.tensorboard_service import ( client as tensorboard_service_client_v1, ) +from google.cloud.aiplatform_v1.services.vizier_service import ( + client as vizier_service_client_v1, +) __all__ = ( # v1 @@ -110,6 +116,7 @@ prediction_service_client_v1, specialist_pool_service_client_v1, tensorboard_service_client_v1, + vizier_service_client_v1, # 
v1beta1 dataset_service_client_v1beta1, endpoint_service_client_v1beta1, @@ -124,4 +131,5 @@ specialist_pool_service_client_v1beta1, metadata_service_client_v1beta1, tensorboard_service_client_v1beta1, + vizier_service_client_v1beta1, ) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index 0ea641eee3..f71f30f8d9 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -47,6 +47,8 @@ pipeline_service_client_v1beta1, prediction_service_client_v1beta1, tensorboard_service_client_v1beta1, + vizier_service_client_v1beta1, + ) from google.cloud.aiplatform.compat.services import ( dataset_service_client_v1, @@ -61,6 +63,7 @@ pipeline_service_client_v1, prediction_service_client_v1, tensorboard_service_client_v1, + vizier_service_client_v1, ) from google.cloud.aiplatform.compat.types import ( @@ -82,6 +85,7 @@ job_service_client_v1beta1.JobServiceClient, metadata_service_client_v1beta1.MetadataServiceClient, tensorboard_service_client_v1beta1.TensorboardServiceClient, + vizier_service_client_v1beta1.VizierServiceClient, # v1 dataset_service_client_v1.DatasetServiceClient, endpoint_service_client_v1.EndpointServiceClient, @@ -93,6 +97,7 @@ pipeline_service_client_v1.PipelineServiceClient, job_service_client_v1.JobServiceClient, tensorboard_service_client_v1.TensorboardServiceClient, + vizier_service_client_v1.VizierServiceClient, ) @@ -559,6 +564,15 @@ class TensorboardClientWithOverride(ClientWithOverride): (compat.V1BETA1, tensorboard_service_client_v1beta1.TensorboardServiceClient), ) +class VizierClientWithOverride(ClientWithOverride): + _is_temporary = True + _default_version = compat.DEFAULT_VERSION + _version_map = ( + (compat.V1, vizier_service_client_v1.VizierServiceClient), + (compat.V1BETA1, vizier_service_client_v1beta1.VizierServiceClient), + ) + + VertexAiServiceClientWithOverride = TypeVar( "VertexAiServiceClientWithOverride", @@ -572,6 +586,7 @@ class 
TensorboardClientWithOverride(ClientWithOverride): PredictionClientWithOverride, MetadataClientWithOverride, TensorboardClientWithOverride, + VizierClientWithOverride, ) diff --git a/google/cloud/aiplatform/vizier/__init__.py b/google/cloud/aiplatform/vizier/__init__.py new file mode 100644 index 0000000000..1b9a972dd8 --- /dev/null +++ b/google/cloud/aiplatform/vizier/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from google.cloud.aiplatform.vizier.study import Study + +__all__ = ( + "Study", +) From 3efab097a4e0cd3a9af5afac7ab7ec3d87e95468 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 3 May 2022 12:24:55 -0700 Subject: [PATCH 02/36] Copied the pyvizier from the open source vizier. 
--- .../aiplatform/vizier/pyvizier/__init__.py | 27 + .../vizier/pyvizier/base_study_config.py | 1403 +++++++++++++++++ .../vizier/pyvizier/base_study_config_test.py | 490 ++++++ .../aiplatform/vizier/pyvizier/common.py | 510 ++++++ .../aiplatform/vizier/pyvizier/common_test.py | 334 ++++ .../aiplatform/vizier/pyvizier/context.py | 47 + .../vizier/pyvizier/context_test.py | 18 + .../vizier/pyvizier/parameter_config.py | 529 +++++++ .../vizier/pyvizier/parameter_config_test.py | 321 ++++ .../cloud/aiplatform/vizier/pyvizier/trial.py | 551 +++++++ .../aiplatform/vizier/pyvizier/trial_test.py | 219 +++ 11 files changed, 4449 insertions(+) create mode 100644 google/cloud/aiplatform/vizier/pyvizier/__init__.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/base_study_config.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/common.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/common_test.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/context.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/context_test.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/parameter_config.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/trial.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/trial_test.py diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py new file mode 100644 index 0000000000..708302d393 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -0,0 +1,27 @@ +"""PyVizier classes for Pythia policies.""" + +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricInformation +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricsConfig +from 
google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricType +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ObjectiveMetricGoal +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ProblemStatement +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import SearchSpace +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import SearchSpaceSelector +from google.cloud.aiplatform.vizier.pyvizier.common import Metadata +from google.cloud.aiplatform.vizier.pyvizier.common import MetadataValue +from google.cloud.aiplatform.vizier.pyvizier.common import Namespace +from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ExternalType +from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterConfig +from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterType +from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ScaleType +from google.cloud.aiplatform.vizier.pyvizier.trial import CompletedTrial +from google.cloud.aiplatform.vizier.pyvizier.trial import Measurement +from google.cloud.aiplatform.vizier.pyvizier.trial import Metric +from google.cloud.aiplatform.vizier.pyvizier.trial import ParameterDict +from google.cloud.aiplatform.vizier.pyvizier.trial import ParameterValue +from google.cloud.aiplatform.vizier.pyvizier.trial import Trial +from google.cloud.aiplatform.vizier.pyvizier.trial import TrialFilter +from google.cloud.aiplatform.vizier.pyvizier.trial import TrialStatus +from google.cloud.aiplatform.vizier.pyvizier.trial import TrialSuggestion + +StudyConfig = ProblemStatement # To be deprecated. 
diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py new file mode 100644 index 0000000000..91ba16bd94 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py @@ -0,0 +1,1403 @@ +import collections +from collections import abc as collections_abc +import copy +import enum +import math +import re +from typing import Callable, Iterable, Iterator, List, Optional, Sequence, Tuple, Type, TypeVar, Union, overload + +import attr +import numpy as np +from google.cloud.aiplatform.vizier.pyvizier import common +from google.cloud.aiplatform.vizier.pyvizier import parameter_config +from google.cloud.aiplatform.vizier.pyvizier import trial + +################### PyTypes ################### +ScaleType = parameter_config.ScaleType +ExternalType = parameter_config.ExternalType +# A sequence of possible internal parameter values. +MonotypeParameterSequence = parameter_config.MonotypeParameterSequence +_T = TypeVar('_T') + + +################### Helper Classes ################### +def _min_leq_max(instance: 'MetricInformation', _, value: float): + if value > instance.max_value: + raise ValueError( + f'min_value={value} cannot exceed max_value={instance.max_value}.') + + +def _max_geq_min(instance: 'MetricInformation', _, value: float): + if value < instance.min_value: + raise ValueError( + f'min_value={instance.min_value} cannot exceed max_value={value}.') + + +# Values should NEVER be removed from ObjectiveMetricGoal, only added. +class ObjectiveMetricGoal(enum.IntEnum): + """Valid Values for MetricInformation.Goal.""" + MAXIMIZE = 1 + MINIMIZE = 2 + + # pylint: disable=comparison-with-callable + @property + def is_maximize(self) -> bool: + return self == self.MAXIMIZE + + @property + def is_minimize(self) -> bool: + return self == self.MINIMIZE + + +class MetricType(enum.Enum): + """Type of the metric. + + OBJECTIVE: Objective to be maximized / minimized. 
+ SAFETY: Objective to be kept above / below a certain threshold. + """ + OBJECTIVE = 'OBJECTIVE' + SAFETY = 'SAFETY' # Soft constraint + + # pylint: disable=comparison-with-callable + @property + def is_safety(self) -> bool: + return self == MetricType.SAFETY + + @property + def is_objective(self) -> bool: + return self == MetricType.OBJECTIVE + + +@attr.define(frozen=False, init=True, slots=True) +class MetricInformation: + """MetricInformation provides optimization metrics configuration.""" + + # The name of this metric. An empty string is allowed for single-metric + # optimizations. + name: str = attr.field( + init=True, default='', validator=attr.validators.instance_of(str)) + + goal: ObjectiveMetricGoal = attr.field( + init=True, + # pylint: disable=g-long-lambda + converter=ObjectiveMetricGoal, + validator=attr.validators.instance_of(ObjectiveMetricGoal), + on_setattr=[attr.setters.convert, attr.setters.validate], + kw_only=True) + + # The following are only valid for Safety metrics. + # safety_threshold should always be set to a float (default 0.0), for safety + # metrics. + safety_threshold: Optional[float] = attr.field( + init=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(float)), + kw_only=True) + safety_std_threshold: Optional[float] = attr.field( + init=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(float)), + kw_only=True) + percentage_unsafe_trials_threshold: Optional[float] = attr.field( + init=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(float)), + kw_only=True) + + # Minimum value of this metric can be optionally specified. + min_value: float = attr.field( + init=True, + default=None, + # FYI: Converter is applied before validator. 
+ converter=lambda x: float(x) if x is not None else -np.inf, + validator=[attr.validators.instance_of(float), _min_leq_max], + kw_only=True) + + # Maximum value of this metric can be optionally specified. + max_value: float = attr.field( + init=True, + default=None, + # FYI: Converter is applied before validator. + converter=lambda x: float(x) if x is not None else np.inf, + validator=[attr.validators.instance_of(float), _max_geq_min], + on_setattr=attr.setters.validate, + kw_only=True) + + def min_value_or(self, default_value_fn: Callable[[], float]) -> float: + """Returns the minimum value if finite, or default_value_fn(). + + Avoids the common pitfalls of using + `metric.min_value or default_value` + which would incorrectly use the default_value when min_value == 0, and + requires default_value to have been computed. + + Args: + default_value_fn: Default value if min_value is not finite. + """ + if np.isfinite(self.min_value): + return self.min_value + else: + return default_value_fn() + + def max_value_or(self, default_value_fn: Callable[[], float]) -> float: + """Returns the maximum value if finite, or default_value_fn(). + + Avoids the common pitfalls of using + `metric.max_value or default_value` + which would incorrectly use the default_value when max_value == 0, and + requires default_value to have been computed. + + Args: + default_value_fn: Default value if max_value is not configured. + """ + if np.isfinite(self.max_value): + return self.max_value + else: + return default_value_fn() + + @property + def range(self) -> float: + """Range of the metric. 
Can be infinite.""" + return self.max_value - self.min_value + + @property + def type(self) -> MetricType: + if (self.safety_threshold is not None or + self.safety_std_threshold is not None): + return MetricType.SAFETY + else: + return MetricType.OBJECTIVE + + def flip_goal(self) -> 'MetricInformation': + """Flips the goal in-place and returns the reference to self.""" + if self.goal == ObjectiveMetricGoal.MAXIMIZE: + self.goal = ObjectiveMetricGoal.MINIMIZE + else: + self.goal = ObjectiveMetricGoal.MAXIMIZE + return self + + +@attr.define(frozen=False, init=True, slots=True) +class MetricsConfig(collections_abc.Collection): + """Container for metrics. + + Metric names should be unique. + """ + _metrics: List[MetricInformation] = attr.ib( + init=True, + factory=list, + converter=list, + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of(MetricInformation), + iterable_validator=attr.validators.instance_of(Iterable))) + + def item(self) -> MetricInformation: + if len(self._metrics) != 1: + raise ValueError('Can be called only when there is exactly one metric!') + return self._metrics[0] + + def _assert_names_are_unique(self) -> None: + counts = collections.Counter(metric.name for metric in self._metrics) + if len(counts) != len(self._metrics): + for name, count in counts.items(): + if count > 1: + raise ValueError(f'Duplicate metric name: {name} in {self._metrics}') + + def __attrs_post_init__(self): + self._assert_names_are_unique() + + def __iter__(self) -> Iterator[MetricInformation]: + return iter(self._metrics) + + def __contains__(self, x: object) -> bool: + return x in self._metrics + + def __len__(self) -> int: + return len(self._metrics) + + def __add__(self, metrics: Iterable[MetricInformation]) -> 'MetricsConfig': + return MetricsConfig(self._metrics + list(metrics)) + + def of_type( + self, include: Union[MetricType, + Iterable[MetricType]]) -> 'MetricsConfig': + """Filters the Metrics by type.""" + if 
isinstance(include, MetricType): + include = (include,) + return MetricsConfig(m for m in self._metrics if m.type in include) + + def append(self, metric: MetricInformation): + self._metrics.append(metric) + self._assert_names_are_unique() + + def extend(self, metrics: Iterable[MetricInformation]): + for metric in metrics: + self.append(metric) + + @property + def is_single_objective(self) -> bool: + """Returns True if only one objective metric is configured.""" + return len(self.of_type(MetricType.OBJECTIVE)) == 1 + + +@attr.s(frozen=True, init=True, slots=True, kw_only=True) +class _PathSegment: + """Selection of a parameter name and one of its values.""" + # A ParameterConfig name. + name: str = attr.ib( + init=True, validator=attr.validators.instance_of(str), kw_only=True) + + # A ParameterConfig value. + value: Union[int, float, str] = attr.ib( + init=True, + validator=attr.validators.instance_of((int, float, str)), + kw_only=True) + + +class _PathSelector(Sequence[_PathSegment]): + """Immutable sequence of path segments.""" + + def __init__(self, iterable: Iterable[_PathSegment] = tuple()): + self._paths = tuple(iterable) + + @overload + def __getitem__(self, s: slice) -> '_PathSelector': + ... + + @overload + def __getitem__(self, i: int) -> _PathSegment: + ... 
+ + def __getitem__(self, index): + item = self._paths[index] + if isinstance(item, _PathSegment): + return item + else: + return _PathSelector(item) + + def __len__(self) -> int: + """Returns the number of elements in the container.""" + return len(self._paths) + + def __add__( + self, other: Union[Sequence[_PathSegment], + _PathSegment]) -> '_PathSelector': + if isinstance(other, _PathSegment): + other = [other] + return _PathSelector(self._paths + tuple(other)) + + def __str__(self) -> str: + """Returns the path as a string.""" + return '/'.join(['{}={}'.format(p.name, p.value) for p in self._paths]) + + +class InvalidParameterError(Exception): + """Error thrown when parameter values are invalid.""" + + +################### Main Classes ################### +@attr.s(frozen=True, init=True, slots=True, kw_only=True) +class SearchSpaceSelector: + """A Selector for all, or part of a SearchSpace.""" + + # List of ParameterConfig objects referenced by this selector. + # This is a reference to a list of objects owned by SearchSpace (and will + # typically include the entire SearchSpace). + _configs: List[parameter_config.ParameterConfig] = attr.ib( + init=True, + factory=list, + # Verify that this is a list of ParameterConfig objects. + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of( + parameter_config.ParameterConfig), + iterable_validator=attr.validators.instance_of(list)), + kw_only=True) + + # _selected_path and _selected_name control how parameters are added to the + # search space. + # + # 1) If _selected_path is empty, and _selected_name is empty, parameters + # are added to the root of the search space. + # 2) If _selected_path is empty, and _selected_name is non-empty, parameters + # will be added as child parameters to all root and child parameters + # with name ==_selected_name. 
+ # 3) If both _selected_path and _selected_name are specified, parameters will + # be added as child parameters to the parameter specified by the path and + # the name. + # 4) If _selected_path is non-empty, and _selected_name is empty, this is an + # error. + + # An ordered list of _PathSelector objects which uniquely identifies a path + # in a conditional tree. + _selected_path: _PathSelector = attr.ib( + init=True, + default=_PathSelector(), + converter=_PathSelector, + # Verify that this is a list of _PathSegment objects. + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of(_PathSegment), + iterable_validator=attr.validators.instance_of(Iterable)), + kw_only=True) + + # A ParameterConfig name. + # If there is a _selected_name, then there have to also be _selected_values + # below, and new parameters are added to the parent(s) selected by + # _selected_path and _selected_name. + _selected_name: str = attr.ib( + init=True, + default='', + validator=attr.validators.instance_of(str), + kw_only=True) + + # List of ParameterConfig values from _configs. + # If there are _selected_values, then there have to also be _selected_name + # above. 
+ _selected_values: MonotypeParameterSequence = attr.ib( + init=True, + factory=list, + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float, str)), + iterable_validator=attr.validators.instance_of(list)), + kw_only=True) + + @property + def parameter_name(self) -> str: + """Returns the selected parameter name.""" + return self._selected_name + + @property + def parameter_values(self) -> MonotypeParameterSequence: + """Returns the selected parameter values.""" + return copy.deepcopy(self._selected_values) + + def add_float_param(self, + name: str, + min_value: float, + max_value: float, + *, + default_value: Optional[float] = None, + scale_type: Optional[ScaleType] = ScaleType.LINEAR, + index: Optional[int] = None) -> 'SearchSpaceSelector': + """Adds floating point parameter config(s) to the search space. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + min_value: Inclusive lower bound for the parameter. + max_value: Inclusive upper bound for the parameter. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='rate' and index=0, then a single ParameterConfig with name + 'rate[0]' is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector(s) for the newly added parameter(s): + One SearchSpaceSelector if one parameter was added, or a list of + SearchSpaceSelector if multiple parameters were added. 
+ + Raises: + ValueError: If `index` is invalid (e.g. negative). + """ + bounds = (float(min_value), float(max_value)) + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + bounds=bounds, + scale_type=scale_type, + default_value=default_value) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_int_param(self, + name: str, + min_value: int, + max_value: int, + *, + default_value: Optional[int] = None, + scale_type: Optional[ScaleType] = None, + index: Optional[int] = None) -> 'SearchSpaceSelector': + """Adds integer parameter config(s) to the search space. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + min_value: Inclusive lower bound for the parameter. + max_value: Inclusive upper bound for the parameter. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='hidden_units' and index=0, then a single ParameterConfig with name + 'hidden_units[0]' is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If min_value or max_value are not integers. + ValueError: If `index` is invalid (e.g. negative). 
+ """ + int_min_value = int(min_value) + if not math.isclose(min_value, int_min_value): + raise ValueError('min_value for an INTEGER parameter should be an integer' + ', got: [{}]'.format(min_value)) + int_max_value = int(max_value) + if not math.isclose(max_value, int_max_value): + raise ValueError('max_value for an INTEGER parameter should be an integer' + ', got: [{}]'.format(min_value)) + bounds = (int_min_value, int_max_value) + + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + bounds=bounds, + scale_type=scale_type, + default_value=default_value) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_discrete_param( + self, + name: str, + feasible_values: Union[Sequence[float], Sequence[int]], + *, + default_value: Optional[Union[float, int]] = None, + scale_type: Optional[ScaleType] = ScaleType.LINEAR, + index: Optional[int] = None, + auto_cast: Optional[bool] = True) -> 'SearchSpaceSelector': + """Adds ordered numeric parameter config(s) with a finite set of values. + + IMPORTANT: If a parameter is discrete, its values are assumed to have + ordered semantics. Thus, you should not use discrete parameters for + unordered values such as ids. In this case, see add_categorical_param() + below. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + feasible_values: The set of feasible values for this parameter. 
+ default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='batch_size' and index=0, then a single ParameterConfig with name + 'batch_size[0]' is added. `index` should be >= 0. + auto_cast: If True, the external type will be set to INTEGER if all + values are castable to an integer without losing precision. If False, the + external type will be set to float. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If `index` is invalid (e.g. negative). + """ + param_names = self._get_parameter_names_to_create(name=name, index=index) + + external_type = ExternalType.FLOAT + if auto_cast: + # If all feasible values are convertible to ints without loss of + # precision, annotate the external type as INTEGER. This will cast + # [0., 1., 2.] into [0, 1, 2] when parameter values are returned in + # clients. + if all([v == round(v) for v in feasible_values]): + external_type = ExternalType.INTEGER + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + feasible_values=sorted(feasible_values), + scale_type=scale_type, + default_value=default_value, + external_type=external_type) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_categorical_param( + self, + name: str, + feasible_values: Sequence[str], + *, + default_value: Optional[str] = None, + scale_type: Optional[ScaleType] = None, + index: Optional[int] = None) -> 'SearchSpaceSelector': + """Adds unordered string-valued parameter config(s) to the search space. + + IMPORTANT: If a parameter is categorical, its values are assumed to be + unordered. If the `feasible_values` have ordering, use add_discrete_param() + above, since it will improve Vizier's model quality. 
+ + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + feasible_values: The set of feasible values for this parameter. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='id' and index=0, then a single ParameterConfig with name 'id[0]' + is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If `index` is invalid (e.g. negative). + """ + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + feasible_values=sorted(feasible_values), + scale_type=scale_type, + default_value=default_value) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_bool_param(self, + name: str, + feasible_values: Optional[Sequence[bool]] = None, + *, + default_value: Optional[bool] = None, + scale_type: Optional[ScaleType] = None, + index: Optional[int] = None) -> 'SearchSpaceSelector': + """Adds boolean-valued parameter config(s) to the search space. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. 
+ + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + feasible_values: An optional list of feasible boolean values, i.e. one of + the following: [True], [False], [True, False], [False, True]. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='match' and index=0, then a single ParameterConfig with name + 'match[0]' is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If `feasible_values` has invalid values. + ValueError: If `index` is invalid (e.g. negative). + """ + allowed_values = (None, (True, False), (False, True), (True,), (False,)) + if feasible_values not in allowed_values: + raise ValueError('feasible_values must be one of %s; got: %s.' % + (allowed_values, feasible_values)) + # Boolean parameters are represented as categorical parameters internally. 
+ bool_to_string = lambda x: 'True' if x else 'False' + if feasible_values is None: + categories = ('True', 'False') + else: + categories = [bool_to_string(x) for x in feasible_values] + feasible_values = sorted(categories, reverse=True) + + if default_value is not None: + default_value = bool_to_string(default_value) + + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + feasible_values=sorted(feasible_values), + scale_type=scale_type, + default_value=default_value, + external_type=ExternalType.BOOLEAN) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def select( + self, + parameter_name: str, + parameter_values: Optional[MonotypeParameterSequence] = None + ) -> 'SearchSpaceSelector': + """Selects a single parameter specified by path and parameter_name. + + This method should be called to select a parent parameter, before calling + `add_*_param` methods to create child parameters. + + Given a selector to the root of the search space: + root = pyvizier.SearchSpace().select_root() + + 1) To select a parameter at the root of the search space, with parent values + for child parameters: + model = root.select('model_type', ['dnn']) + model.add_float_param('hidden_units', ...) + 2) To select a parameter at the root of the search space, and defer parent + value selection to later calls: + model = root.select('model_type') + # Add `hidden_units` and `optimizer_type` as `dnn` children. + model.select_values(['dnn']).add_float_param('hidden_units', ...) + model.select_values(['dnn']).add_categorical_param( + 'optimizer_type', ['adam', 'adagrad']) + # Add `optimizer_type` and `activation` as `linear` children. + model.select_values(['linear']).add_categorical_param( + 'optimizer_type', ['adam', 'ftrl']) + model.select_values(['linear']).add_categorical_param('activation', ...) 
+ 3) To select a parameter in a conditional search space, specify a path, by
+ chaining select() calls:
+ optimizer = root.select('model_type', ['linear']).select('optimizer_type')
+ optimizer.select_values('adam').add_float_param('learning_rate', 0.001,..)
+ optimizer.select_values('ftrl').add_float_param('learning_rate', 0.1,..)
+
+ # OR pre-select the parent parameter values:
+ optimizer = root.select('model_type', ['linear']).select(
+ 'optimizer_type', ['adam'])
+ optimizer.add_float_param('learning_rate', 0.001,...)
+ 4) If there is *only one* parameter with the given name, then it is possible
+ to select it without specifying the path, using:
+ selectors = root.select_all('activation')
+ # 'activation' exists only under model_type='linear'.
+ assert len(selectors) == 1
+ activation = selectors[0]
+
+ Args:
+ parameter_name:
+ parameter_values: Optional parameter values for this selector, which will
+ be used to add child parameters, or traverse a conditional tree.
+
+ Returns:
+ A new SearchSpaceSelector.
+ """
+ # Make sure parameter_name exists in the conditional parameters tree.
+ # parameter_values will be validated only when a child parameter is added.
+ if not self._parameter_exists(parameter_name):
+ raise ValueError('No parameter with name {} exists in this '
+ 'SearchSpace'.format(parameter_name))
+
+ path = []
+ selected_values = []
+ if parameter_values is not None:
+ if not isinstance(parameter_values, (list, tuple)):
+ raise ValueError('parameter_values should be a list or tuple, given '
+ '{} with type {}'.format(parameter_values,
+ type(parameter_values)))
+ selected_values = parameter_values
+
+ if self._selected_name:
+ # There is already a parameter name selected, so this is a chained select
+ # call.
+ if not self._selected_values: + raise ValueError('Cannot call select() again before parameter values ' + 'are selected: parameter {} was previously selected, ' + ' with the path: {}, but no values were selected for ' + 'it'.format(self.parameter_name, self.path_string)) + # Return a new selector, with the currently selected parameter added to + # the path. + new_path_segment = [ + _PathSegment( + name=self._selected_name, value=self._selected_values[0]) + ] + path = self._selected_path + new_path_segment + if not self._path_exists(path): + raise ValueError('Path {} does not exist in this SearchSpace: ' + '{}'.format((path), self)) + + return SearchSpaceSelector( + configs=self._configs, + selected_path=path, + selected_name=parameter_name, + selected_values=selected_values) + + def select_values( + self, + parameter_values: MonotypeParameterSequence) -> 'SearchSpaceSelector': + """Selects values for a pre-selected parameter. + + This method should be called to select parent parameter(s) value(s), before + calling `add_*_param` methods to create child parameters. + + This method must be called AFTER select(). + This method mutates this selector. + + Args: + parameter_values: Parameter values for this selector, which will be used + to add child parameters. + + Returns: + SearchSpaceSelector + """ + if not self._selected_name: + raise ValueError('No parameter is selected. Call select() first.') + if not parameter_values: + raise ValueError( + 'parameter_values cannot be empty. Specify at least one value.') + if not isinstance(parameter_values, (list, tuple)): + raise ValueError('parameter_values should be a list or tuple, given ' + '{} with type {}'.format(parameter_values, + type(parameter_values))) + # TODO: Allow to directly select boolean parent parameters. 
+ object.__setattr__(self, '_selected_values', parameter_values)
+ return self
+
+ def select_all(
+ self, parameter_name: str, parameter_values: MonotypeParameterSequence
+ ) -> List['SearchSpaceSelector']:
+ """Select one or more parent parameters, with the same name.
+
+ This method should be called to select parent parameter(s), before calling
+ `add_*_param` methods to create child parameters.
+ Multiple parent parameters with the same name are possible in a conditional
+ search space. See go/conditional-parameters for more details.
+
+ 1) If the conditional search space has two parameters with the same
+ name, 'optimizer_type', given a selector to the root of the search space,
+ select_all() can be used to simultaneously add child parameters to both
+ 'optimizer_type` parameters:
+
+ root = pyvizier.SearchSpace().select_root()
+ model.select_values(['dnn']).add_categorical_param(
+ 'optimizer_type', ['adam', 'adagrad'])
+ model.select_values(['linear']).add_categorical_param(
+ 'optimizer_type', ['adam', 'ftrl'])
+ # Add a 'learning_rate' parameter to both 'adam' optimizers:
+ optimizers = model.select_all('optimizer_type', parameter_values=['adam'])
+ optimizers.add_float_param('learning_rate', ...)
+
+ 2) If there is *only one* parameter with the given name, then it is also
+ possible to use select_all() to select it:
+ root = pyvizier.SearchSpace().select_root()
+ model.select_values(['dnn']).add_categorical_param('activation', ...)
+ # Select the single parameter with the name 'activation':
+ selectors = root.select_all('activation')
+ assert len(selectors) == 1
+ activation = selectors[0]
+
+ Args:
+ parameter_name:
+ parameter_values: Optional parameter values for this selector, which will
+ be used to add child parameters.
+
+ Returns:
+ List of SearchSpaceSelector
+ """
+ # TODO: Raise an error if this selector already has selected_name.
+ # Make sure parameter_name exists in the conditional parameters tree.
+ if not self._parameter_exists(parameter_name):
+ raise ValueError('No parameter with name {} exists in this '
+ 'SearchSpace'.format(parameter_name))
+
+ if parameter_values is not None:
+ if not isinstance(parameter_values, (list, tuple)):
+ raise ValueError('parameter_values should be a list or tuple, given '
+ '{} with type {}'.format(parameter_values,
+ type(parameter_values)))
+ # TODO: Complete this method.
+ raise NotImplementedError()
+
+ def _path_exists(self, path: _PathSelector) -> bool:
+ """Checks if the path exists in the conditional tree."""
+ for parent in self._configs:
+ if (path[0].name == parent.name and
+ path[0].value in parent.feasible_values):
+ if len(path) == 1:
+ # No need to recurse.
+ return True
+ return self._path_exists_inner(parent, path[1:])
+ return False
+
+ @classmethod
+ def _path_exists_inner(cls, current_root: parameter_config.ParameterConfig,
+ current_path: _PathSelector) -> bool:
+ """Returns true if the path exists, starting at root_parameter."""
+ child_idx = None
+ for idx, child in enumerate(current_root.child_parameter_configs):
+ if (current_path[0].name == child.name and
+ current_path[0].value in child.feasible_values):
+ child_idx = idx
+ break
+ if child_idx is None:
+ # No match is found. This path does not exist.
+ return False
+ if len(current_path) == 1:
+ # This is the end of the path.
+ return True
+ # Keep traversing.
+ return cls._path_exists_inner(
+ current_root.child_parameter_configs[child_idx], current_path[1:])
+
+ def _parameter_exists(self, parameter_name: str) -> bool:
+ """Checks if there exists at least one parameter with this name.
+
+ Note that this method checks existence in the entire search space.
+
+ Args:
+ parameter_name:
+
+ Returns:
+ bool: Exists.
+ """ + found = False + for parent in self._configs: + for pc in parent.traverse(show_children=False): + if pc.name == parameter_name: + found = True + break + return found + + @classmethod + def _get_parameter_names_to_create(cls, + *, + name: str, + length: Optional[int] = None, + index: Optional[int] = None) -> List[str]: + """Returns the names of all parameters which should be created. + + Args: + name: The base parameter name. + length: Specifies the length of a multi-dimensional parameters. If larger + than 1, then multiple ParameterConfigs are added. E.g. if name='rate' + and length=2, then two ParameterConfigs with names 'rate[0]', 'rate[1]' + are added. Cannot be specified together with `index`. + index: Specifies the multi-dimensional index for this parameter. Cannot be + specified together with `length`. E.g. if name='rate' and index=1, then + a single ParameterConfig with name 'rate[1]' is added. + + Returns: + List of parameter names to create. + + Raises: + ValueError: If `length` or `index` are invalid. + """ + if length is not None and index is not None: + raise ValueError('Only one of `length` and `index` can be specified. Got' + ' length={}, index={}'.format(length, index)) + if length is not None and length < 1: + raise ValueError('length must be >= 1. Got length={}'.format(length)) + if index is not None and index < 0: + raise ValueError('index must be >= 0. Got index={}'.format(index)) + + param_names = [] + if length is None and index is None: + # Add one parameter with no multi-dimensional index. + param_names.append(name) + elif index is not None: + # Add one parameter with a multi-dimensional index. + param_names.append(cls._multi_dimensional_parameter_name(name, index)) + elif length is not None: + # `length > 0' is synthatic sugar for multi multi-dimensional parameter. + # Each multi-dimensional parameter is encoded as a list of separate + # parameters with names equal to `name[index]` (index is zero based). 
+ for i in range(length):
+ param_names.append(cls._multi_dimensional_parameter_name(name, i))
+ return param_names
+
+ @classmethod
+ def _multi_dimensional_parameter_name(cls, name: str, index: int) -> str:
+ """Returns the indexed parameter name."""
+ return '{}[{}]'.format(name, index)
+
+ @classmethod
+ def parse_multi_dimensional_parameter_name(
+ cls, name: str) -> Optional[Tuple[str, int]]:
+ """Returns the base name for a multi-dimensional parameter name.
+
+ Args:
+ name: A parameter name.
+
+ Returns:
+ (base_name, index): if name='hidden_units[10]', base_name='hidden_units'
+ and index=10.
+ Returns None if name is not in the format 'base_name[idx]'.
+ """
+ regex = r'(?P<name>[^()]*)\[(?P<index>\d+)\]$'
+ pattern = re.compile(regex)
+ matches = pattern.match(name)
+ if matches is None:
+ return None
+ return (matches.groupdict()['name'], int(matches.groupdict()['index']))
+
+ @property
+ def path_string(self) -> str:
+ """Returns the selected path as a string."""
+ return str(self._selected_path)
+
+ def _add_parameters(
+ self, parameters: List[parameter_config.ParameterConfig]
+ ) -> List['SearchSpaceSelector']:
+ """Adds ParameterConfigs either to the root, or as child parameters.
+
+ Args:
+ parameters: The parameters to add to the search space.
+
+ Returns:
+ A list of SearchSpaceSelectors, one for each parameters added.
+ """
+ if self._selected_name and not self._selected_values:
+ raise ValueError(
+ 'Cannot add child parameters to parameter {}: parent values were '
+ 'not selected. Call select_values() first.'.format(
+ self._selected_name))
+ if not self._selected_name and self._selected_values:
+ raise ValueError(
+ 'Cannot add child parameters: no parent name is selected.'
+ ' Call select() or select_all() first.')
+ if self._selected_path and not self._selected_name:
+ raise ValueError(
+ 'Cannot add child parameters: path is specified ({}), but no parent'
+ ' name is specified. 
Call select() or select_all() first'.format( + self.path_string)) + + selectors: List['SearchSpaceSelector'] = [] + if not self._selected_path and not self._selected_name: + # If _selected_path is empty, and _selected_name is empty, parameters + # are added to the root of the search space. + self._configs.extend(parameters) + # Return Selectors for the newly added parameters. + for param in parameters: + selectors.append( + SearchSpaceSelector( + configs=self._configs, + selected_path=[], + selected_name=param.name, + selected_values=[])) + elif not self._selected_path and self._selected_name: + # If _selected_path is empty, and _selected_name is not empty, parameters + # will be added as child parameters to *all* root and child parameters + # with name ==_selected_name. + for idx, root_param in enumerate(self._configs): + updated_param, new_selectors = self._recursive_add_child_parameters( + self._configs, _PathSelector(), root_param, self._selected_name, + self._selected_values, parameters) + # Update the root ParameterConfig in place. + self._configs[idx] = updated_param + selectors.extend(new_selectors) + else: + # If both _selected_path and _selected_name are specified, parameters will + # be added as child parameters to the parameter specified by the path and + # the name. + idx, updated_param, new_selectors = self._add_parameters_at_selected_path( + root_configs=self._configs, + complete_path=self._selected_path, + parent_name=self._selected_name, + parent_values=self._selected_values, + new_children=parameters) + # Update the root ParameterConfig in place. 
+ self._configs[idx] = updated_param + selectors.extend(new_selectors) + + if not selectors: + raise ValueError( + 'Cannot add child parameters: the path ({}), is not valid.'.format( + self.path_string)) + return selectors + + @classmethod + def _recursive_add_child_parameters( + cls, configs: List[parameter_config.ParameterConfig], path: _PathSelector, + root: parameter_config.ParameterConfig, parent_name: str, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig] + ) -> Tuple[parameter_config.ParameterConfig, List['SearchSpaceSelector']]: + """Recursively adds new children to all matching parameters. + + new_children are potentially added to root, and all matching child + parameters with name==parent_name. + + Args: + configs: A list of configs to include in returned SearchSpaceSelectors, + this list is not modified or used for anything else. + path: The path to include in returned SearchSpaceSelectors. + root: Parent parameter to start the recursion at. + parent_name: new_children are added to all parameter with this name. + parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (An updated root with all of its children updated, list of selectors to + any parameters which may have been added) + """ + updated_children: List[Tuple[MonotypeParameterSequence, + parameter_config.ParameterConfig]] = [] + selectors: List['SearchSpaceSelector'] = [] + if root.name == parent_name: + # Add new children to this root. If this is a leaf parameter, + # e.g. it has no children, this is where the recursion ends. + for child in new_children: + updated_children.append((parent_values, child)) + # For the path, select one parent value, since for the path, the exact + # value does not matter, as long as it's valid. 
+ root_path_fragment = [ + _PathSegment(name=root.name, value=parent_values[0]) + ] + selectors.append( + SearchSpaceSelector( + configs=configs, + selected_path=path + root_path_fragment, + selected_name=child.name, + selected_values=[])) + # Recursively update existing children, if any. + for child in root.child_parameter_configs: + # For the path, select one parent value, since for the path, the exact + # value does not matter, as long as it's valid. + root_path_fragment = [ + _PathSegment(name=root.name, value=child.matching_parent_values[0]) + ] + updated_child, new_selectors = cls._recursive_add_child_parameters( + configs, path + root_path_fragment, child, parent_name, parent_values, + new_children) + updated_children.append( + (updated_child.matching_parent_values, updated_child)) + selectors += new_selectors + # Update all children (existing and potentially new) in the root. + return root.clone_without_children.add_children(updated_children), selectors + + @classmethod + def _add_parameters_at_selected_path( + cls, root_configs: List[parameter_config.ParameterConfig], + complete_path: _PathSelector, parent_name: str, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig] + ) -> Tuple[int, parameter_config.ParameterConfig, + List['SearchSpaceSelector']]: + """Adds new children to the parameter specified by the path and parent_name. + + Args: + root_configs: A list of configs to include in returned + SearchSpaceSelectors, this list is not modified. These are expected to + be the configs at the root of the search space. + complete_path: The path to include in the returned SearchSpaceSelectors. + parent_name: new_children are added to all parameter with this name. + parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. 
+ + Returns: + (Root index in root_configs, + an updated root with all of its children updated, + list of selectors to any parameters which may have been added) + + Raises: + RuntimeError: + ValueError: + """ + if not complete_path: + # This is an internal error, since the caller should never specify an + # empty current_path. + raise RuntimeError('Internal error: got empty complete_path') + + # This is the beginning of the recursion. Select a root to recurse at. + current_root: Optional[parameter_config.ParameterConfig] = None + root_idx: int = 0 + for root_idx, root_param in enumerate(root_configs): + if complete_path[0].name == root_param.name: + current_root = root_param + break + if current_root is None: + raise ValueError('Invalid path: {}: failed to traverse the path: failed' + ' to find a matching root for parameter name "{}".' + ' Root parameter names: {}'.format( + (complete_path), complete_path[0].name, + [pc.name for pc in root_configs])) + + updated_root, selectors = cls._add_parameters_at_selected_path_inner( + root_configs=root_configs, + complete_path=complete_path, + current_root=current_root, + current_path=complete_path[1:], + parent_name=parent_name, + parent_values=parent_values, + new_children=new_children) + return (root_idx, updated_root, selectors) + + @classmethod + def _add_parameters_at_selected_path_inner( + cls, root_configs: List[parameter_config.ParameterConfig], + complete_path: _PathSelector, + current_root: parameter_config.ParameterConfig, + current_path: _PathSelector, parent_name: str, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig] + ) -> Tuple[parameter_config.ParameterConfig, List['SearchSpaceSelector']]: + """Adds new children to the parameter specified by the path and parent_name. + + Args: + root_configs: A list of configs to include in returned + SearchSpaceSelectors, this list is not modified. These are expected to + be the configs at the root of the search space. 
+ complete_path: The path to include in the returned SearchSpaceSelectors. + current_root: Parent parameter to start the recursion at. + current_path: The path to the parent parameter from current_root. This is + used in the recursion. + parent_name: new_children are added to all parameter with this name. + parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (An updated root with all of its children updated, + List of selectors to all added parameters) + + Raises: + RuntimeError: + ValueError: + """ + updated_children: List[Tuple[MonotypeParameterSequence, + parameter_config.ParameterConfig]] = [] + selectors: List['SearchSpaceSelector'] = [] + + if not current_path: + # This is the end of the path. End the recursion. + # parent_name should be a child of current_root + child_idx = None + for idx, child in enumerate(current_root.child_parameter_configs): + if parent_name == child.name: + child_idx = idx + last_parent_path = [ + _PathSegment(name=parent_name, value=parent_values[0]) + ] + new_path = complete_path + last_parent_path + updated_child, selectors = cls._add_child_parameters( + root_configs, new_path, child, parent_values, new_children) + break + if child_idx is None: + raise ValueError('Invalid parent_name: after traversing the path "{}", ' + 'failed to find a child parameter with name "{}".' + ' Current root="{}"'.format((complete_path), + parent_name, current_root)) + + # Update current_root with the updated child. + for idx, child in enumerate(current_root.child_parameter_configs): + if idx == child_idx: + updated_children.append( + (updated_child.matching_parent_values, updated_child)) + else: + updated_children.append((child.matching_parent_values, child)) + return ( + current_root.clone_without_children.add_children(updated_children), + selectors) + + # Traverse the path: find which child matches the next path selection. 
+ child_idx = None + for idx, child in enumerate(current_root.child_parameter_configs): + if (current_path[0].name == child.name and + current_path[0].value in child.feasible_values): + child_idx = idx + break + if child_idx is None: + raise ValueError('Invalid path: "{}": failed to traverse the path: failed' + ' to find a matching child for path selector "{}".' + ' Current root="{}", current_path="{}"'.format( + (complete_path), (current_path[:1]), + current_root.name, (current_path))) + + updated_child, selectors = cls._add_parameters_at_selected_path_inner( + root_configs=root_configs, + complete_path=complete_path, + current_root=current_root.child_parameter_configs[child_idx], + current_path=current_path[1:], + parent_name=parent_name, + parent_values=parent_values, + new_children=new_children) + # Update current_root with the updated child, leave the selectors untouched. + for idx, child in enumerate(current_root.child_parameter_configs): + if idx == child_idx: + updated_children.append( + (updated_child.matching_parent_values, updated_child)) + else: + updated_children.append((child.matching_parent_values, child)) + return (current_root.clone_without_children.add_children(updated_children), + selectors) + + @classmethod + def _add_child_parameters( + cls, selector_configs: List[parameter_config.ParameterConfig], + selector_path: _PathSelector, parent: parameter_config.ParameterConfig, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig] + ) -> Tuple[parameter_config.ParameterConfig, List['SearchSpaceSelector']]: + """Adds new children to the parent parameter and returns selectors. + + Args: + selector_configs: A list of configs to include in returned + SearchSpaceSelectors, this list is not modified. These are expected to + be the configs at the root of the search space. + selector_path: The path to include in the returned SearchSpaceSelectors. + parent: Parent parameter to add children to. 
+ parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (An updated root with all of its children updated, + List of selectors to all added parameters) + + Raises: + RuntimeError: + ValueError: + """ + updated_children: List[Tuple[MonotypeParameterSequence, + parameter_config.ParameterConfig]] = [] + selectors: List['SearchSpaceSelector'] = [] + + # Add existing children. + for child in parent.child_parameter_configs: + updated_children.append((child.matching_parent_values, child)) + # Add new child parameter configs. + for child in new_children: + updated_children.append((parent_values, child)) + selectors.append( + SearchSpaceSelector( + configs=selector_configs, + selected_path=selector_path, + selected_name=child.name, + selected_values=[])) + # Add all children (existing and potentially new) to the parent. + return (parent.clone_without_children.add_children(updated_children), + selectors) + + +@attr.s(frozen=True, init=True, slots=True, kw_only=True) +class SearchSpace: + """A builder and wrapper for StudyConfig.parameter_configs.""" + + _parameter_configs: List[parameter_config.ParameterConfig] = attr.ib( + init=False, factory=list) + + @classmethod + def _factory( + cls: Type[_T], + parameter_configs: Optional[List[parameter_config.ParameterConfig]] = None + ) -> _T: + """Creates a new SearchSpace containing the provided parameter configs. + + Args: + parameter_configs: + + Returns: + SearchSpace + """ + if parameter_configs is None: + parameter_configs = [] + space = cls() + object.__setattr__(space, '_parameter_configs', list(parameter_configs)) + return space + + @property + def parameters(self) -> List[parameter_config.ParameterConfig]: + """Returns COPIES of the parameter configs in this Space.""" + return copy.deepcopy(self._parameter_configs) + + def select_root(self) -> SearchSpaceSelector: + """Returns a selector for the root of the search space. 
+ + Parameters can be added to the search space using the returned + SearchSpaceSelector. + """ + return SearchSpaceSelector(configs=self._parameter_configs) + + @property + def is_conditional(self) -> bool: + """Returns True if search_space contains any conditional parameters.""" + return any([p.child_parameter_configs for p in self._parameter_configs]) + + def contains(self, parameters: trial.ParameterDict) -> bool: + try: + self.assert_contains(parameters) + return True + except InvalidParameterError: + return False + + def assert_contains(self, parameters: trial.ParameterDict) -> bool: + """Throws an error if parameters is not a valid point in the space. + + Args: + parameters: + + Returns: + Always returns True unless an exception is Raised. + + Raises: + InvalidParameterError: If parameters are invalid. + NotImplementedError: If parameter type is unknown + """ + if self.is_conditional: + raise NotImplementedError('Not implemented for conditional space.') + if len(parameters) != len(self._parameter_configs): + set1 = set(pc.name for pc in self._parameter_configs) + set2 = set(parameters) + raise InvalidParameterError( + f'Search space has {len(self._parameter_configs)} parameters ' + f'but only {len(parameters)} were given. ' + f'Missing in search space: {set2 - set1}. 
' + f'Missing in parameters: {set1 - set2}.') + for pc in self._parameter_configs: + if pc.name not in parameters: + raise InvalidParameterError(f'{pc.name} is missing in {parameters}.') + elif not pc.contains(parameters[pc.name]): + raise InvalidParameterError( + f'{parameters[pc.name]} is not feasible in {pc}') + return True + + +################### Main Class ################### +@attr.define(frozen=False, init=True, slots=True) +class ProblemStatement: + """A builder and wrapper for core StudyConfig functionality.""" + + search_space: SearchSpace = attr.ib( + init=True, + factory=SearchSpace, + validator=attr.validators.instance_of(SearchSpace)) + + metric_information: MetricsConfig = attr.ib( + init=True, + factory=MetricsConfig, + converter=MetricsConfig, + validator=attr.validators.instance_of(MetricsConfig), + kw_only=True) + + metadata: common.Metadata = attr.field( + init=True, + kw_only=True, + factory=common.Metadata, + validator=attr.validators.instance_of(common.Metadata)) + + @property + def debug_info(self) -> str: + return '' diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py new file mode 100644 index 0000000000..5ef52dad05 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py @@ -0,0 +1,490 @@ +"""Tests for vizier.pyvizier.shared.base_study_config.""" + +import numpy as np +from vizier._src.pyvizier.shared import base_study_config +from vizier._src.pyvizier.shared import parameter_config as pc +from vizier._src.pyvizier.shared import trial +from absl.testing import absltest +from absl.testing import parameterized + + +class ObjectiveMetricGoalTest(absltest.TestCase): + + def test_basics(self): + self.assertTrue(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_maximize) + self.assertFalse(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_minimize) + 
self.assertTrue(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_minimize) + self.assertFalse(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_maximize) + + +class MetricTypeTest(absltest.TestCase): + + def test_basics(self): + self.assertTrue(base_study_config.MetricType.SAFETY.is_safety) + self.assertTrue(base_study_config.MetricType.OBJECTIVE.is_objective) + + +class MetricInformationTest(absltest.TestCase): + + def testMinMaxValueDefault(self): + info = base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE) + self.assertEqual(info.min_value, -np.inf) + self.assertEqual(info.max_value, np.inf) + + def testMinMaxValueSet(self): + info = base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + min_value=-1., + max_value=1.) + self.assertEqual(info.min_value, -1.) + self.assertEqual(info.max_value, 1.) + + def testMinMaxBadValueInit(self): + with self.assertRaises(ValueError): + base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + min_value=1., + max_value=-1.) + + def testMinMaxBadValueSet(self): + info = base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + min_value=-1., + max_value=1.) + with self.assertRaises(ValueError): + info.min_value = 2. + with self.assertRaises(ValueError): + info.max_value = -2. 
+ + +class MetricsConfigTest(parameterized.TestCase): + + def testBasics(self): + config = base_study_config.MetricsConfig() + config.append( + base_study_config.MetricInformation( + name='max1', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE)) + config.extend([ + base_study_config.MetricInformation( + name='max_safe1', + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + safety_threshold=0.0), + base_study_config.MetricInformation( + name='max2', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE), + base_study_config.MetricInformation( + name='min1', goal=base_study_config.ObjectiveMetricGoal.MINIMIZE), + base_study_config.MetricInformation( + name='min_safe2', + goal=base_study_config.ObjectiveMetricGoal.MINIMIZE, + safety_threshold=0.0) + ]) + self.assertLen(config, 5) + self.assertLen(config.of_type(base_study_config.MetricType.OBJECTIVE), 3) + self.assertLen(config.of_type(base_study_config.MetricType.SAFETY), 2) + + def testDuplicateNames(self): + config = base_study_config.MetricsConfig() + config.append( + base_study_config.MetricInformation( + name='max1', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE)) + with self.assertRaises(ValueError): + config.append( + base_study_config.MetricInformation( + name='max1', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE)) + + +class SearchSpaceTest(parameterized.TestCase): + + def testAddFloatParamMinimal(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + selector = space.select_root().add_float_param('f1', 1.0, 15.0) + # Test the returned selector. + self.assertEqual(selector.path_string, '') + self.assertEqual(selector.parameter_name, 'f1') + self.assertEqual(selector.parameter_values, []) + # Test the search space. 
+ self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, 'f1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[0].feasible_values + self.assertIsNone(space.parameters[0].default_value) + + _ = space.select_root().add_float_param('f2', 2.0, 16.0) + self.assertLen(space.parameters, 2) + self.assertEqual(space.parameters[0].name, 'f1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[1].name, 'f2') + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) + + def testAddFloatParam(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_float_param( + 'f1', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, 'f1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + def testAddDiscreteParamIntegerFeasibleValues(self): + """Test a Discrete parameter with integer feasible values.""" + space = 
base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_discrete_param( + 'd1', [101, 15.0, 21.0], default_value=15.0) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, 'd1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DISCRETE) + self.assertEqual(space.parameters[0].bounds, (15.0, 101.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + self.assertEqual(space.parameters[0].feasible_values, [15.0, 21.0, 101]) + self.assertEqual(space.parameters[0].default_value, 15.0) + self.assertEqual(space.parameters[0].external_type, pc.ExternalType.INTEGER) + + def testAddDiscreteParamFloatFeasibleValues(self): + """Test a Discrete parameter with float feasible values.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_discrete_param( + 'd1', [15.1, 21.0, 101], default_value=15.1) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].external_type, pc.ExternalType.FLOAT) + + def testAddBooleanParam(self): + """Test a Boolean parameter.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_bool_param('b1', default_value=True) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, 'b1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) + with self.assertRaisesRegex(ValueError, + 'Accessing bounds of a categorical.*'): + _ = space.parameters[0].bounds + self.assertIsNone(space.parameters[0].scale_type) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + self.assertEqual(space.parameters[0].feasible_values, ['False', 'True']) + 
self.assertEqual(space.parameters[0].default_value, 'True') + self.assertEqual(space.parameters[0].external_type, pc.ExternalType.BOOLEAN) + + def testAddBooleanParamWithFalseDefault(self): + """Test a Boolean parameter.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_bool_param('b1', default_value=False) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].default_value, 'False') + + def testAddTwoFloatParams(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_float_param( + 'f1', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG) + _ = space.select_root().add_float_param( + 'f2', 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG) + + self.assertLen(space.parameters, 2) + + self.assertEqual(space.parameters[0].name, 'f1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + self.assertEqual(space.parameters[1].name, 'f2') + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) + self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) + self.assertEmpty(space.parameters[1].matching_parent_values) + self.assertEmpty(space.parameters[1].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[1].feasible_values + self.assertEqual(space.parameters[1].default_value, 4.0) + + def 
testChainAddTwoFloatParams(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + root = space.select_root() + root.add_float_param( + 'f1', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG) + root.add_float_param( + 'f2', 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG) + + self.assertLen(space.parameters, 2) + + self.assertEqual(space.parameters[0].name, 'f1') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + self.assertEqual(space.parameters[1].name, 'f2') + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) + self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) + self.assertEmpty(space.parameters[1].matching_parent_values) + self.assertEmpty(space.parameters[1].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[1].feasible_values + self.assertEqual(space.parameters[1].default_value, 4.0) + + def testMultidimensionalParameters(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + selector0 = space.select_root().add_float_param( + 'f', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG, index=0) + selector1 = space.select_root().add_float_param( + 'f', + 2.0, + 10.0, + default_value=4.0, + scale_type=pc.ScaleType.LINEAR, + index=1) + # Test the returned selectors. 
+ self.assertEqual(selector0.path_string, '') + self.assertEqual(selector0.parameter_name, 'f[0]') + self.assertEqual(selector0.parameter_values, []) + self.assertEqual(selector1.path_string, '') + self.assertEqual(selector1.parameter_name, 'f[1]') + self.assertEqual(selector1.parameter_values, []) + # Test the search space. + self.assertLen(space.parameters, 2) + self.assertEqual(space.parameters[0].name, 'f[0]') + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + self.assertEqual(space.parameters[1].name, 'f[1]') + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 10.0)) + self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.LINEAR) + self.assertEmpty(space.parameters[1].matching_parent_values) + self.assertEmpty(space.parameters[1].child_parameter_configs) + with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): + _ = space.parameters[1].feasible_values + self.assertEqual(space.parameters[1].default_value, 4.0) + + def testConditionalParameters(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + root = space.select_root() + root.add_categorical_param( + 'model_type', ['linear', 'dnn'], default_value='dnn') + # Test the selector. + self.assertEqual(root.path_string, '') + self.assertEqual(root.parameter_name, '') + self.assertEqual(root.parameter_values, []) + # Test the search space. 
+ self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, 'model_type') + self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) + with self.assertRaisesRegex(ValueError, + 'Accessing bounds of a categorical.*'): + _ = space.parameters[0].bounds + self.assertIsNone(space.parameters[0].scale_type) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + self.assertEqual(space.parameters[0].feasible_values, ['dnn', 'linear']) + self.assertEqual(space.parameters[0].default_value, 'dnn') + + dnn = root.select('model_type', ['dnn']) + # Test the selector. + self.assertEqual(dnn.path_string, '') + self.assertEqual(dnn.parameter_name, 'model_type') + self.assertEqual(dnn.parameter_values, ['dnn']) + dnn.add_float_param( + 'learning_rate', + 0.0001, + 1.0, + default_value=0.001, + scale_type=base_study_config.ScaleType.LOG) + # Test the search space. + self.assertLen(space.parameters, 1) + + linear = root.select('model_type', ['linear']) + # Test the selector. + self.assertEqual(linear.path_string, '') + self.assertEqual(linear.parameter_name, 'model_type') + self.assertEqual(linear.parameter_values, ['linear']) + linear.add_float_param( + 'learning_rate', + 0.1, + 1.0, + default_value=0.1, + scale_type=base_study_config.ScaleType.LOG) + # Test the search space. + self.assertLen(space.parameters, 1) + + dnn_optimizer = dnn.add_categorical_param('optimizer_type', + ['adam', 'adagrad']) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(dnn_optimizer.path_string, 'model_type=dnn') + self.assertEqual(dnn_optimizer.parameter_name, 'optimizer_type') + self.assertEqual(dnn_optimizer.parameter_values, []) + + # Chained select() calls, path length of 1. 
+ lr = root.select('model_type', ['dnn']).select( + 'optimizer_type', ['adam']).add_float_param( + 'learning_rate', + 0.1, + 1.0, + default_value=0.1, + scale_type=base_study_config.ScaleType.LOG) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(lr.parameter_name, 'learning_rate') + self.assertEqual(lr.parameter_values, []) + self.assertEqual(lr.path_string, 'model_type=dnn/optimizer_type=adam') + + # Chained select() calls, path length of 2. + ko = root.select('model_type', ['dnn']).select('optimizer_type', + ['adam']).add_bool_param( + 'use_keras_optimizer', + default_value=False) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(ko.parameter_name, 'use_keras_optimizer') + self.assertEqual(ko.parameter_values, []) + self.assertEqual(ko.path_string, 'model_type=dnn/optimizer_type=adam') + + ko.select_values(['True']) + self.assertEqual(ko.parameter_values, ['True']) + + selector = ko.add_float_param('keras specific', 1.3, 2.4, default_value=2.1) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(selector.parameter_name, 'keras specific') + self.assertEqual(selector.parameter_values, []) + self.assertEqual( + selector.path_string, + 'model_type=dnn/optimizer_type=adam/use_keras_optimizer=True') + + # Selects more than one node. + # selectors = dnn.select_all('optimizer_type', ['adam']) + # self.assertLen(selectors, 2) + + def testConditionalParametersWithReturnedSelectors(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + root = space.select_root() + model_type = root.add_categorical_param('model_type', ['linear', 'dnn']) + learning_rate = model_type.select_values(['dnn']).add_float_param( + 'learning_rate', + 0.1, + 1.0, + default_value=0.001, + scale_type=base_study_config.ScaleType.LOG) + # Test the search space. 
+ self.assertLen(space.parameters, 1) + # Test the selectors. + self.assertEqual(model_type.parameter_values, ['dnn']) + self.assertEqual(learning_rate.parameter_name, 'learning_rate') + self.assertEqual(learning_rate.parameter_values, []) + self.assertEqual(learning_rate.path_string, 'model_type=dnn') + + # It is possible to select different values for the same selector. + optimizer_type = model_type.select_values(['linear', + 'dnn']).add_categorical_param( + 'optimizer_type', + ['adam', 'adagrad']) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selectors. + self.assertEqual(model_type.parameter_values, ['linear', 'dnn']) + self.assertEqual(optimizer_type.parameter_name, 'optimizer_type') + self.assertEqual(optimizer_type.parameter_values, []) + self.assertEqual(optimizer_type.path_string, 'model_type=linear') + + @parameterized.named_parameters( + ('Multi', 'units[0]', ('units', 0)), + ('Multi2', 'with_underscore[1]', ('with_underscore', 1)), + ('NotMulti', 'units', None), + ('NotMulti2', 'with space', None), + ('NotMulti3', 'with[8]space', None), + ('NotMulti4', 'units[0][4]', ('units[0]', 4)), + ('GinStyle', '_gin.ambient_net_exp_from_vec.block_type[3]', + ('_gin.ambient_net_exp_from_vec.block_type', 3)), + ) + def testParseMultiDimensionalParameterName(self, name, expected): + base_name_index = base_study_config.SearchSpaceSelector.parse_multi_dimensional_parameter_name( + name) + self.assertEqual(base_name_index, expected) + + +class SearchSpaceContainsTest(absltest.TestCase): + + def _space(self): + space = base_study_config.SearchSpace() + root = space.select_root() + root.add_float_param('learning-rate', 1e-4, 1e-2) + root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) + return space + + def testFloatCat1(self): + self._space().assert_contains( + trial.ParameterDict({ + 'optimizer': 'adagrad', + 'learning-rate': 1e-2 + })) + + def testFloatCat2(self): + self.assertFalse(self._space().contains( + 
trial.ParameterDict({ + 'optimizer': 'adagrad', + 'BADPARAM': 1e-2 + }))) + + def testFloatCat3(self): + self.assertFalse(self._space().contains( + trial.ParameterDict({ + 'optimizer': 'adagrad', + 'learning-rate': 1e-2, + 'BADPARAM': 1e-2 + }))) + + def testFloatCat4(self): + self.assertFalse(self._space().contains( + trial.ParameterDict({ + 'optimizer': 'adagrad', + 'learning-rate': 1e2 + }))) + + +if __name__ == '__main__': + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/common.py b/google/cloud/aiplatform/vizier/pyvizier/common.py new file mode 100644 index 0000000000..15c710fa0f --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/common.py @@ -0,0 +1,510 @@ +"""Common classes shared between Study and Trial.""" + +import collections +from collections import abc +from typing import DefaultDict, Dict, overload +from typing import Iterable, List, Optional, Tuple, TypeVar, Union, Type +import attr + +from google.protobuf import any_pb2 +from google.protobuf.message import Message + +M = TypeVar('M', bound=Message) +T = TypeVar('T') +MetadataValue = Union[str, any_pb2.Any, Message] + +# Namespace Encoding. +# +# By definition, ∀ ns ∈ Namespace, Namespace.decode(ns.encode()) == ns. +# The tricky part of that definition is handling namespaces with components +# that are empty strings. Notably, we want to make sure that +# Namespace(()).encode() != Namespace(('',)).encode(). +# So, we set up the mapping: +# Namespace(()).encode() -> '' +# Namespace((s,)).encode() -> ':s' +# Namespace((s, s)).encode() -> ':s:s', +# et cetera, and note that every tuple gets a unique encoding, even if $s is the +# empty string. (As long as we escape colons properly.) +# +# So, ns.encode() is a bijection, therefore it has an inverse which we call +# Namespace.decode(s). 
+ + +def _parse(arg: str) -> Tuple[str, ...]: + """Parses an encoded namespace string into a namespace tuple.""" + # The tricky part here is that arg.split('') has a length of 1, so it can't + # generate a zero-length tuple; we handle that corner case manually. + if not arg: + return () + # And, then, once we've handled the case of _parse(''), we note that all the + # other encoded strings begin with a colon. It thus contains no information + # and we can remove it. + # TODO: Once we're on Python 3.9, use: arg = arg.removeprefix(':') + if arg.startswith(':'): + arg = arg[1:] + # The rest of the algorithm is that we split on all colons, both + # escaped and unescaped. Then, we walk through the list of fragments and + # join back together the colons that were preceeded by an escape character, + # dropping the escape character as we go. + fragments = arg.split(':') + output = [] + join = False + for frag in fragments: + if join and frag and frag[-1] == '\\': + output[-1] += ':' + frag[:-1] + join = True + elif join: # Doesn't end in an escape character. + output[-1] += ':' + frag + join = False + elif frag and frag[-1] == '\\': # Don't join to previous. + output.append(frag[:-1]) + join = True + else: # Don't join to previous and doesn't end in an escape. + output.append(frag) + join = False + return tuple(output) + + +@attr.frozen(eq=True, order=True, hash=True, auto_attribs=True, init=False) +class Namespace(abc.Sequence): + r"""A namespace for the Metadata class. + + Namespaces form a tree; a particular namespace can be thought of as a tuple of + namespace components. + + You can create a Namespace from a string, with Namespace.decode(s), where + the string is parsed into components, splitting at colons; decode('a:b') gives + you a two-component namespace: ('a', 'b'). + Or, you can create that same Namespace from a tuple of strings/components + e.g. by constructing Namespace(('a', 'b')). 
In the tuple case, the strings + are not parsed and colons are ordinary characters. + + TLDR: If you decode() a namespace from a string, then ":" is a + reserved character, but when constructing from a tuple, there are no + reserved characters. + + Decoding the string form: + * Initial colons don't matter: Namespace.decode(':a') == Namespace('a'); + this is a single-component namespace. + * Colons separate components: + Namespace.decode('a:b') == Namespace(['a', 'b']). + (This is a two-component namespace.) + * Colons are encoded as r'\:': + Namespace.decode('a\\:b') == Namespace(('a:b')). + (This is a single-component namespace.) + + Conversions: For a Namespace x, + * Namespace.decode(x.encode()) == x; here, x.encode() will be a string with + colons separating the components. + * Namespaces act as a Sequence[str], so Namespace(tuple(x)) == x and + Namespace(x) == x. + """ + + _as_tuple: Tuple[str, ...] = attr.field(hash=True, eq=True, order=True) + + def __init__(self, arg: Iterable[str] = ()): + """Generates a Namespace from its component strings. + + Args: + arg: a tuple representation of a namespace. + """ + arg = tuple(arg) + self.__attrs_init__(as_tuple=arg) + + _ns_repr_table = str.maketrans({':': r'\:'}) + + @classmethod + def decode(cls, s: str) -> 'Namespace': + r"""Decode a string into a Namespace. + + For a Namespace x, Namespace.decode(x.encode()) == x. + + Args: + s: A string where ':' separates namespace components, and colon is + escaped as r'\:'. + + Returns: + A namespace. + """ + return Namespace(_parse(s)) + + def encode(self) -> str: + """Encodes a Namespace into a string. + + Given a Namespace x, Namespace.decode(x.encode()) == x. + + Returns: + Colons are escaped, then Namespace components are joined by colons. 
+ """ + return ''.join( + [':' + c.translate(self._ns_repr_table) for c in self._as_tuple]) + + def __len__(self) -> int: + """Number of components (elements of the tuple form).""" + return len(self._as_tuple) + + def __add__(self, other: Iterable[str]) -> 'Namespace': + """Appends components onto the namespace.""" + return Namespace(self._as_tuple + tuple(other)) + + @overload + def __getitem__(self, key: int) -> str: + ... + + @overload + def __getitem__(self, key: slice) -> 'Namespace': + ... + + def __getitem__(self, key): + """Retrieves item by the specified key.""" + if isinstance(key, int): + return self._as_tuple[key] + return Namespace(self._as_tuple[key]) + + def __str__(self) -> str: + """Shows the namespace, fully escaped.""" + return self.encode() + + def __repr__(self) -> str: + """Shows the namespace, fully escaped.""" + return f'Namespace({self.encode()})' + + def startswith(self, prefix: Iterable[str]) -> bool: + """Returns True if this namespace starts with prefix.""" + ns_prefix = Namespace(prefix) + return self[:len(ns_prefix)] == ns_prefix + + +class _MetadataSingleNameSpace(Dict[str, MetadataValue]): + """Stores metadata associated with one namespace.""" + pass + + +class Metadata(abc.MutableMapping): + """Metadata class. + + This is the main interface for reading metadata from a Trial (writing metadata + should typically be done via the MetadataUpdateContext class.) + + This behaves like a str->str dict, within a given namespace. + mm = Metadata({'foo': 'Foo'}) + mm.get('foo') # Returns 'Foo' + mm['foo'] # Returns 'Foo' + mm['bar'] = 'Bar' + mm.update({'a': 'A'}, gleep='Gleep') + + 1. Keys are namespaced. Each Metadata object only interacts with one + Namespace, but a metadata object and its children share a + common set of (namespace, key, value) triplets. + + Namespaces form a tree, and you can walk down the tree. 
There are two + namespace operators: ns(s) which adds component(s) on to the namespace, and + abs_ns() which specifies the entire namespace. + + A Metadata() object is always created at the root of the namespace tree, + and the root is special (it's the only namespace that Vizier users can write + or conveniently read). Pythia algorithm developers should avoid the root + namespace, unless they intend to pass data to/from Vizier users. + + mm = Metadata({'foo': 'foofoo'}) + mm.ns('NewName')['bar'] = 'Bar' + mm['foo'] # Returns 'foofoo' + mm['bar'] # Throws a KeyError + mm.ns('NewName')['foo'] # Throws a KeyError + mm.ns('NewName')['bar'] # Returns 'Bar' + mm.ns('NewName').get('bar') # Returns 'Bar' + + # Use of abs_ns(). + mm = Metadata() + mm.abs_ns(Namespace(('NewName',)))['bar'] = 'Bar' + mm.abs_ns(Namespace(('NewName',))) # returns 'Bar' + + # Multi-component namespaces. + mm = Metadata() + mm.ns('a').ns('b')['foo'] = 'AB-foo' + mm.ns('a')['foo'] = 'A-foo' + mm['foo'] # Throws a KeyError + mm.ns('a')['foo'] # returns 'A-foo' + mm.ns('a').ns('b')['foo'] # returns 'AB-foo' + mm.abs_ns(Namespace(('a', 'b'))).get('foo') # Returns 'ab-foo' + mm.abs_ns(Namespace.decode('a:b')).get('foo') # Returns 'ab-foo' + + 2. Values can be protobufs. If `metadata['foo']` is an instance of `MyProto` + proto message or `Any` proto that packs a `MyProto` message, then the proto + can be recovered by calling: + my_proto = metadata.get_proto('foo', cls=MyProto) + isinstance(my_proto, MyProto) # Returns `True` + + 3. An iteration over a Metadata object only shows you the data in the current + namespace. So, + + mm = Metadata({'foo': 'foofoo'}) + for k, v in mm.ns('gleep'): + ... + + will not yield anything because there are no keys in the 'gleep' namespace. + WARNING: Because of this behavior, Metadata(mm) will quietly drop metadata + from all but mm's current namespace. + + Be aware that type(v) is MetadataValue, which includes protos in addition to + strings. 
+ + To iterate over all the keys in all the namespaces use the namespaces() + method. + + mm : Metadata + for ns in mm.namespaces(): + for k, v in mm.abs_ns(ns).items(): + ... + """ + + def __init__(self, *args: Union[Dict[str, MetadataValue], + Iterable[Tuple[str, MetadataValue]]], + **kwargs: MetadataValue): + """Construct; this follows dict(), and puts data in the root namespace. + + You can pass it a dict, or an object that yields (key, value) + pairs, and those pairs will be put in the root namespace. + + Args: + *args: A dict or an iterable the yields key-value pairs. + **kwargs: key=value pairs to be added to the specified namespace. + """ + self._stores: DefaultDict[ + Namespace, _MetadataSingleNameSpace] = collections.defaultdict( + _MetadataSingleNameSpace) + self._namespace = Namespace() + self._store = self._stores[self._namespace] + self._store.update(*args, **kwargs) + + def abs_ns(self, namespace: Iterable[str] = ()) -> 'Metadata': + """Switches to a specified absolute namespace. + + All the Metadata object's data is shared between $self and the returned + object, but the new Metadata object will have a different default + namespace. + + Args: + namespace: a list of Namespace components. (Defaults to the root, empty + Namespace.) + + Returns: + A new Metadata object in the specified namespace; the new object shares + data (except the namespace) with $self. + """ + return self._copy_core(Namespace(namespace)) + + def ns(self, component: str) -> 'Metadata': + r"""Switches to a deeper namespace by appending a component. + + All the metadata is shared between $self and the returned value, but they + have a different current namespace. + + Args: + component: one component to be added to the current namespace. + + Returns: + A new Metadata object in the specified namespace; the new object shares + metadata (except the choice of namespace) with $self. 
+ """ + new_ns: Namespace = self._namespace + (component,) + return self._copy_core(new_ns) + + def __repr__(self) -> str: + itemlist: List[str] = [] + for namespace, store in self._stores.items(): + item_string = f'(namespace:{namespace}, items: {store})' + itemlist.append(item_string) + return 'Metadata({}, current_namespace={})'.format(', '.join(itemlist), + self._namespace.encode()) + + def __str__(self) -> str: + return 'namespace: {} items: {}'.format(str(self._namespace), self._store) + + def get_proto(self, key: str, *, cls: Type[M]) -> Optional[M]: + """Deprecated. + + Use get() instead. + + Gets the metadata as type `cls`, or None if not possible. + + Args: + key: + cls: Pass in a proto ***class***, not a proto object. + + Returns: + Proto message, if the value associated with the key exists and + can be parsed into cls; None otherwise. + """ + value = self._store.get(key, None) + if value is None: + return None + + if isinstance(value, cls): + # Starting from 3.10, pytype supports typeguard, which obsoletes + # the need for the `pytype:disable` clause. + return value # pytype: disable=bad-return-type + + if isinstance(value, any_pb2.Any): + # `value` is an Any proto potentially packing `cls`. + message = cls() + success = value.Unpack(message) + return message if success else None + + return None + + def get(self, + key: str, + default: Optional[T] = None, + *, + cls: Type[T] = str) -> Optional[T]: + """Gets the metadata as type `cls`, or None if not possible. + + Given regular string values, this function behaves exactly like a + regular string-to-string dict (within its namespace). 
+ metadata = common.Metadata({'key': 'value'}) + assert metadata.get('key') == 'value' + assert metadata.get('badkey', 'badvalue') == 'badvalue' + + Example with numeric string values: + metadata = common.Metadata({'float': '1.2', 'int': '60'}) + assert metadata.get('float', cls=float) == 1.2 + assert metadata.get('badkey', 0.2, cls=float) == 0.2 + assert metadata.get('int', cls=int) == 60 + assert metadata.get('badkey', 1, cls=int) == 1 + + Example with `Duration` and `Any` proto values: + duration = Duration(seconds=60) + anyproto = Any() + anyproto.Pack(duration) + metadata = common.Metadata({'duration': duration, 'any': anyproto}) + assert metadata.get('duration', cls=Duration) == duration + assert metadata.get('any', cls=Duration) == duration + + Args: + key: + default: Default value. + cls: Desired type of the value. + + Returns: + Default if the key does not exist. Otherwise, the matching value is + parsed into type `cls`. For proto messages, it involves unpacking + Any proto. + """ + try: + value = self._store[key] + except KeyError: + return default + if isinstance(value, cls): + # Starting from 3.10, pytype supports typeguard, which obsoletes + # the need for the `pytype:disable` clause. + return value # pytype: disable=bad-return-type + if isinstance(value, any_pb2.Any): + # `value` is an Any proto potentially packing `cls`. + message = cls() + success = value.Unpack(message) + return message if success else None + return cls(value) + + # TODO: Rename to `abs_namespaces` + def namespaces(self) -> Tuple[Namespace, ...]: + """Get all namespaces for which there is at least one key. + + Returns: + For all `ns` in `md.namespaces()`, `md.abs_ns(ns)` is not empty. + """ + return tuple([ns for ns, store in self._stores.items() if store]) + + # TODO: Rename to `namespaces` + def subnamespaces(self) -> Tuple[Namespace, ...]: + """Returns relative namespaces that are at or below the current namespace. 
+ + For all `ns` in `md.subnamespaces()`, `md.abs_ns(md.current_ns() + ns)` is + not empty. E.g. if namespace 'foo:bar' is non-empty, and you're in + namespace 'foo', then the result will contain namespace 'bar'. + + Returns: + For namespaces that begin with the current namespace and are + non-empty, this returns a namespace object that contains the relative + path from the current namespace. + """ + return tuple([ + Namespace(ns[len(self._namespace):]) + for ns, store in self._stores.items() + if store and ns.startswith(self._namespace) + ]) + + def current_ns(self) -> Namespace: + """Displays the object's current Namespace.""" + return self._namespace + + # START OF abstract methods inherited from `MutableMapping` base class. + def __getitem__(self, key: str) -> MetadataValue: + return self._store.__getitem__(key) + + def __setitem__(self, key: str, value: MetadataValue): + self._store[key] = value + + def __delitem__(self, key: str): + del self._store[key] + + def __iter__(self): + return iter(self._store) + + def __len__(self): + return len(self._store) + + def __copy__(self) -> 'Metadata': + """Shallow copy -- metadata continues to be shared. + + Returns: + A copy of the object. + """ + return self._copy_core(self._namespace) + + # END OF Abstract methods inherited from `MutableMapping` base class. + + def _copy_core(self, ns: Namespace) -> 'Metadata': + """Shallow copy: metadata is shared, default namespace changes. + + Args: + ns: the namespace to use for the new object. + + Returns: + A copy of the object. 
+ """ + md = Metadata() + md._namespace = ns # pylint: disable='protected-access' + md._stores = self._stores # pylint: disable='protected-access' + md._store = md._stores[md._namespace] # pylint: disable='protected-access' + return md + + def update(self, *args: Union[Dict[str, MetadataValue], + Iterable[Tuple[str, MetadataValue]]], + **kwargs: MetadataValue) -> None: + self._store.update(*args, **kwargs) + + def attach(self, other: 'Metadata') -> None: + """Attach the $other metadata as a descendent of this metadata. + + More precisely, it takes the part of `other`'s namespace that is at or + below `other`'s current namespace, and attaches it to `self`'s current + namespace. + * Tree structure is preserved and nothing is flattened. + * Attached data overwrites existing data, item-by-item, not + namepace-by-namespace. + + So, if we have + other = Metadata() + other.abs_ns(Namespace.(('x', 'y', 'z'))['foo'] = 'bar' + m = Metadata() + m.ns('w').attach(other.ns('x')) + then + m.abs_ns(('w', 'y', 'z'))['foo'] will contain 'bar'. + + Args: + other: a Metadata object to copy from. 
+ """ + for ns in other.subnamespaces(): + self._stores[self._namespace + ns].update( + other.abs_ns(other.current_ns() + ns)) diff --git a/google/cloud/aiplatform/vizier/pyvizier/common_test.py b/google/cloud/aiplatform/vizier/pyvizier/common_test.py new file mode 100644 index 0000000000..54162585c4 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/common_test.py @@ -0,0 +1,334 @@ +"""Tests for vizier.pyvizier.shared.common.""" + +import copy +from vizier._src.pyvizier.shared import common +from google.protobuf import any_pb2 +from google.protobuf import duration_pb2 +from absl.testing import absltest + + +class MetadataGetClsTest(absltest.TestCase): + + def test_get_proto(self): + duration = duration_pb2.Duration(seconds=60) + anyproto = any_pb2.Any() + anyproto.Pack(duration) + metadata = common.Metadata(duration=duration, any=anyproto) + + self.assertEqual( + metadata.get_proto('duration', cls=duration_pb2.Duration), duration) + self.assertEqual( + metadata.get_proto('any', cls=duration_pb2.Duration), duration) + self.assertEqual( + metadata.get('duration', cls=duration_pb2.Duration), duration) + self.assertEqual(metadata.get('any', cls=duration_pb2.Duration), duration) + + def test_get_int(self): + metadata = common.Metadata({'string': '30', 'int': '60'}) + self.assertEqual(metadata.get('string', cls=int), 30) + self.assertEqual(metadata.get('int', cls=int), 60) + self.assertEqual(metadata.get('badkey', 1, cls=int), 1) + + +class MetadataNamespaceTest(absltest.TestCase): + + def test_basic(self): + ns0 = common.Namespace() + self.assertEmpty(ns0) + self.assertEqual(str(ns0), '') + self.assertEqual(ns0.encode(), '') + self.assertEqual(ns0, common.Namespace.decode('')) + n1t = common.Namespace(('aerer',)) + self.assertLen(n1t, 1) + n1 = common.Namespace.decode('a78') + self.assertLen(n1, 1) + self.assertEqual(str(n1), ':a78') + n2 = common.Namespace(('a78', 'bfe')) + self.assertLen(n2, 2) + n2s1 = common.Namespace.decode('a78:bfe') + 
self.assertLen(n2s1, 2) + self.assertEqual(n2.encode(), n2s1.encode()) + n2s2 = common.Namespace.decode(':a78:bfe') + self.assertLen(n2s2, 2) + self.assertEqual(n2.encode(), n2s2.encode()) + self.assertEqual(n2, n2s2) + self.assertEqual(n2s1, n2s2) + ns = common.Namespace(('a', 'b')) + self.assertLen(ns, 2) + self.assertEqual(tuple(ns), ('a', 'b')) + self.assertEqual(str(ns), ':a:b') + self.assertEqual(ns.encode(), ':a:b') + + def test_escape(self): + s1 = 'a\\:A' + ns1 = common.Namespace.decode(s1) + self.assertLen(ns1, 1) + self.assertEqual(str(ns1), ':a\\:A') + self.assertEqual(ns1.encode(), ':' + s1) + self.assertEqual(common.Namespace.decode(ns1.encode()), ns1) + # + s2 = 'b:B' + ns2 = common.Namespace.decode(s2) + self.assertLen(ns2, 2) + self.assertEqual(str(ns2), ':' + s2) + self.assertEqual(ns2.encode(), ':' + s2) + self.assertEqual(common.Namespace.decode(ns2.encode()), ns2) + # + s1e1 = ':b\\B' + ns1e1 = common.Namespace.decode(s1e1) + self.assertLen(ns1e1, 1) + self.assertEqual(ns1e1.encode(), s1e1) + self.assertEqual(common.Namespace.decode(ns1e1.encode()), ns1e1) + ns1e2 = common.Namespace((s1e1.lstrip(':'),)) + self.assertLen(ns1e2, 1) + self.assertEqual(ns1e2.encode(), s1e1) + self.assertEqual(ns1e2, ns1e1) + self.assertEqual(common.Namespace.decode(ns1e2.encode()), ns1e2) + # + s1c = r':b\:B' + ns1c = common.Namespace.decode(s1c) + self.assertLen(ns1c, 1) + # Initial colon is harmlessly removed. 
+ self.assertEqual(ns1c.encode(), s1c) + self.assertEqual(common.Namespace.decode(ns1c.encode()), ns1c) + self.assertEqual(common.Namespace(('b:B',)), ns1c) + + +class MetadataTest(absltest.TestCase): + + def create_test_metadata(self): + md = common.Metadata({'bar': 'bar_v'}, foo='foo_v') + md.ns('Name').update(foo='Name_foo_v', baz='Name_baz_v') + return md + + def test_empty_namespaces(self): + md = common.Metadata() + self.assertEmpty(list(md.namespaces())) + md = common.Metadata().ns('ns') + self.assertEmpty(list(md.namespaces())) + + def test_nonempty_namespaces(self): + mm = self.create_test_metadata() + self.assertLen(mm.namespaces(), 2) + + def test_getters_are_consistent_when_item_is_in_dict(self): + mm = self.create_test_metadata() + self.assertEqual(mm['foo'], 'foo_v') + self.assertEqual(mm.get('foo'), 'foo_v') + + def test_getters_are_consistent_when_item_is_not_in_dict(self): + mm = self.create_test_metadata() + self.assertIsNone(mm.get('baz')) + with self.assertRaises(KeyError): + _ = mm['baz'] + + def test_separator_is_not_allowed_as_keys_after_init(self): + mm = self.create_test_metadata() + with self.assertRaises(KeyError): + _ = mm['Name_foo'] + + def test_namespace_works_as_intended(self): + mm = self.create_test_metadata() + self.assertEqual(mm.ns('Name')['foo'], 'Name_foo_v') + self.assertIsNone(mm.ns('Name').get('bar')) + + mm_name = mm.ns('Name') + self.assertEqual(mm_name['foo'], 'Name_foo_v') + self.assertIsNone(mm_name.get('bar')) + self.assertEqual(mm.ns('Name')['foo'], 'Name_foo_v') + + def test_create_new_namespace(self): + # Calling ns() with an unexisting namespace should work fine. 
+ mm = self.create_test_metadata() + mm.ns('NewName')['foo'] = 'NewName_foo_v' + self.assertEqual(mm.ns('NewName')['foo'], 'NewName_foo_v') + self.assertIsNone(mm.ns('NewName').get('bar')) + + def test_changing_namespace_copies_reference(self): + mm = self.create_test_metadata() + # Calling ns() copies by reference so any changes to the returned Metadata + # object is reflected in the original object. + mm_in_namespace = mm.ns('Name') + mm_in_namespace['foofoo'] = 'Name_foofoo_v' + self.assertEqual(mm.ns('Name')['foofoo'], 'Name_foofoo_v') + + def test_iterators(self): + mm = self.create_test_metadata() + self.assertSequenceEqual(list(mm.keys()), ['bar', 'foo']) + self.assertSequenceEqual( + list(mm.ns('Name').values()), ['Name_foo_v', 'Name_baz_v']) + self.assertLen(list(mm.items()), 2) + + def test_repr_str(self): + mm = self.create_test_metadata() + self.assertNotEmpty(str(mm), '') + self.assertNotEmpty(repr(mm), repr('')) + + def test_update(self): + md = common.Metadata(foo='foo_v') + md.ns('Name').update(foo='Name_foo_v', baz='Name_baz_v') + + md2 = common.Metadata() + md2.ns('Name').update(foo='Name_foo_v2', bar='Name_bar_v2') + + md.ns('Name').update(md2.ns('Name')) + + self.assertLen(md.ns('Name'), 3) + self.assertIn('bar', md.ns('Name')) + + def test_copy(self): + # There's no useful distinction to be made between copy.copy() and + # copy.deepcopy(). + mm = common.Metadata().ns('ns1') + mm.update(foo='bar') + mm_copy = copy.copy(mm) + mm_deepcopy = copy.deepcopy(mm) + # Check that copies match. + self.assertEqual(mm['foo'], 'bar') + self.assertEqual(mm_copy['foo'], 'bar') + self.assertEqual(mm_deepcopy['foo'], 'bar') + self.assertEqual(mm_deepcopy.namespaces(), mm.namespaces()) + self.assertEqual(mm_copy.namespaces(), mm.namespaces()) + # Check that the deep copy is disconnected. 
+ mm_deepcopy['nerf'] = 'gleep' + with self.assertRaises(KeyError): + mm['nerf'] # pylint: disable=pointless-statement + with self.assertRaises(KeyError): + mm_copy['nerf'] # pylint: disable=pointless-statement + # Check that the shallow copy shares the metadata store with the original. + mm_copy['blip'] = 'tonk' + self.assertEqual(mm['blip'], mm_copy['blip']) + # ... but no sharing with the deep copy. + with self.assertRaises(KeyError): + mm_deepcopy['blip'] # pylint: disable=pointless-statement + # Here's a test for a specific bug, where Metadata._store is improperly + # disconnected from Metadata._stores. + mx = common.Metadata() + copy.copy(mx).ns('A')['a'] = 'Aa' + self.assertEqual(mx.ns('A')['a'], 'Aa') + + def test_construction(self): + # Test with iterables. + m0i = common.Namespace([]) + self.assertEmpty(m0i) + m0d = common.Namespace.decode('') + self.assertEmpty(m0d) + self.assertEqual(m0d, m0i) + m1i = common.Namespace(['abc']) + self.assertLen(m1i, 1) + self.assertEqual(m1i, common.Namespace(tuple(m1i))) + self.assertEqual(m1i, common.Namespace.decode(m1i.encode())) + m2i = common.Namespace(['abc', 'def']) + self.assertLen(m2i, 2) + self.assertEqual(m2i, common.Namespace(tuple(m2i))) + self.assertEqual(m2i, common.Namespace.decode(m2i.encode())) + m3i = common.Namespace(['abc', 'de:f']) + self.assertLen(m3i, 2) + self.assertEqual(m3i, common.Namespace(tuple(m3i))) + self.assertEqual(m3i, common.Namespace.decode(m3i.encode())) + # Test with strings. 
+ m1sc = common.Namespace.decode(':abc') + self.assertLen(m1sc, 1) + self.assertEqual(m1sc, common.Namespace(tuple(m1sc))) + self.assertEqual(m1sc, common.Namespace.decode(m1sc.encode())) + m1s = common.Namespace.decode('abc') + self.assertLen(m1s, 1) + self.assertEqual(m1s, common.Namespace(tuple(m1s))) + self.assertEqual(m1s, common.Namespace.decode(m1s.encode())) + m2s = common.Namespace.decode('abc:def') + self.assertLen(m2s, 2) + self.assertEqual(m2s, common.Namespace(tuple(m2s))) + self.assertEqual(m2s, common.Namespace.decode(m2s.encode())) + m3s = common.Namespace.decode('abc:de\\f') + self.assertLen(m3s, 2) + self.assertEqual(m3s, common.Namespace(tuple(m3s))) + self.assertEqual(m3s, common.Namespace.decode(m3s.encode())) + + def test_startswith(self): + m1 = common.Namespace(['aa', 'bb']) + self.assertTrue(m1.startswith(common.Namespace(['aa']))) + self.assertTrue(m1.startswith(common.Namespace(['aa', 'bb']))) + self.assertTrue(m1.startswith(m1)) + self.assertTrue(m1.startswith(common.Namespace(tuple(m1)))) + self.assertFalse(m1.startswith(common.Namespace(['bb']))) + self.assertFalse(m1.startswith(common.Namespace(['aa', 'bb', 'cc']))) + self.assertFalse(m1.startswith(common.Namespace(['bb', 'bb']))) + self.assertFalse(m1.startswith(common.Namespace(['aa', 'aa']))) + + def test_subnamespace(self): + mm = common.Metadata() + mm.ns('ns1')['foo'] = 'bar' + mm.ns('ns2')['foo'] = 'bar' + mm.ns('ns1').ns('ns11')['foo'] = 'bar' + mm.ns('ns1').ns('ns:11')['gleep'] = 'nerf' + + self.assertSequenceEqual(mm.subnamespaces(), [ + common.Namespace(['ns1']), + common.Namespace(['ns2']), + common.Namespace(['ns1', 'ns11']), + common.Namespace(['ns1', 'ns:11']), + ]) + self.assertSequenceEqual( + mm.ns('ns1').subnamespaces(), [ + common.Namespace([]), + common.Namespace(['ns11']), + common.Namespace(['ns:11']) + ]) + self.assertSequenceEqual(mm.ns('ns2').subnamespaces(), [common.Namespace()]) + self.assertSequenceEqual(mm.ns('ns3').subnamespaces(), []) + + def 
test_namespace_add(self): + n0 = common.Namespace() + self.assertEmpty(n0) + self.assertEqual(n0 + (), common.Namespace([])) + self.assertEqual(n0 + ('ab',), common.Namespace([ + 'ab', + ])) + self.assertEqual(n0 + ('a:b',), common.Namespace(['a:b'])) + self.assertEqual(n0 + ('a:b',), common.Namespace(['a:b'])) + self.assertEqual(n0 + ('ab', 'cd'), common.Namespace(['ab', 'cd'])) + n1 = common.Namespace(['xy']) + self.assertLen(n1, 1) + self.assertEqual(n1 + ('ab',), common.Namespace(['xy', 'ab'])) + self.assertEqual(n1 + ('a:b',), common.Namespace(['xy', 'a:b'])) + self.assertEqual(n1 + ('a:b',), common.Namespace(['xy', 'a:b'])) + n2 = common.Namespace(['xy', 'zw']) + self.assertLen(n2, 2) + self.assertLen(n2 + ('ab',), 3) + self.assertEqual(n2 + ('ab',), common.Namespace(['xy', 'zw', 'ab'])) + self.assertLen(n2 + ('ab', 'cd'), 4) + self.assertEqual(n2 + ('ab', 'cd'), common.Namespace.decode('xy:zw:ab:cd')) + + def test_metadata_attach(self): + # Set up a metadata tree. + mm = common.Metadata() + mm.ns('ns1').ns('ns:11').update(foo='bar') + mm.ns('ns1').ns('ns12').update(foo='gleep') + mm.ns('ns1').update(foo='nerf') + mm.ns('ns|').update(foo='pag') + # Attach that metadata tree to a branch of an empty tree. + m1 = common.Metadata() + m1.ns('ns0').ns('ns00').attach(mm) + self.assertEmpty(m1.abs_ns()) + self.assertEqual(m1.ns('ns0').ns('ns00'), mm) + self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns1', 'ns:11'])['foo'], 'bar') + self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns1', 'ns12'])['foo'], 'gleep') + self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns1'])['foo'], 'nerf') + self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns|'])['foo'], 'pag') + # Attach just part of $mm to a branch of a new, empty tree. 
+    m2 = common.Metadata()
+    m2.ns('nsX').attach(mm.ns('ns1'))
+    self.assertEqual(m2.abs_ns(['nsX', 'ns:11'])['foo'], 'bar')
+    self.assertEqual(m2.abs_ns(['nsX', 'ns12'])['foo'], 'gleep')
+    self.assertEqual(m2.abs_ns(['nsX'])['foo'], 'nerf')
+    # Check that attach() overwrites key collisions, but preserves other data.
+    m3 = common.Metadata()
+    m3['foo'] = 'Y'  # This will be overwritten.
+    m3['z'] = 'Z'  # This will not be overwritten.
+    m3.attach(mm.ns('ns1').ns('ns:11'))
+    self.assertEqual(m3['z'], 'Z')
+    self.assertEqual(m3['foo'], 'bar')
+
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/google/cloud/aiplatform/vizier/pyvizier/context.py b/google/cloud/aiplatform/vizier/pyvizier/context.py
new file mode 100644
index 0000000000..4694dac4e0
--- /dev/null
+++ b/google/cloud/aiplatform/vizier/pyvizier/context.py
@@ -0,0 +1,47 @@
+"""Wrapper classes for Context protos and other messages in them."""
+from typing import Dict, Optional
+
+import attr
+from google.cloud.aiplatform.vizier.pyvizier.shared import common
+from google.cloud.aiplatform.vizier.pyvizier.shared import trial
+
+Metadata = common.Metadata
+ParameterValue = trial.ParameterValue
+
+
+@attr.s(auto_attribs=True, frozen=False, init=True, slots=True)
+class Context:
+  """Wrapper for Context proto."""
+  description: Optional[str] = attr.ib(
+      init=True,
+      kw_only=True,
+      default=None,
+      validator=attr.validators.optional(attr.validators.instance_of(str)),
+      on_setattr=attr.setters.validate)
+
+  parameters: Dict[str, ParameterValue] = attr.ib(
+      init=True,
+      kw_only=True,
+      factory=dict,
+      validator=attr.validators.deep_mapping(
+          key_validator=attr.validators.instance_of(str),
+          value_validator=attr.validators.instance_of(ParameterValue),
+          mapping_validator=attr.validators.instance_of(dict)),
+      on_setattr=attr.setters.validate)  # pytype: disable=wrong-arg-types
+
+  metadata: Metadata = attr.ib(
+      init=True,
+      kw_only=True,
+      factory=Metadata,
+      validator=attr.validators.instance_of(Metadata),
+ on_setattr=attr.setters.validate) + + related_links: Dict[str, str] = attr.ib( + init=True, + kw_only=True, + factory=dict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(str), + mapping_validator=attr.validators.instance_of(dict)), + on_setattr=attr.setters.validate) # pytype: disable=wrong-arg-types diff --git a/google/cloud/aiplatform/vizier/pyvizier/context_test.py b/google/cloud/aiplatform/vizier/pyvizier/context_test.py new file mode 100644 index 0000000000..ed9665e972 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/context_test.py @@ -0,0 +1,18 @@ +"""Tests for vizier.pyvizier.shared.context.""" + +from vizier._src.pyvizier.shared import context +from absl.testing import absltest + + +class ContextTest(absltest.TestCase): + + def testDefaultsNotShared(self): + """Make sure default parameters are not shared between instances.""" + context1 = context.Context() + context2 = context.Context() + context1.parameters['x1'] = context.ParameterValue(5) + self.assertEmpty(context2.parameters) + + +if __name__ == '__main__': + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py new file mode 100644 index 0000000000..528f6ec0f5 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py @@ -0,0 +1,529 @@ +"""ParameterConfig wraps ParameterConfig and ParameterSpec protos.""" + +import collections +import copy +import enum +import math +from typing import Generator, List, Optional, Sequence, Tuple, Union + +from absl import logging +import attr + +from google.cloud.aiplatform.vizier.pyvizier import trial + +ExternalType = trial.ExternalType + + +class ParameterType(enum.Enum): + """Valid Values for ParameterConfig.type.""" + DOUBLE = 'DOUBLE' + INTEGER = 'INTEGER' + CATEGORICAL = 'CATEGORICAL' + DISCRETE = 'DISCRETE' + + def is_numeric(self) -> bool: + 
return self in [self.DOUBLE, self.INTEGER, self.DISCRETE] + + +class ScaleType(enum.Enum): + """Valid Values for ParameterConfig.scale_type.""" + LINEAR = 'LINEAR' + LOG = 'LOG' + REVERSE_LOG = 'REVERSE_LOG' + UNIFORM_DISCRETE = 'UNIFORM_DISCRETE' + + +# A sequence of possible internal parameter values. +MonotypeParameterSequence = Union[Sequence[Union[int, float]], Sequence[str]] +MonotypeParameterList = Union[List[Union[int, float]], List[str]] + + +def _validate_bounds(bounds: Union[Tuple[int, int], Tuple[float, float]]): + """Validates the bounds.""" + if len(bounds) != 2: + raise ValueError('Bounds must have length 2. Given: {}'.format(bounds)) + lower = bounds[0] + upper = bounds[1] + if not all([math.isfinite(v) for v in (lower, upper)]): + raise ValueError( + 'Both "lower" and "upper" must be finite. Given: (%f, %f)' % + (lower, upper)) + if lower > upper: + raise ValueError( + 'Lower cannot be greater than upper: given lower={} upper={}'.format( + lower, upper)) + + +def _get_feasible_points_and_bounds( + feasible_values: Sequence[float] +) -> Tuple[List[float], Union[Tuple[int, int], Tuple[float, float]]]: + """Validates and converts feasible values to floats.""" + if not all([math.isfinite(p) for p in feasible_values]): + raise ValueError('Feasible values must all be finite. 
Given: %s' %
+                     feasible_values)
+
+  feasible_points = list(sorted(feasible_values))
+  bounds = (feasible_points[0], feasible_points[-1])
+  return feasible_points, bounds
+
+
+def _get_categories(categories: Sequence[str]) -> List[str]:
+  """Returns the categories."""
+  return sorted(list(categories))
+
+
+def _get_default_value(
+    param_type: ParameterType,
+    default_value: Union[float, int, str]) -> Union[float, int, str]:
+  """Validates and converts the default_value to the right type."""
+  if (param_type in (ParameterType.DOUBLE, ParameterType.DISCRETE) and
+      (isinstance(default_value, float) or isinstance(default_value, int))):
+    return float(default_value)
+  elif (param_type == ParameterType.INTEGER and
+        (isinstance(default_value, float) or isinstance(default_value, int))):
+    if isinstance(default_value, int):
+      return default_value
+    else:
+      # Check if the float rounds nicely.
+      default_int_value = round(default_value)
+      if not math.isclose(default_value, default_int_value):
+        raise ValueError('default_value for an INTEGER parameter should be an '
+                         'integer, got float: [{}]'.format(default_value))
+      return default_int_value
+  elif (param_type == ParameterType.CATEGORICAL and
+        isinstance(default_value, str)):
+    return default_value
+  raise ValueError(
+      'default_value has an incorrect type. ParameterType has type {}, '
+      'but default_value has type {}'.format(param_type.name,
+                                             type(default_value)))
+
+
+@attr.s(auto_attribs=True, frozen=True, init=True, slots=True)
+class ParameterConfig:
+  """A Vizier ParameterConfig.
+
+  Use ParameterConfig.factory to create a valid instance.
+  """
+  _name: str = attr.ib(
+      init=True, validator=attr.validators.instance_of(str), kw_only=True)
+  _type: ParameterType = attr.ib(
+      init=True,
+      validator=attr.validators.instance_of(ParameterType),
+      repr=lambda v: v.name if v is not None else 'None',
+      kw_only=True)
+  # Only one of _feasible_values, _bounds will be set at any given time.
+ _bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float)), + iterable_validator=attr.validators.instance_of(tuple))), + kw_only=True) + _feasible_values: Optional[MonotypeParameterList] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float, str)), + iterable_validator=attr.validators.instance_of((list, tuple)))), + kw_only=True) + _scale_type: Optional[ScaleType] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.instance_of(ScaleType)), + repr=lambda v: v.name if v is not None else 'None', + kw_only=True) + _default_value: Optional[Union[float, int, str]] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.instance_of((float, int, str))), + kw_only=True) + _external_type: Optional[ExternalType] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.instance_of(ExternalType)), + repr=lambda v: v.name if v is not None else 'None', + kw_only=True) + # Parent values for this ParameterConfig. If set, then this is a child + # ParameterConfig. + _matching_parent_values: Optional[MonotypeParameterList] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float, str)), + iterable_validator=attr.validators.instance_of((list, tuple)))), + kw_only=True) + # Children ParameterConfig. If set, then this is a parent ParameterConfig. + _child_parameter_configs: Optional[List['ParameterConfig']] = attr.ib( + init=True, kw_only=True) + + # Pytype treats instances of EnumTypeWrapper as types, but they can't be + # evaluated at runtime, so a Union[] of proto enums has to be a forward + # reference below. 
+ @classmethod + def factory( + cls, + name: str, + *, + bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = None, + feasible_values: Optional[MonotypeParameterSequence] = None, + children: Optional[Sequence[Tuple[MonotypeParameterSequence, + 'ParameterConfig']]] = None, + scale_type: Optional[ScaleType] = None, + default_value: Optional[Union[float, int, str]] = None, + external_type: Optional[ExternalType] = ExternalType.INTERNAL + ) -> 'ParameterConfig': + """Factory method. + + Args: + name: The parameter's name. Cannot be empty. + bounds: REQUIRED for INTEGER or DOUBLE type. Specifies (min, max). The + type of (min, max) determines the created ParameterConfig's type. + feasible_values: REQUIRED for DISCRETE or CATEGORICAL type. The elements' + type determines the created ParameterConfig's type. + children: sequence of tuples formatted as: (matching_parent_values, + ParameterConfig). See + cs/learning_vizier.service.ParameterConfig.child_parameter_configs for + details. ONLY THE TYPES ARE VALIDATED. If the child ParameterConfig + protos already have parent values set, they will be overridden by the + provided matching_parent_values. + scale_type: Scaling to be applied. NOT VALIDATED. + default_value: A default value for the Parameter. + external_type: An annotation indicating the type this parameter should be + cast to. + + Returns: + A ParameterConfig object which wraps a partially validated proto. + + Raises: + ValueError: Exactly one of feasible_values and bounds must be convertible + to Boolean true. Bounds and numeric feasible_values must be finite. + Bounds and feasible_values, if provided, must consist of + elements of the same type. + TypeError: If children's matching_parent_values are not compatible with + the ParameterConfig being created. 
+ """ + if not name: + raise ValueError('Parameter name cannot be empty.') + + if bool(feasible_values) == bool(bounds): + raise ValueError( + 'While creating Parameter with name={}: exactly one of ' + '"feasible_values" or "bounds" must be provided, but given ' + 'feasible_values={} and bounds={}.'.format(name, feasible_values, + bounds)) + if feasible_values: + if len(set(feasible_values)) != len(feasible_values): + counter = collections.Counter(feasible_values) + duplicate_dict = {k: v for k, v in counter.items() if v > 1} + raise ValueError( + 'Feasible values cannot have duplicates: {}'.format(duplicate_dict)) + if all(isinstance(v, (float, int)) for v in feasible_values): + inferred_type = ParameterType.DISCRETE + feasible_values, bounds = _get_feasible_points_and_bounds( + feasible_values) + elif all(isinstance(v, str) for v in feasible_values): + inferred_type = ParameterType.CATEGORICAL + feasible_values = _get_categories(feasible_values) + else: + raise ValueError( + 'Feasible values must all be numeric or strings. Given {}'.format( + feasible_values)) + else: # bounds were specified. + if isinstance(bounds[0], int) and isinstance(bounds[1], int): + inferred_type = ParameterType.INTEGER + _validate_bounds(bounds) + elif isinstance(bounds[0], float) and isinstance(bounds[1], float): + inferred_type = ParameterType.DOUBLE + _validate_bounds(bounds) + else: + raise ValueError( + 'Bounds must both be integers or doubles. 
Given: {}'.format(bounds)) + + if default_value is not None: + default_value = _get_default_value(inferred_type, default_value) + + pc = cls( + name=name, + type=inferred_type, + bounds=bounds, + feasible_values=feasible_values, + scale_type=scale_type, + default_value=default_value, + external_type=external_type, + matching_parent_values=None, + child_parameter_configs=None) + if children: + pc = pc.add_children(children) + return pc + + @property + def name(self) -> str: + return self._name + + @property + def type(self) -> ParameterType: + return self._type + + @property + def external_type(self) -> ExternalType: + return self._external_type + + @property + def scale_type(self) -> Optional[ScaleType]: + return self._scale_type + + @property + def bounds(self) -> Union[Tuple[float, float], Tuple[int, int]]: + """Returns the bounds, if set, or raises a ValueError.""" + if self.type == ParameterType.CATEGORICAL: + raise ValueError('Accessing bounds of a categorical parameter: %s' % + self.name) + return self._bounds + + @property + def matching_parent_values(self) -> MonotypeParameterList: + """Returns the matching parent values, if this is a child parameter.""" + if not self._matching_parent_values: + return [] + return copy.copy(self._matching_parent_values) + + @property + def child_parameter_configs(self) -> List['ParameterConfig']: + if not self._child_parameter_configs: + return [] + return copy.deepcopy(self._child_parameter_configs) + + def _del_child_parameter_configs(self): + """Deletes the current child ParameterConfigs.""" + object.__setattr__(self, '_child_parameter_configs', None) + + @property + def clone_without_children(self) -> 'ParameterConfig': + """Returns the clone of self, without child_parameter_configs.""" + clone = copy.deepcopy(self) + clone._del_child_parameter_configs() # pylint: disable='protected-access' + return clone + + @property + def feasible_values(self) -> Union[List[int], List[float], List[str]]: + if self.type in 
(ParameterType.DISCRETE, ParameterType.CATEGORICAL): + if not self._feasible_values: + return [] + return copy.copy(self._feasible_values) + elif self.type == ParameterType.INTEGER: + return list(range(self.bounds[0], self.bounds[1] + 1)) + raise ValueError('feasible_values is invalid for type: %s' % self.type) + + @property + def default_value(self) -> Optional[Union[int, float, str]]: + """Returns the default value, or None if not set.""" + return self._default_value + + def _set_matching_parent_values(self, + parent_values: MonotypeParameterSequence): + """Sets the given matching parent values in this object, without validation. + + Args: + parent_values: Parent values for which this child ParameterConfig is + active. Existing values will be replaced. + """ + object.__setattr__(self, '_matching_parent_values', list(parent_values)) + + def _set_child_parameter_configs(self, children: List['ParameterConfig']): + """Sets the given child ParameterConfigs in this object, without validation. + + Args: + children: The children to set in this object. Existing children will be + replaced. + """ + object.__setattr__(self, '_child_parameter_configs', children) + + def add_children( + self, new_children: Sequence[Tuple[MonotypeParameterSequence, + 'ParameterConfig']] + ) -> 'ParameterConfig': + """Clones the ParameterConfig and adds new children to it. + + Args: + new_children: A sequence of tuples formatted as: (matching_parent_values, + ParameterConfig). If the child ParameterConfig have pre-existing parent + values, they will be overridden. + + Returns: + A parent parameter config, with children set. 
+ + Raises: + ValueError: If the child configs are invalid + TypeError: If matching parent values are invalid + """ + parent = copy.deepcopy(self) + if not new_children: + return parent + + for child_pair in new_children: + if len(child_pair) != 2: + raise ValueError('Each element in new_children must be a tuple of ' + '(Sequence of valid parent values, ParameterConfig),' + ' given: {}'.format(child_pair)) + + logging.debug('add_children: new_children=%s', new_children) + child_parameter_configs = parent.child_parameter_configs + for unsorted_parent_values, child in new_children: + parent_values = sorted(unsorted_parent_values) + child_copy = copy.deepcopy(child) + if parent.type == ParameterType.DISCRETE: + if not all(isinstance(v, (float, int)) for v in parent_values): + raise TypeError('Parent is DISCRETE-typed, but a child is specifying ' + 'one or more non float/int parent values: child={} ' + ', parent_values={}'.format(child, parent_values)) + child_copy._set_matching_parent_values(parent_values) # pylint: disable='protected-access' + elif parent.type == ParameterType.CATEGORICAL: + if not all(isinstance(v, str) for v in parent_values): + raise TypeError('Parent is CATEGORICAL-typed, but a child is ' + 'specifying one or more non float/int parent values: ' + 'child={}, parent_values={}'.format( + child, parent_values)) + child_copy._set_matching_parent_values(parent_values) # pylint: disable='protected-access' + elif parent.type == ParameterType.INTEGER: + # Allow {int, float}->float conversion but block str->float conversion. 
+ int_values = [int(v) for v in parent_values] + if int_values != parent_values: + raise TypeError( + 'Parent is INTEGER-typed, but a child is specifying one or more ' + 'non-integral parent values: {}'.format(parent_values)) + child_copy._set_matching_parent_values(int_values) # pylint: disable='protected-access' + else: + raise ValueError('DOUBLE type cannot have child parameters') + child_parameter_configs.extend([child_copy]) + parent._set_child_parameter_configs(child_parameter_configs) # pylint: disable='protected-access' + return parent + + def continuify(self) -> 'ParameterConfig': + """Returns a newly created DOUBLE parameter with the same range.""" + if self.type == ParameterType.DOUBLE: + return copy.deepcopy(self) + elif not ParameterType.is_numeric(self.type): + raise ValueError( + 'Cannot convert a non-numeric parameter to DOUBLE: {}'.format(self)) + elif self._child_parameter_configs: + raise ValueError( + 'Cannot convert a parent parameter to DOUBLE: {}'.format(self)) + + scale_type = self.scale_type + if scale_type == ScaleType.UNIFORM_DISCRETE: + logging.log_every_n( + logging.WARNING, + 'Converting a UNIFORM_DISCRETE scaled discrete parameter ' + 'to DOUBLE: %s', 10, self) + scale_type = None + + default_value = self.default_value + if default_value is not None: + default_value = float(default_value) + return ParameterConfig.factory( + self.name, + bounds=(float(self.bounds[0]), float(self.bounds[1])), + scale_type=scale_type, + default_value=default_value) + + @classmethod + def merge(cls, one: 'ParameterConfig', + other: 'ParameterConfig') -> 'ParameterConfig': + """Merge two ParameterConfigs. + + Args: + one: ParameterConfig with no child parameters. + other: Must have the same type as one, and may not have child parameters. + + Returns: + For Categorical, Discrete or Integer ParameterConfigs, the resulting + config will be the union of all feasible values. 
+      For Double ParameterConfigs, the resulting config will have [min_value,
+      max_value] set to the smallest and largest bounds.
+
+    Raises:
+      ValueError: If any of the input configs has child parameters, or if
+        the two parameters have different types.
+    """
+    if one.child_parameter_configs or other.child_parameter_configs:
+      raise ValueError(
+          'Cannot merge parameters with child_parameter_configs: %s and %s' %
+          (one, other))
+    if one.type != other.type:
+      raise ValueError('Type conflicts between {} and {}'.format(
+          one.type.name, other.type.name))
+    if one.scale_type != other.scale_type:
+      logging.warning('Scale type conflicts while merging %s and %s', one,
+                      other)
+
+    if one.type in (ParameterType.CATEGORICAL, ParameterType.DISCRETE):
+      new_feasible_values = list(
+          set(one.feasible_values + other.feasible_values))
+      return ParameterConfig.factory(
+          name=one.name,
+          feasible_values=new_feasible_values,
+          scale_type=one.scale_type)
+    elif one.type in (ParameterType.INTEGER, ParameterType.DOUBLE):
+      original_min, original_max = one.bounds
+      other_min, other_max = other.bounds
+      new_bounds = (min(original_min, other_min), max(original_max, other_max))
+      return ParameterConfig.factory(
+          name=one.name, bounds=new_bounds, scale_type=one.scale_type)
+    raise ValueError('Unknown type {}. This is currently '
+                     'an unreachable code.'.format(one.type))
+
+  def traverse(
+      self,
+      show_children: bool = False) -> Generator['ParameterConfig', None, None]:
+    """DFS Generator for parameter configs.
+
+    Args:
+      show_children: If True, every generated ParameterConfig has
+        child_parameter_configs. For example, if 'foo' has two child configs
+        'bar1' and 'bar2', then traversing 'foo' with show_children=True
+        generates (foo, with bar1,bar2 as children), (bar1), and (bar2). If
+        show_children=False, it generates (foo, without children), (bar1), and
+        (bar2).
+
+    Yields:
+      DFS on all parameter configs.
+ """ + if show_children: + yield self + else: + yield self.clone_without_children + for child in self.child_parameter_configs: + yield from child.traverse(show_children) + + def contains( + self, value: Union[trial.ParameterValueTypes, + trial.ParameterValue]) -> bool: + """Check if the `value` is a valid value for this parameter config.""" + if not isinstance(value, trial.ParameterValue): + value = trial.ParameterValue(value) + + if self.type == ParameterType.DOUBLE: + return self.bounds[0] <= value.as_float and value.as_float <= self.bounds[ + 1] + elif self.type == ParameterType.INTEGER: + if value.as_int != value.as_float: + return False + return self.bounds[0] <= value.as_int and value.as_int <= self.bounds[1] + elif self.type == ParameterType.DISCRETE: + return value.as_float in self.feasible_values + elif self.type == ParameterType.CATEGORICAL: + return value.as_str in self.feasible_values + else: + raise NotImplementedError(f'Cannot determine whether {value} is feasible' + f'for Unknown parameter type {self.type}.\n' + f'Full config: {repr(self)}') + + @property + def num_feasible_values(self) -> Union[float, int]: + if self.type == ParameterType.DOUBLE: + return float('inf') + elif self.type == ParameterType.INTEGER: + return self.bounds[1] - self.bounds[0] + 1 + else: + return len(self.feasible_values) diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py new file mode 100644 index 0000000000..15f50da9a1 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py @@ -0,0 +1,321 @@ +"""Tests for vizier.pyvizier.shared.parameter_config.""" + +from typing import Any + +from vizier._src.pyvizier.shared import parameter_config as pc +from absl.testing import absltest +from absl.testing import parameterized + + +class ParameterConfigFactoryTest(parameterized.TestCase): + + def testCreatesDoubleConfig(self): + parameter_config = 
pc.ParameterConfig.factory( + 'name', + bounds=(-1.0, 1.0), + scale_type=pc.ScaleType.LINEAR, + default_value=0.1) + self.assertEqual(parameter_config.name, 'name') + self.assertEqual(parameter_config.type, pc.ParameterType.DOUBLE) + self.assertEqual(parameter_config.bounds, (-1, 1)) + self.assertEqual(parameter_config.scale_type, pc.ScaleType.LINEAR) + self.assertEqual(parameter_config.default_value, 0.1) + self.assertIsInstance(parameter_config.default_value, float) + with self.assertRaises(ValueError): + _ = parameter_config.feasible_values + + self.assertEqual(parameter_config.continuify(), parameter_config) + + def testCreatesIntegerConfig(self): + parameter_config = pc.ParameterConfig.factory( + 'name', bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=1) + self.assertEqual(parameter_config.name, 'name') + self.assertEqual(parameter_config.type, pc.ParameterType.INTEGER) + self.assertEqual(parameter_config.feasible_values, [1, 2, 3]) + self.assertEqual(parameter_config.bounds, (1, 3)) + self.assertEqual(parameter_config.scale_type, pc.ScaleType.LOG) + self.assertEqual(parameter_config.default_value, 1) + self.assertIsInstance(parameter_config.default_value, int) + + self.assertEqual( + parameter_config.continuify(), + pc.ParameterConfig.factory( + 'name', + bounds=(1.0, 3.0), + scale_type=pc.ScaleType.LOG, + default_value=1.0)) + + def testCreatesDiscreteConfig(self): + feasible_values = (-1, 3, 2) + parameter_config = pc.ParameterConfig.factory( + 'name', + feasible_values=feasible_values, + scale_type=pc.ScaleType.UNIFORM_DISCRETE, + default_value=2, + external_type=pc.ExternalType.INTEGER) + self.assertEqual(parameter_config.name, 'name') + self.assertEqual(parameter_config.type, pc.ParameterType.DISCRETE) + self.assertEqual(parameter_config.feasible_values, [-1, 2, 3]) + self.assertEqual(parameter_config.bounds, (-1, 3)) + self.assertEqual(parameter_config.scale_type, pc.ScaleType.UNIFORM_DISCRETE) + 
self.assertEqual(parameter_config.default_value, 2) + self.assertIsInstance(parameter_config.default_value, float) + self.assertEqual(parameter_config.external_type, pc.ExternalType.INTEGER) + + self.assertEqual( + parameter_config.continuify(), + pc.ParameterConfig.factory( + 'name', bounds=(-1.0, 3.0), default_value=2.0)) + + def testCreatesCategoricalConfig(self): + feasible_values = ('b', 'a', 'c') + parameter_config = pc.ParameterConfig.factory( + 'name', feasible_values=feasible_values, default_value='c') + self.assertEqual(parameter_config.name, 'name') + self.assertEqual(parameter_config.feasible_values, ['a', 'b', 'c']) + self.assertEqual(parameter_config.default_value, 'c') + with self.assertRaises(ValueError): + _ = parameter_config.bounds + + def testCreatesDoubleConfigIntDefault(self): + parameter_config = pc.ParameterConfig.factory( + 'name', + bounds=(-1.0, 1.0), + scale_type=pc.ScaleType.LINEAR, + default_value=1) + self.assertEqual(parameter_config.default_value, 1.0) + self.assertIsInstance(parameter_config.default_value, float) + + def testCreatesDiscreteConfigDoubleDefault(self): + feasible_values = (-1, 3, 2) + parameter_config = pc.ParameterConfig.factory( + 'name', + feasible_values=feasible_values, + scale_type=pc.ScaleType.UNIFORM_DISCRETE, + default_value=2.0) + self.assertEqual(parameter_config.default_value, 2.0) + self.assertIsInstance(parameter_config.default_value, float) + + def testCreatesIntegerConfigDoubleDefault(self): + parameter_config = pc.ParameterConfig.factory( + 'name', bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=2.0) + self.assertEqual(parameter_config.default_value, 2.0) + self.assertIsInstance(parameter_config.default_value, int) + + def testCreatesIntegerConfigInvalidDoubleDefault(self): + with self.assertRaisesRegex(ValueError, 'default_value for an.*'): + pc.ParameterConfig.factory( + 'name', + bounds=(1, 3), + scale_type=pc.ScaleType.LOG, + default_value=2.0001) + + def 
testCreatesCategoricalConfigNoDefault(self): + feasible_values = ('b', 'a', 'c') + parameter_config = pc.ParameterConfig.factory( + 'name', feasible_values=feasible_values) + self.assertIsNone(parameter_config.default_value) + + def testCreatesCategoricalConfigBadDefault(self): + feasible_values = ('b', 'a', 'c') + with self.assertRaisesRegex(ValueError, + 'default_value has an incorrect type.*'): + pc.ParameterConfig.factory( + 'name', feasible_values=feasible_values, default_value=0.1) + + def testRaisesErrorWhenNameIsEmpty(self): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory('', bounds=(-1.0, 1.0)) + + def testRaisesErrorWhenOverSpecified(self): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory( + 'name', bounds=(-1.0, 1.0), feasible_values=['a', 'b', 'c']) + + @parameterized.named_parameters( + ('HaveInfinity', (-float('inf'), 1)), ('HaveNan', (1, float('nan'))), + ('HaveMixedTypes', (1, float(1))), ('AreWronglyOrdered', (1, -1))) + def testRaisesErrorWhenBounds(self, bounds): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory('name', bounds=bounds) + + @parameterized.named_parameters(('HaveDuplicateCategories', ['a', 'a', 'b']), + ('HaveDuplicateNumbers', [1.0, 2.0, 2.0]), + ('HaveMixedTypes', ['a', 1, 2])) + def testRaisesErrorWhenFeasibleValues(self, feasible_values): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory('name', feasible_values=feasible_values) + + +_child1 = pc.ParameterConfig.factory('double_child', bounds=(0.0, 1.0)) +_child2 = pc.ParameterConfig.factory('integer_child', bounds=(0, 1)) + + +class ParameterConfigFactoryTestWithChildren(parameterized.TestCase): + + @parameterized.named_parameters( + ('IntParentValues', [([0], _child1), ([0, 1], _child2)]), + ('FloatParentValues', [([0.0], _child1), ([0.0, 1.0], _child2)])) + def testIntegerWithValid(self, children): + p = pc.ParameterConfig.factory('parent', bounds=(0, 1), children=children) + 
self.assertLen(p.child_parameter_configs, 2) + self.assertEmpty(p.matching_parent_values) + self.assertSameElements(p.child_parameter_configs[0].matching_parent_values, + children[0][0]) + self.assertSameElements(p.child_parameter_configs[1].matching_parent_values, + children[1][0]) + + @parameterized.named_parameters( + ('FloatParentValues', [([0.5], _child1)]), + ('StringParentValues', [(['0'], _child1), (['0.0', '1.0'], _child2)])) + def testIntegerWithInvalid(self, children): + with self.assertRaises(TypeError): + _ = pc.ParameterConfig.factory('parent', bounds=(0, 1), children=children) + + @parameterized.named_parameters( + ('IntParentValues', [([0], _child1), ([1], _child2)]), + ('FloatParentValues', [([0.0], _child1), ([0.0, 1.0], _child2)])) + def testDiscreteWithValid(self, children): + p = pc.ParameterConfig.factory( + 'parent', feasible_values=[0.0, 1.0], children=children) + self.assertLen(p.child_parameter_configs, 2) + self.assertEmpty(p.matching_parent_values) + self.assertSameElements(p.child_parameter_configs[0].matching_parent_values, + children[0][0]) + self.assertSameElements(p.child_parameter_configs[1].matching_parent_values, + children[1][0]) + + @parameterized.named_parameters(('StringParentValues', [(['0.0'], _child1), + (['0.0', + '1.0'], _child2)])) + def testDiscreteWithInvalid(self, children): + with self.assertRaises(TypeError): + _ = pc.ParameterConfig.factory( + 'parent', feasible_values=[0.0, 1.0], children=children) + + @parameterized.named_parameters( # pyformat: disable + ('StringParentValues', [(['a'], _child1), (['a', 'b'], _child2)])) + def testCategoricalWithValid(self, children): + p = pc.ParameterConfig.factory( + 'parent', feasible_values=['a', 'b'], children=children) + self.assertLen(p.child_parameter_configs, 2) + self.assertEmpty(p.matching_parent_values) + self.assertSameElements(p.child_parameter_configs[0].matching_parent_values, + children[0][0]) + 
self.assertSameElements(p.child_parameter_configs[1].matching_parent_values, + children[1][0]) + + @parameterized.named_parameters(('StringParentValues', [(['0.0'], _child1), + (['1.0'], _child2)])) + def testCategoricalWithInvalid(self, children): + with self.assertRaises(TypeError): + _ = pc.ParameterConfig.factory( + 'parent', feasible_values=[0.0, 1.0], children=children) + + def testAddChildren(self): + children = [(['a'], _child1), (['a', 'b'], _child2)] + p = pc.ParameterConfig.factory( + 'parent', feasible_values=['a', 'b'], children=children) + new_children = [ + (['a'], pc.ParameterConfig.factory('double_child2', bounds=(1.0, 2.0))), + (['b'], + pc.ParameterConfig.factory( + 'categorical_child', feasible_values=['c', 'd'])), + ] + p2 = p.add_children(new_children) + self.assertLen(p.child_parameter_configs, 2) + self.assertSameElements([c.name for c in p.child_parameter_configs], + [c[1].name for c in children]) + + self.assertLen(p2.child_parameter_configs, 4) + expected_names = [c[1].name for c in children] + expected_names += [c[1].name for c in new_children] + got_names = [c.name for c in p2.child_parameter_configs] + self.assertSameElements(got_names, expected_names) + + +class MergeTest(parameterized.TestCase): + + def test_merge_bounds(self): + pc1 = pc.ParameterConfig.factory('pc1', bounds=(0.0, 2.0)) + pc2 = pc.ParameterConfig.factory('pc2', bounds=(-1.0, 1.0)) + self.assertEqual( + pc.ParameterConfig.merge(pc1, pc2), + pc.ParameterConfig.factory('pc1', bounds=(-1.0, 2.0))) + + def test_merge_discrete(self): + pc1 = pc.ParameterConfig.factory( + 'pc1', feasible_values=[0.0, 2.0], scale_type=pc.ScaleType.LINEAR) + pc2 = pc.ParameterConfig.factory('pc2', feasible_values=[-1.0, 0.0]) + self.assertEqual( + pc.ParameterConfig.merge(pc1, pc2), + pc.ParameterConfig.factory( + 'pc1', + feasible_values=[-1.0, 0.0, 2.0], + scale_type=pc.ScaleType.LINEAR)) + + def test_merge_categorical(self): + pc1 = pc.ParameterConfig.factory('pc1', feasible_values=['a', 
'b']) + pc2 = pc.ParameterConfig.factory('pc2', feasible_values=['a', 'c']) + self.assertEqual( + pc.ParameterConfig.merge(pc1, pc2), + pc.ParameterConfig.factory('pc1', feasible_values=['a', 'b', 'c'])) + + +class ParameterConfigContainsTest(parameterized.TestCase): + + @parameterized.parameters((1.0, True), (-2.0, False), (3.0, False)) + def testFloat(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory('pc1', bounds=(-1., 2.)) + self.assertEqual(config.contains(value), expected) + + @parameterized.parameters((1, True), (-2, False), (3, False), (1.5, False)) + def testInt(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory('pc1', bounds=(-1, 2)) + self.assertEqual(config.contains(value), expected) + + @parameterized.parameters((1.0, False), (2, True), (-1, True)) + def testDiscrete(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory('pc1', feasible_values=[-1., 0., 2.]) + self.assertEqual(config.contains(value), expected) + + @parameterized.parameters(('a', True), ('b', False), ('c', False)) + def testCategorical(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory( + 'pc1', feasible_values=['a', 'aa', 'aaa']) + self.assertEqual(config.contains(value), expected) + + @parameterized.parameters((True, True), ('a', False), (0, False)) + def testBoolean(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory( + 'pc1', feasible_values=['true', 'false']) + self.assertEqual(config.contains(value), expected) + + +class TraverseTest(parameterized.TestCase): + + @parameterized.named_parameters(('ShowChildrenTrue', True), + ('ShowChildrenFalse', False)) + def testTraverse(self, show_children): + grandchild1 = pc.ParameterConfig.factory('grandchild1', bounds=(-1.0, 1.0)) + grandchildren = [(['a'], grandchild1), (['b'], grandchild1)] + child1 = pc.ParameterConfig.factory( + 'child1', feasible_values=['a', 'b'], children=grandchildren) + + child2 = 
pc.ParameterConfig.factory('child2', bounds=(0.0, 1.0)) + children = [([0], child1), ([1], child1), ([0, 1], child2)] + parent = pc.ParameterConfig.factory( + 'parent', bounds=(0, 1), children=children) + traversed_names = [ + pc.name for pc in parent.traverse(show_children=show_children) + ] + # Some parameter names are reused for separate child nodes, so they + # will appear multiple times, but they are indeed separate parameters. + self.assertEqual(traversed_names, [ + 'parent', 'child1', 'grandchild1', 'grandchild1', 'child1', + 'grandchild1', 'grandchild1', 'child2' + ]) + + +if __name__ == '__main__': + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py new file mode 100644 index 0000000000..476691e52b --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/trial.py @@ -0,0 +1,551 @@ +"""Wrapper classes for Trial protos and other messages in them. + +Example usage: + trial = Trial.from_proto(trial_proto) + print('This trial's auc is: ', trial.final_measurement.metrics['auc'].value) + print('This trial had parameter "n_hidden_layers": ', + trial.parameters['n_hidden_layers'].value) +""" + +import collections +from collections import abc as cabc +import copy +import datetime +import enum +from typing import Any, Dict, List, MutableMapping, Optional, Union, FrozenSet + +from absl import logging +import attr +import numpy as np + +from google.cloud.aiplatform.vizier.pyvizier import common + +ParameterValueTypes = Union[str, int, float, bool] +OrderedDict = collections.OrderedDict +Metadata = common.Metadata + + +class ExternalType(enum.Enum): + """Valid Values for ParameterConfig.external_type.""" + INTERNAL = 'INTERNAL' + BOOLEAN = 'BOOLEAN' + INTEGER = 'INTEGER' + FLOAT = 'FLOAT' + + +# Values should NEVER be removed from the enums below, only added. 
+class TrialStatus(enum.Enum): + """Values for Trial.Status.""" + UNKNOWN = 'UNKNOWN' + REQUESTED = 'REQUESTED' + ACTIVE = 'ACTIVE' + COMPLETED = 'COMPLETED' + STOPPING = 'STOPPING' + + +@attr.s(frozen=True, init=True, slots=True, kw_only=False) +class Metric: + """Enhanced immutable wrapper for vizier_pb2.Metric proto. + + It has an additional field "std" for internal usage. This field gets lost + when the object is converted to proto. + """ + + def _std_not_negative(self, _, stddev): + if stddev < 0: + raise ValueError( + 'Standard deviation must be a non-negative finite number.') + + value: float = attr.ib( + converter=float, + init=True, + validator=[attr.validators.instance_of(float)], + kw_only=False) + std: float = attr.ib( + converter=float, + validator=[attr.validators.instance_of(float), _std_not_negative], + init=True, + default=0.0, + kw_only=True) + + +# Use when you want to preserve the shapes or reduce if-else statements. +# e.g. `metrics.get('metric_name', NaNMetric).value` to get NaN or the actual +# value. +NaNMetric = Metric(value=np.nan) + + +@attr.s(auto_attribs=True, frozen=True, init=True, slots=True, repr=False) +class ParameterValue: + """Immutable wrapper for vizier_pb2.Parameter.value, which is a oneof field. + + Has accessors (properties) that cast the value into the type according + to StudyConfiguration class behavior. In particular, 'true' and 'false' are + treated as special strings that are cast to a numeric value of 1 and 0, + respectively, and boolean value of True and False, repectively. + """ + + value: ParameterValueTypes = attr.ib( + init=True, + validator=[ + attr.validators.instance_of((str, int, float, bool)), + ]) + + def cast( + self, + external_type: ExternalType, + ) -> ParameterValueTypes: + """Returns ParameterValue cast to external_type. + + Args: + external_type: + + Returns: + self.value if external_type is INTERNAL. + self.as_bool if external_type is BOOLEAN. + self.as_int if external_type is INTEGER. 
+ self.as_float if external_type is FLOAT. + + Raises: + ValueError: If external_type is not valid. + """ + if external_type == ExternalType.INTERNAL: + return self.value + elif external_type == ExternalType.BOOLEAN: + return self.as_bool + elif external_type == ExternalType.INTEGER: + return self.as_int + elif external_type == ExternalType.FLOAT: + return self.as_float + else: + raise ValueError( + 'Unknown external type enum value: {}.'.format(external_type)) + + @property + def as_float(self) -> Optional[float]: + """Returns the value cast to float.""" + if self.value == 'true': + return 1.0 + elif self.value == 'false': + return 0.0 + elif isinstance(self.value, str): + return None + return float(self.value) + + @property + def as_int(self) -> Optional[int]: + """Returns the value cast to int.""" + if self.value == 'true': + return 1 + elif self.value == 'false': + return 0 + elif isinstance(self.value, str): + return None + return int(self.value) + + @property + def as_str(self) -> Optional[str]: + """Returns str-typed value or lowercase 'true'/'false' if value is bool.""" + if isinstance(self.value, bool): + return str(self.value).lower() + elif isinstance(self.value, str): + return self.value + return None + + @property + def as_bool(self) -> Optional[bool]: + """Returns the value as bool following StudyConfiguration's behavior. + + Returns: True if value is 'true' or 1. False if value is + 'false' or 0. For all other cases, returns None. + For string type, this behavior is consistent with how + StudyConfiguration.AddBooleanParameter's. 
For other types, this + guarantees that self.value == self.as_bool + """ + if isinstance(self.value, str): + if self.value.lower() == 'true': + return True + elif self.value.lower() == 'false': + return False + else: + if self.value == 1.0: + return True + elif self.value == 0.0: + return False + return None + + def __str__(self) -> str: + return str(self.value) + + def __repr__(self) -> str: + return str(self.value) + + +class _MetricDict(collections.UserDict): + + def __setitem__(self, key: str, value: Union[float, Metric]): + if isinstance(value, Metric): + self.data.__setitem__(key, value) + else: + self.data.__setitem__(key, Metric(value=value)) + + +@attr.s(auto_attribs=True, frozen=False, init=True, slots=True) +class Measurement: + """Collection of metrics with a timestamp.""" + + def _value_is_finite(self, _, value): + if not (np.isfinite(value) and value >= 0): + raise ValueError('Must be finite and non-negative.') + + # Should be used as a regular Dict. + metrics: MutableMapping[str, Metric] = attr.ib( + init=True, + converter=lambda d: _MetricDict(**d), + default=_MetricDict(), + validator=attr.validators.instance_of(_MetricDict), + on_setattr=[attr.setters.convert, attr.setters.validate]) + + elapsed_secs: float = attr.ib( + converter=float, + init=True, + default=0, + validator=[attr.validators.instance_of(float), _value_is_finite], + on_setattr=[attr.setters.convert, attr.setters.validate], + kw_only=True) + + steps: float = attr.ib( + converter=int, + init=True, + default=0, + validator=[attr.validators.instance_of(int), _value_is_finite], + on_setattr=[attr.setters.convert, attr.setters.validate], + kw_only=True) + + +def _to_local_time( + dt: Optional[datetime.datetime]) -> Optional[datetime.datetime]: + """Converter for initializing timestamps in Trial class.""" + return dt.astimezone() if dt else None + + +@attr.define(init=False, frozen=True, eq=True) +class ParameterDict(cabc.MutableMapping): + """Parameter dictionary. 
+ + Maps the parameter names to their values. Works like a regular + dict[str, ParameterValue] for the most part, except one can directly assign + values of type `ParameterValueType`. So, + ParameterDict(a=3) and + ParameterDict(a=ParameterValue(3)) are equivalent. + + To access the raw value directly, use get_value() method. + d.get('a').value == d.get_value('a') + """ + + _items: MutableMapping[str, ParameterValue] = attr.field( + init=False, factory=dict) + + def __init__(self, iterable: Any = tuple(), **kwargs): + self.__attrs_init__() + self.update(iterable, **kwargs) + + def __setitem__(self, key: str, value: Union[ParameterValue, + ParameterValueTypes]): + if isinstance(value, ParameterValue): + self._items[key] = value + else: + self._items[key] = ParameterValue(value) + + def __delitem__(self, key: str): + del self._items[key] + + def __getitem__(self, key: str) -> ParameterValue: + return self._items[key] + + def __len__(self) -> int: + return len(self._items) + + def __iter__(self): + return iter(self._items) + + def get_value( + self, + key: str, + default: Optional[ParameterValueTypes] = None + ) -> Optional[ParameterValueTypes]: + pv = self.get(key, default) + if isinstance(pv, ParameterValue): + return pv.value + else: + return pv + + +@attr.define(auto_attribs=True, frozen=False, init=True, slots=True) +class Trial: + """Wrapper for learning_vizier.service.Trial proto.""" + id: int = attr.ib( + init=True, + kw_only=True, + default=0, + validator=attr.validators.instance_of(int), + ) + + _is_requested: bool = attr.ib( + init=True, + kw_only=True, + default=False, + validator=attr.validators.instance_of(bool)) + + assigned_worker: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + stopping_reason: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + 
_infeasibility_reason: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + description: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + parameters: ParameterDict = attr.field( + init=True, + kw_only=True, + factory=ParameterDict, + converter=ParameterDict, + validator=attr.validators.instance_of(ParameterDict)) + + metadata: Metadata = attr.ib( + init=True, + kw_only=True, + default=Metadata(), + validator=attr.validators.instance_of(Metadata), + ) + + related_links: Dict[str, str] = attr.ib( + init=True, + kw_only=True, + factory=dict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(str), + mapping_validator=attr.validators.instance_of(dict)), + ) # pytype: disable=wrong-arg-types + + final_measurement: Optional[Measurement] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional( + attr.validators.instance_of(Measurement)), + ) + + measurements: List[Measurement] = attr.ib( + init=True, + kw_only=True, + default=list(), + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of(Measurement), + iterable_validator=attr.validators.instance_of(list)), + ) + + creation_time: Optional[datetime.datetime] = attr.ib( + init=True, + default=datetime.datetime.now(), + converter=_to_local_time, + kw_only=True, + repr=lambda v: v.strftime('%x %X') if v is not None else 'None', + validator=attr.validators.optional( + attr.validators.instance_of(datetime.datetime)), + ) + + completion_time: Optional[datetime.datetime] = attr.ib( + init=True, + kw_only=True, + default=None, + repr=lambda v: v.strftime('%x %X') if v is not None else 'None', + converter=_to_local_time, + validator=attr.validators.optional( + 
attr.validators.instance_of(datetime.datetime)), + ) + + @property + def duration(self) -> Optional[datetime.timedelta]: + """Returns the duration of this Trial if it is completed, or None.""" + if self.completion_time: + return self.completion_time - self.creation_time + else: + return None + + @property + def status(self) -> TrialStatus: + """Status. + + COMPLETED: Trial has final measurement or is declared infeasible. + ACTIVE: Trial is being evaluated. + STOPPING: Trial is being evaluated, but was decided to be not worth further + evaluating. + REQUESTED: Trial is queued for future suggestions. + """ + if self.final_measurement is not None or self.infeasible: + return TrialStatus.COMPLETED + elif self.stopping_reason is not None: + return TrialStatus.STOPPING + elif self._is_requested: + return TrialStatus.REQUESTED + else: + return TrialStatus.ACTIVE + + @property + def is_completed(self) -> bool: + """Returns True if this Trial is completed.""" + if self.status == TrialStatus.COMPLETED: + if self.completion_time is None: + logging.warning('Invalid Trial state: status is COMPLETED, but a ' + ' completion_time was not set') + return True + elif self.completion_time is not None: + if self.status is None: + logging.warning('Invalid Trial state: status is not set to COMPLETED, ' + 'but a completion_time is set') + return True + return False + + @property + def infeasible(self) -> bool: + """Returns True if this Trial is infeasible.""" + return self._infeasibility_reason is not None + + @property + def infeasibility_reason(self) -> Optional[str]: + """Returns this Trial's infeasibility reason, if set.""" + return self._infeasibility_reason + + def complete(self, + measurement: Measurement, + *, + inplace: bool = True) -> 'Trial': + """Completes the trial and returns it. + + Args: + measurement: Measurement to complete the trial with. + inplace: If True, Trial is modified in place. 
If False, which is the + default, then the operation is performed and it returns a copy of the + object + + Returns: + Completed Trial. + """ + if inplace: + # Use setattr. If we assign to self.final_measurement, then hyperref + # mechanisms think this line is where `final_measurement` property + # is defined, instead of where we declare attr.ib. + self.__setattr__('final_measurement', copy.deepcopy(measurement)) + self.completion_time = _to_local_time(datetime.datetime.now()) + return self + else: + clone = copy.deepcopy(self) + return clone.complete(measurement, inplace=True) + + +# Define aliases. +CompletedTrial = Trial +PendingTrial = Trial +CompletedTrialWithMeasurements = Trial +PendingTrialWithMeasurements = Trial + + +@attr.frozen +class TrialSuggestion: + """Freshly suggested trial. + + Suggestion can be converted to Trial object which has more functionalities. + """ + + parameters: ParameterDict = attr.field( + init=True, + factory=ParameterDict, + converter=ParameterDict, + validator=attr.validators.instance_of(ParameterDict)) # pytype: disable=wrong-arg-types + + metadata: Metadata = attr.field( + init=True, + kw_only=True, + factory=Metadata, + validator=attr.validators.instance_of(Metadata)) + + def to_trial(self, uid: int) -> Trial: + """Assign an id and make it a Trial object. + + Usually SuggetedTrial objects are shorted-lived and not exposed to end + users. This method is for non-service usage of trial suggestions in + benchmarks, tests, colabs, etc. + + Args: + uid: Trial id. + + Returns: + Trial object. + """ + return Trial(id=uid, parameters=self.parameters, metadata=self.metadata) + +@attr.define +class TrialFilter: + """Trial filter. + + All filters are by default 'AND' conditions. + + Attributes: + ids: If set, requires the trial's id to be in the set. + min_id: If set, requires the trial's id to be at least this number. + max_id: If set, requires the trial's id to be at most this number. 
+ status: If set, requires the trial's status to be in the set. + """ + ids: Optional[FrozenSet[int]] = attr.field( + default=None, + converter=lambda x: frozenset(x) if x is not None else None, + validator=attr.validators.optional( + attr.validators.deep_iterable( + attr.validators.instance_of(int), + attr.validators.instance_of(frozenset)))) + min_id: Optional[int] = attr.field(default=None) + max_id: Optional[int] = attr.field(default=None) + status: Optional[FrozenSet[TrialStatus]] = attr.field( + default=None, + converter=lambda x: frozenset(x) if x is not None else None, + validator=attr.validators.optional( + attr.validators.deep_iterable( + attr.validators.instance_of(TrialStatus), + attr.validators.instance_of(frozenset)))) + + def __call__(self, trial: Trial) -> bool: + if self.ids is not None: + if trial.id not in self.ids: + return False + if self.min_id is not None: + if trial.id < self.min_id: + return False + if self.max_id is not None: + if trial.id > self.max_id: + return False + if self.status is not None: + if trial.status not in self.status: + return False + return True diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial_test.py b/google/cloud/aiplatform/vizier/pyvizier/trial_test.py new file mode 100644 index 0000000000..20f6281995 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/trial_test.py @@ -0,0 +1,219 @@ +"""Tests for vizier.pyvizier.shared.trial.""" +import copy +import datetime + +import numpy as np + +from vizier._src.pyvizier.shared import trial +from absl.testing import absltest +from absl.testing import parameterized + +Metric = trial.Metric +Measurement = trial.Measurement + + +class MetricTest(absltest.TestCase): + + def testMetricCreation(self): + _ = Metric(value=0, std=0.5) + + def testMetricCanHaveNaN(self): + _ = Metric(value=np.nan, std=-np.nan) + + def testMetricCannotHaveNegativeStd(self): + with self.assertRaises(ValueError): + _ = Metric(value=0, std=-0.5) + + +class MeasurementTest(absltest.TestCase): 
+ + def testMetricsInitializedFromFloats(self): + m = Measurement() + m.metrics = dict(a=0.3) + self.assertEqual(m.metrics['a'], Metric(0.3)) + m.metrics['b'] = 0.5 + self.assertEqual(m.metrics, {'a': Metric(0.3), 'b': Metric(0.5)}) + + def testMetrics(self): + m = Measurement() + m.metrics = dict(a=Metric(0.3)) + self.assertEqual(m.metrics['a'], Metric(0.3)) + + def testTimeStampsAreNotFrozen(self): + m = Measurement() + m.elapsed_secs = 1.0 + m.steps = 5 + + +ParameterValue = trial.ParameterValue + + +class ParameterValueTest(parameterized.TestCase): + + @parameterized.named_parameters(('True', True), ('False', False)) + def testBool(self, bool_value): + value = ParameterValue(bool_value) + self.assertEqual(value.as_float, float(bool_value)) + self.assertEqual(value.as_int, int(bool_value)) + self.assertEqual(value.as_str, str(bool_value).lower()) + + def testIntegralFloat0(self): + value = ParameterValue(0.0) + self.assertEqual(value.as_float, 0.0) + self.assertEqual(value.as_int, 0) + self.assertEqual(value.as_bool, False) + self.assertIsNone(value.as_str) + + def testIntegralFloat1(self): + value = ParameterValue(1.0) + self.assertEqual(value.as_float, 1.0) + self.assertEqual(value.as_int, 1) + self.assertEqual(value.as_bool, True) + self.assertIsNone(value.as_str) + + def testIntegralFloat2(self): + value = ParameterValue(2.0) + self.assertEqual(value.as_float, 2.0) + self.assertEqual(value.as_int, 2) + self.assertIsNone(value.as_bool) + self.assertIsNone(value.as_str) + + def testInteger0(self): + value = ParameterValue(0) + self.assertEqual(value.as_float, 0) + self.assertEqual(value.as_int, 0) + self.assertEqual(value.as_bool, False) + self.assertIsNone(value.as_str) + + def testInteger1(self): + value = ParameterValue(1) + self.assertEqual(value.as_float, 1) + self.assertEqual(value.as_int, 1) + self.assertEqual(value.as_bool, True) + self.assertIsNone(value.as_str) + + def testInteger2(self): + value = ParameterValue(2) + self.assertEqual(value.as_float, 
2) + self.assertEqual(value.as_int, 2) + self.assertIsNone(value.as_bool) + self.assertIsNone(value.as_str) + + def testStringTrue(self): + value = ParameterValue('true') + self.assertEqual(value.as_bool, True) + self.assertEqual(value.as_str, 'true') + + def testStringFalse(self): + value = ParameterValue('false') + self.assertEqual(value.as_bool, False) + self.assertEqual(value.as_str, 'false') + + def testCastAsExternalNone(self): + value = ParameterValue(1.0) + # pytype: disable=wrong-arg-types + with self.assertRaisesRegex(ValueError, 'Unknown external type'): + value.cast(None) + # pytype: enable=wrong-arg-types + + def testParameterCanHaveNonFiniteValues(self): + ParameterValue(float('nan')) + ParameterValue(value=float('inf')) + ParameterValue(value=float('inf')) + + +class TrialTest(absltest.TestCase): + + def testCompleteInplace(self): + test = trial.Trial() + measurement = Measurement(metrics={ + 'pr-auc': Metric(value=0.8), + 'latency': Metric(value=32) + }) + completed = test.complete(measurement, inplace=True) + + # The trial was completed in place. + self.assertEqual(test.final_measurement, measurement) + self.assertLessEqual(test.completion_time, + datetime.datetime.now().astimezone()) + self.assertGreaterEqual(test.completion_time, test.creation_time) + self.assertGreaterEqual(test.duration.total_seconds(), 0) + + # completed is the same reference as test. + self.assertEqual(test, completed) + + def testCompleteNotInplace(self): + """Complete with inplace=False.""" + test = trial.Trial() + measurement = Measurement(metrics={ + 'pr-auc': Metric(value=0.8), + 'latency': Metric(value=32) + }) + + test_copy = copy.deepcopy(test) + + completed = test.complete(measurement, inplace=False) + + # The returned Trial is completed. 
+ self.assertEqual(completed.final_measurement, measurement) + self.assertGreaterEqual(completed.completion_time, completed.creation_time) + self.assertLessEqual(completed.completion_time, + datetime.datetime.now().astimezone()) + self.assertGreaterEqual(completed.duration.total_seconds(), 0) + self.assertEqual(completed.status, trial.TrialStatus.COMPLETED) + self.assertTrue(completed.is_completed) + + # The original Trial is unchanged. + self.assertEqual(test_copy, test) + self.assertIsNone(test.final_measurement) + self.assertIsNone(test.completion_time) + self.assertIsNone(test.duration) + self.assertEqual(test.status, trial.TrialStatus.ACTIVE) + self.assertFalse(test.is_completed) + + def testDefaultsNotShared(self): + """Make sure default parameters are not shared between instances.""" + trial1 = trial.Trial() + trial2 = trial.Trial() + trial1.parameters['x1'] = trial.ParameterValue(5) + self.assertEmpty(trial2.parameters) + + +class ParameterDictTest(parameterized.TestCase): + + @parameterized.parameters((True,), (3,), (1.,), ('aa',)) + def testAssignRawValue(self, v): + d = trial.ParameterDict() + d['p1'] = v + self.assertEqual(d.get('p1'), trial.ParameterValue(v)) + self.assertEqual(d.get_value('p1'), v) + self.assertEqual(d.get_value('p2', 'default'), 'default') + self.assertLen(d, 1) + self.assertLen(d.items(), 1) + + @parameterized.parameters((True,), (3,), (1.,), ('aa',)) + def testAssignWrappedValue(self, v): + d = trial.ParameterDict() + v = trial.ParameterValue(v) + d['p1'] = v + self.assertEqual(d.get('p1'), v) + self.assertEqual(d.get_value('p1'), v.value) + self.assertEqual(d.get_value('p2', 'default'), 'default') + self.assertLen(d, 1) + self.assertLen(d.items(), 1) + + +class SuggestionTestI(absltest.TestCase): + + def testToTrial(self): + suggestion = trial.TrialSuggestion({'a': 3, 'b': True}) + suggestion.metadata['key'] = 'value' + + t = suggestion.to_trial(1) + self.assertEqual(t.id, 1) + self.assertEqual(t.parameters, suggestion.parameters) 
+ self.assertEqual(t.metadata, suggestion.metadata) + + +if __name__ == '__main__': + absltest.main() From f58f5a24a6d2bc4e93923bdc610303d1e75aab59 Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 3 Jun 2022 13:12:15 -0700 Subject: [PATCH 03/36] Added the Trial in the aiplatform folder. --- google/cloud/aiplatform/__init__.py | 1 + google/cloud/aiplatform/vizier/__init__.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 5b6c0f1fa8..dd3d399360 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -48,6 +48,7 @@ HyperparameterTuningJob, ) from google.cloud.aiplatform.vizier import Study +from google.cloud.aiplatform.vizier import Trial from google.cloud.aiplatform.pipeline_jobs import PipelineJob from google.cloud.aiplatform.tensorboard import ( Tensorboard, diff --git a/google/cloud/aiplatform/vizier/__init__.py b/google/cloud/aiplatform/vizier/__init__.py index 1b9a972dd8..8ff128e3e0 100644 --- a/google/cloud/aiplatform/vizier/__init__.py +++ b/google/cloud/aiplatform/vizier/__init__.py @@ -14,7 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. from google.cloud.aiplatform.vizier.study import Study +from google.cloud.aiplatform.vizier.trial import Trial __all__ = ( "Study", + "Trial", ) From 9524e9a4ae37a8f9b37e67bd8e390274ceb89ee1 Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 3 Jun 2022 13:22:08 -0700 Subject: [PATCH 04/36] Forked the pyvizier from the pyVizier. 
--- .../vizier/pyvizier/automated_stopping.py | 74 +++ .../vizier/pyvizier/metadata_util.py | 93 ++++ .../vizier/pyvizier/proto_converters.py | 458 ++++++++++++++++++ .../vizier/pyvizier/study_config.py | 453 +++++++++++++++++ 4 files changed, 1078 insertions(+) create mode 100644 google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/metadata_util.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/proto_converters.py create mode 100644 google/cloud/aiplatform/vizier/pyvizier/study_config.py diff --git a/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py b/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py new file mode 100644 index 0000000000..5616493ad5 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py @@ -0,0 +1,74 @@ +"""Convenience classes for configuring Vizier Early-Stopping Configs.""" +import copy +from typing import Union + +import attr + +from google.cloud.aiplatform.compat.types import study as study_pb2 + +AutomatedStoppingConfigProto = Union[ + study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec, + study_pb2.StudySpec.MedianAutomatedStoppingSpec] + + +@attr.s(frozen=True, init=True, slots=True, kw_only=True) +class AutomatedStoppingConfig: + """A wrapper for study_pb2.automated_stopping_spec.""" + _proto: AutomatedStoppingConfigProto = attr.ib(init=True, kw_only=True) + + @classmethod + def decay_curve_stopping_config(cls, + use_steps: bool) -> 'AutomatedStoppingConfig': + """Create a DecayCurve automated stopping config. + + Vizier will early stop the Trial if it predicts the Trial objective value + will not be better than previous Trials. + + Args: + use_steps: Bool. If set, use Measurement.step_count as the measure of + training progress. Otherwise, use Measurement.elapsed_duration. + + Returns: + AutomatedStoppingConfig object. + + Raises: + ValueError: If more than one metric is configured. 
+ Note that Vizier Early Stopping currently only supports single-objective + studies. + """ + config = study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec( + use_elapsed_duration=not use_steps) + return cls(proto=config) + + @classmethod + def median_automated_stopping_config( + cls, use_steps: bool) -> 'AutomatedStoppingConfig': + """Create a Median automated stopping config. + + Vizier will early stop the Trial if it predicts the Trial objective value + will not be better than previous Trials. + + Args: + use_steps: Bool. If set, use Measurement.step_count as the measure of + training progress. Otherwise, use Measurement.elapsed_duration. + + Returns: + AutomatedStoppingConfig object. + + Raises: + ValueError: If more than one metric is configured. + Note that Vizier Early Stopping currently only supports single-objective + studies. + """ + config = study_pb2.StudySpec.MedianAutomatedStoppingSpec( + use_elapsed_duration=not use_steps) + return cls(proto=config) + + @classmethod + def from_proto( + cls, proto: AutomatedStoppingConfigProto) -> 'AutomatedStoppingConfig': + return cls(proto=proto) + + def to_proto(self) -> AutomatedStoppingConfigProto: + """Returns this object as a proto.""" + return copy.deepcopy(self._proto) diff --git a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py new file mode 100644 index 0000000000..3a586dff9f --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py @@ -0,0 +1,93 @@ +"""Utility functions for handling vizier metadata.""" + +from typing import Tuple, Union, Optional, TypeVar, Type +#from vizier.service import key_value_pb2 +from google.cloud.aiplatform.compat.types import study as study_pb2 +from google.protobuf import any_pb2 +from google.protobuf.message import Message + +T = TypeVar('T') + + +def assign( + container: Union[study_pb2.StudySpec, study_pb2.Trial], *, key: str, + ns: str, value: Union[str, any_pb2.Any, + Message]) : #-> 
Tuple[key_value_pb2.KeyValue, bool]:
+  """Insert or assign (key, value) to container.metadata.
+
+  Args:
+    container: container.metadata must be repeated KeyValue (protobuf) field.
+    key: The key of the KeyValue to insert or replace.
+    ns: A namespace for the key (defaults to '', which is the user's namespace).
+    value: Behavior depends on the type. `str` is copied to KeyValue.value
+      `any_pb2.Any` is copied to KeyValue.proto Other types are packed to
+      any_pb2.Any proto, which is then copied to KeyValue.proto.
+
+  Returns:
+    (proto, inserted) where
+    proto is the protobuf that was just inserted into the $container, and
+    inserted is True if the proto was newly inserted, False if it was replaced.
+  """
+
+  for kv in container.metadata:
+    if kv.key == key and kv.ns == ns:
+      if isinstance(value, str):
+        kv.ClearField('proto')
+        kv.value = value
+      elif isinstance(value, any_pb2.Any):
+        kv.ClearField('value')
+        kv.proto.CopyFrom(value)
+      else:
+        kv.ClearField('value')
+        kv.proto.Pack(value)
+      return kv, False
+
+  # The key does not exist in the metadata.
+  if isinstance(value, str):
+    metadata = container.metadata.add(key=key, ns=ns, value=value)
+  elif isinstance(value, any_pb2.Any):
+    metadata = container.metadata.add(key=key, ns=ns, proto=value)
+  else:
+    metadata = container.metadata.add(key=key, ns=ns)
+    metadata.proto.Pack(value)
+  return metadata, True
+
+
+def get(container: Union[study_pb2.StudySpec, study_pb2.Trial], *, key: str,
+        ns: str) -> Optional[str]:
+  """Returns the metadata value associated with key, or None.
+
+  Args:
+    container: A Trial or a StudySpec in protobuf form.
+    key: The key of a KeyValue protobuf.
+    ns: A namespace for the key (defaults to '', which is the user's namespace). 
+ """ + + for kv in container.metadata: + if kv.key == key and kv.ns == ns: + if not kv.HasField('proto'): + return kv.value + return None + + +def get_proto(container: Union[study_pb2.StudySpec, study_pb2.Trial], *, + key: str, ns: str, cls: Type[T]) -> Optional[T]: + """Unpacks the proto metadata into message. + + Args: + container: (const) StudySpec or Trial to search the metadata from. + key: (const) Lookup key of the metadata. + ns: A namespace for the key (defaults to '', which is the user's namespace). + cls: Pass in a proto ***class***, not a proto object. + + Returns: + Proto message, if the value associated with the key exists and + can be parsed into proto; None otherwise. + """ + for kv in container.metadata: + if kv.key == key and kv.ns == ns: + if kv.HasField('proto'): + message = cls() + success = kv.proto.Unpack(message) + return message if success else None + return None diff --git a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py new file mode 100644 index 0000000000..5cfe28f337 --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py @@ -0,0 +1,458 @@ +"""Converters for OSS Vizier's protos from/to PyVizier's classes.""" +import datetime +import logging +from typing import List, Optional, Sequence, Tuple, Union +from absl import logging + +from google.protobuf import duration_pb2 +from google.cloud.aiplatform.compat.types import study as study_pb2 +from google.cloud.aiplatform.vizier.pyvizier import parameter_config +from google.cloud.aiplatform.vizier.pyvizier import trial + +ScaleType = parameter_config.ScaleType +_ScaleTypePb2 = study_pb2.StudySpec.ParameterSpec.ScaleType +ParameterType = parameter_config.ParameterType +MonotypeParameterSequence = parameter_config.MonotypeParameterSequence + + +class _ScaleTypeMap: + """Proto converter for scale type.""" + _pyvizier_to_proto = { + parameter_config.ScaleType.LINEAR: + _ScaleTypePb2.UNIT_LINEAR_SCALE, 
+ parameter_config.ScaleType.LOG: + _ScaleTypePb2.UNIT_LOG_SCALE, + parameter_config.ScaleType.REVERSE_LOG: + _ScaleTypePb2.UNIT_REVERSE_LOG_SCALE, + } + _proto_to_pyvizier = {v: k for k, v in _pyvizier_to_proto.items()} + + @classmethod + def to_proto(cls, pyvizier: parameter_config.ScaleType) -> _ScaleTypePb2: + return cls._pyvizier_to_proto[pyvizier] + + @classmethod + def from_proto(cls, proto: _ScaleTypePb2) -> parameter_config.ScaleType: + return cls._proto_to_pyvizier[proto] + + +class ParameterConfigConverter: + """Converter for ParameterConfig.""" + + @classmethod + def _set_bounds(cls, proto: study_pb2.StudySpec.ParameterSpec, lower: float, + upper: float, parameter_type: ParameterType): + """Sets the proto's min_value and max_value fields.""" + if parameter_type == ParameterType.INTEGER: + proto.integer_value_spec.min_value = lower + proto.integer_value_spec.max_value = upper + elif parameter_type == ParameterType.DOUBLE: + proto.double_value_spec.min_value = lower + proto.double_value_spec.max_value = upper + + @classmethod + def _set_feasible_points(cls, proto: study_pb2.StudySpec.ParameterSpec, + feasible_points: Sequence[float]): + """Sets the proto's feasible_points field.""" + feasible_points = sorted(feasible_points) + proto.discrete_value_spec.values.clear() + proto.discrete_value_spec.values.extend(feasible_points) + + @classmethod + def _set_categories(cls, proto: study_pb2.StudySpec.ParameterSpec, + categories: Sequence[str]): + """Sets the protos' categories field.""" + proto.categorical_value_spec.values.clear() + proto.categorical_value_spec.values.extend(categories) + + @classmethod + def _set_default_value(cls, proto: study_pb2.StudySpec.ParameterSpec, + default_value: Union[float, int, str]): + """Sets the protos' default_value field.""" + which_pv_spec = proto.WhichOneof('parameter_value_spec') + getattr(proto, which_pv_spec).default_value.value = default_value + + @classmethod + def _matching_parent_values( + cls, proto: 
study_pb2.StudySpec.ParameterSpec.ConditionalParameterSpec
+  ) -> MonotypeParameterSequence:
+    """Returns the matching parent values, if set."""
+    oneof_name = proto.WhichOneof('parent_value_condition')
+    if not oneof_name:
+      return []
+    if oneof_name in ('parent_discrete_values', 'parent_int_values',
+                      'parent_categorical_values'):
+      return list(getattr(getattr(proto, oneof_name), 'values'))
+    raise ValueError('Unknown matching_parent_vals: {}'.format(oneof_name))
+
+  @classmethod
+  def from_proto(
+      cls,
+      proto: study_pb2.StudySpec.ParameterSpec,
+      *,
+      strict_validation: bool = False) -> parameter_config.ParameterConfig:
+    """Creates a ParameterConfig.
+
+    Args:
+      proto: ParameterSpec proto to convert.
+      strict_validation: If True, raise ValueError to enforce that
+        from_proto(proto).to_proto == proto.
+
+    Returns:
+      ParameterConfig object
+
+    Raises:
+      ValueError: See the "strict_validation" arg documentation.
+    """
+    feasible_values = []
+    oneof_name = proto.WhichOneof('parameter_value_spec')
+    if oneof_name == 'integer_value_spec':
+      bounds = (int(proto.integer_value_spec.min_value),
+                int(proto.integer_value_spec.max_value))
+    elif oneof_name == 'double_value_spec':
+      bounds = (proto.double_value_spec.min_value,
+                proto.double_value_spec.max_value)
+    elif oneof_name == 'discrete_value_spec':
+      bounds = None
+      feasible_values = proto.discrete_value_spec.values
+    elif oneof_name == 'categorical_value_spec':
+      bounds = None
+      feasible_values = proto.categorical_value_spec.values
+
+    default_value = None
+    if getattr(proto, oneof_name).default_value.value:
+      default_value = getattr(proto, oneof_name).default_value.value
+
+    if proto.conditional_parameter_specs:
+      children = []
+      for conditional_ps in proto.conditional_parameter_specs:
+        parent_values = cls._matching_parent_values(conditional_ps)
+        children.append(
+            (parent_values, cls.from_proto(conditional_ps.parameter_spec)))
+    else:
+      children = None
+
+    scale_type = None
+    if proto.scale_type:
+      scale_type = 
_ScaleTypeMap.from_proto(proto.scale_type) + + try: + config = parameter_config.ParameterConfig.factory( + name=proto.parameter_id, + feasible_values=feasible_values, + bounds=bounds, + children=children, + scale_type=scale_type, + default_value=default_value) + except ValueError as e: + raise ValueError( + 'The provided proto was misconfigured. {}'.format(proto)) from e + + if strict_validation and cls.to_proto(config) != proto: + raise ValueError( + 'The provided proto was misconfigured. Expected: {} Given: {}'.format( + cls.to_proto(config), proto)) + return config + + @classmethod + def _set_child_parameter_configs( + cls, parent_proto: study_pb2.StudySpec.ParameterSpec, + pc: parameter_config.ParameterConfig): + """Sets the parent_proto's conditional_parameter_specs field. + + Args: + parent_proto: Modified in place. + pc: Parent ParameterConfig to copy children from. + + Raises: + ValueError: If the child configs are invalid + """ + children: List[Tuple[MonotypeParameterSequence, + parameter_config.ParameterConfig]] = [] + for child in pc.child_parameter_configs: + children.append((child.matching_parent_values, child)) + if not children: + return + parent_proto.conditional_parameter_specs.clear() + for child_pair in children: + if len(child_pair) != 2: + raise ValueError("""Each element in children must be a tuple of + (Sequence of valid parent values, ParameterConfig)""") + + logging.debug('_set_child_parameter_configs: parent_proto=%s, children=%s', + parent_proto, children) + for unsorted_parent_values, child in children: + parent_values = sorted(unsorted_parent_values) + child_proto = cls.to_proto(child.clone_without_children) + conditional_parameter_spec = study_pb2.StudySpec.ParameterSpec.ConditionalParameterSpec( + parameter_spec=child_proto) + + if parent_proto.HasField('discrete_value_spec'): + conditional_parameter_spec.parent_discrete_values.values[:] = parent_values + elif parent_proto.HasField('categorical_value_spec'): + 
conditional_parameter_spec.parent_categorical_values.values[:] = parent_values + elif parent_proto.HasField('integer_value_spec'): + conditional_parameter_spec.parent_int_values.values[:] = parent_values + else: + raise ValueError('DOUBLE type cannot have child parameters') + if child.child_parameter_configs: + cls._set_child_parameter_configs(child_proto, child) + parent_proto.conditional_parameter_specs.extend( + [conditional_parameter_spec]) + + @classmethod + def to_proto( + cls, pc: parameter_config.ParameterConfig + ) -> study_pb2.StudySpec.ParameterSpec: + """Returns a ParameterConfig Proto.""" + proto = study_pb2.StudySpec.ParameterSpec(parameter_id=pc.name) + if pc.type == ParameterType.DISCRETE: + cls._set_feasible_points(proto, [float(v) for v in pc.feasible_values]) + elif pc.type == ParameterType.CATEGORICAL: + cls._set_categories(proto, pc.feasible_values) + elif pc.type in (ParameterType.INTEGER, ParameterType.DOUBLE): + cls._set_bounds(proto, pc.bounds[0], pc.bounds[1], pc.type) + else: + raise ValueError('Invalid ParameterConfig: {}'.format(pc)) + if pc.scale_type is not None and pc.scale_type != ScaleType.UNIFORM_DISCRETE: + proto.scale_type = _ScaleTypeMap.to_proto(pc.scale_type) + if pc.default_value is not None: + cls._set_default_value(proto, pc.default_value) + + cls._set_child_parameter_configs(proto, pc) + return proto + + +class ParameterValueConverter: + """Converter for trial.ParameterValue.""" + + @classmethod + def from_proto( + cls, proto: study_pb2.Trial.Parameter) -> Optional[trial.ParameterValue]: + """Returns whichever value that is populated, or None.""" + value_proto = proto.value + oneof_name = value_proto.WhichOneof('kind') + potential_value = getattr(value_proto, oneof_name) + if isinstance(potential_value, float) or isinstance( + potential_value, str) or isinstance(potential_value, bool): + return trial.ParameterValue(potential_value) + else: + return None + + @classmethod + def to_proto(cls, parameter_value: 
trial.ParameterValue,
+               name: str) -> study_pb2.Trial.Parameter:
+    """Returns Parameter Proto."""
+    proto = study_pb2.Trial.Parameter(parameter_id=name)
+
+    if isinstance(parameter_value.value, int):
+      proto.value.number_value = parameter_value.value
+    elif isinstance(parameter_value.value, bool):
+      proto.value.bool_value = parameter_value.value
+    elif isinstance(parameter_value.value, float):
+      proto.value.number_value = parameter_value.value
+    elif isinstance(parameter_value.value, str):
+      proto.value.string_value = parameter_value.value
+
+    return proto
+
+
+class MeasurementConverter:
+  """Converter for trial.Measurement."""
+
+  @classmethod
+  def from_proto(cls, proto: study_pb2.Measurement) -> trial.Measurement:
+    """Creates a valid instance from proto.
+
+    Args:
+      proto: Measurement proto.
+
+    Returns:
+      A valid instance of Measurement object. Metrics with invalid values
+      are automatically filtered out.
+    """
+
+    metrics = dict()
+
+    for metric in proto.metrics:
+      if metric.metric_id in metrics and metrics[
+          metric.metric_id].value != metric.value:
+        logging.log_first_n(
+            logging.ERROR, 'Duplicate metric of name "%s".'
+            'The newly found value %s will be used and '
+            'the previously found value %s will be discarded.' 
+ 'This always happens if the proto has an empty-named metric.', 5, + metric.metric_id, metric.value, metrics[metric.metric_id].value) + try: + metrics[metric.metric_id] = trial.Metric(value=metric.value) + except ValueError: + pass + return trial.Measurement( + metrics=metrics, + elapsed_secs=proto.elapsed_duration.seconds, + steps=proto.step_count) + + @classmethod + def to_proto(cls, measurement: trial.Measurement) -> study_pb2.Measurement: + """Converts to Measurement proto.""" + proto = study_pb2.Measurement() + for name, metric in measurement.metrics.items(): + proto.metrics.append(study_pb2.Measurement.Metric(metric_id=name, value=metric.value)) + + proto.step_count = measurement.steps + int_seconds = int(measurement.elapsed_secs) + proto.elapsed_duration = duration_pb2.Duration(seconds=int_seconds, nanos=int(1e9 * (measurement.elapsed_secs - int_seconds))) + return proto + + +def _to_pyvizier_trial_status( + proto_state: study_pb2.Trial.State) -> trial.TrialStatus: + """from_proto conversion for Trial statuses.""" + if proto_state == study_pb2.Trial.State.REQUESTED: + return trial.TrialStatus.REQUESTED + elif proto_state == study_pb2.Trial.State.ACTIVE: + return trial.TrialStatus.ACTIVE + if proto_state == study_pb2.Trial.State.STOPPING: + return trial.TrialStatus.STOPPING + if proto_state == study_pb2.Trial.State.SUCCEEDED: + return trial.TrialStatus.COMPLETED + elif proto_state == study_pb2.Trial.State.INFEASIBLE: + return trial.TrialStatus.COMPLETED + else: + return trial.TrialStatus.UNKNOWN + + +def _from_pyvizier_trial_status(status: trial.TrialStatus, + infeasible: bool) -> study_pb2.Trial.State: + """to_proto conversion for Trial states.""" + if status == trial.TrialStatus.REQUESTED: + return study_pb2.Trial.State.REQUESTED + elif status == trial.TrialStatus.ACTIVE: + return study_pb2.Trial.State.ACTIVE + elif status == trial.TrialStatus.STOPPING: + return study_pb2.Trial.State.STOPPING + elif status == trial.TrialStatus.COMPLETED: + if infeasible: + 
return study_pb2.Trial.State.INFEASIBLE + else: + return study_pb2.Trial.State.SUCCEEDED + else: + return study_pb2.Trial.State.STATE_UNSPECIFIED + + +class TrialConverter: + """Converter for trial.TrialConverter.""" + + @classmethod + def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: + """Converts from Trial proto to object. + + Args: + proto: Trial proto. + + Returns: + A Trial object. + """ + parameters = {} + for parameter in proto.parameters: + value = ParameterValueConverter.from_proto(parameter) + if value is not None: + if parameter.parameter_id in parameters: + raise ValueError('Invalid trial proto contains duplicate parameter {}' + ': {}'.format(parameter.parameter_id, proto)) + parameters[parameter.parameter_id] = value + else: + logging.warning('A parameter without a value will be dropped: %s', + parameter) + + final_measurement = None + if proto.HasField('final_measurement'): + final_measurement = MeasurementConverter.from_proto( + proto.final_measurement) + + completion_time = None + infeasibility_reason = None + if proto.state == study_pb2.Trial.State.SUCCEEDED: + if proto.HasField('end_time'): + completion_ts = proto.end_time.seconds + 1e-9 * proto.end_time.nanos + completion_time = datetime.datetime.fromtimestamp(completion_ts) + elif proto.state == study_pb2.Trial.State.INFEASIBLE: + infeasibility_reason = proto.infeasible_reason + + metadata = trial.Metadata() + for kv in proto.metadata: + metadata.abs_ns(common.Namespace.decode(kv.ns))[kv.key] = ( + kv.proto if kv.HasField('proto') else kv.value) + + measurements = [] + for measure in proto.measurements: + measurements.append(MeasurementConverter.from_proto(measure)) + + creation_time = None + if proto.HasField('start_time'): + creation_ts = proto.start_time.seconds + 1e-9 * proto.start_time.nanos + creation_time = datetime.datetime.fromtimestamp(creation_ts) + return trial.Trial( + id=int(proto.id), + description=proto.name, + assigned_worker=proto.client_id or None, + 
is_requested=proto.state == proto.REQUESTED, + stopping_reason=('stopping reason not supported yet' + if proto.state == proto.STOPPING else None), + parameters=parameters, + creation_time=creation_time, + completion_time=completion_time, + infeasibility_reason=infeasibility_reason, + final_measurement=final_measurement, + measurements=measurements, + metadata=metadata) # pytype: disable=wrong-arg-types + + @classmethod + def from_protos(cls, protos: Sequence[study_pb2.Trial]) -> List[trial.Trial]: + """Convenience wrapper for from_proto.""" + return [TrialConverter.from_proto(proto) for proto in protos] + + @classmethod + def to_protos(cls, pytrials: Sequence[trial.Trial]) -> List[study_pb2.Trial]: + return [TrialConverter.to_proto(pytrial) for pytrial in pytrials] + + @classmethod + def to_proto(cls, pytrial: trial.Trial) -> study_pb2.Trial: + """Converts a pyvizier Trial to a Trial proto.""" + proto = study_pb2.Trial() + if pytrial.description is not None: + proto.name = pytrial.description + proto.id = str(pytrial.id) + proto.state = _from_pyvizier_trial_status(pytrial.status, + pytrial.infeasible) + proto.client_id = pytrial.assigned_worker or '' + + for name, value in pytrial.parameters.items(): + proto.parameters.append(ParameterValueConverter.to_proto(value, name)) + + # pytrial always adds an empty metric. Ideally, we should remove it if the + # metric does not exist in the study config. 
+ if pytrial.final_measurement is not None: + proto.final_measurement.CopyFrom( + MeasurementConverter.to_proto(pytrial.final_measurement)) + + for measurement in pytrial.measurements: + proto.measurements.append(MeasurementConverter.to_proto(measurement)) + + if pytrial.creation_time is not None: + creation_secs = datetime.datetime.timestamp(pytrial.creation_time) + proto.start_time.seconds = int(creation_secs) + proto.start_time.nanos = int(1e9 * (creation_secs - int(creation_secs))) + if pytrial.completion_time is not None: + completion_secs = datetime.datetime.timestamp(pytrial.completion_time) + proto.end_time.seconds = int(completion_secs) + proto.end_time.nanos = int(1e9 * (completion_secs - int(completion_secs))) + if pytrial.infeasibility_reason is not None: + proto.infeasible_reason = pytrial.infeasibility_reason + if pytrial.metadata is not None: + for ns in pytrial.metadata.namespaces(): + ns_string = ns.encode() + ns_layer = pytrial.metadata.abs_ns(ns) + for key, value in ns_layer.items(): + metadata_util.assign(proto, key=key, ns=ns_string, value=value) + return proto diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py new file mode 100644 index 0000000000..c1c3ebdffe --- /dev/null +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -0,0 +1,453 @@ +"""Convenience classes for configuring Vizier Study Configs and Search Spaces. + +This module contains several classes, used to access/build Vizier StudyConfig +protos: + * `StudyConfig` class is the main class, which: + 1) Allows to easily build Vizier StudyConfig protos via a convenient + Python API. + 2) Can be initialized from an existing StudyConfig proto, to enable easy + Pythonic accessors to information contained in StudyConfig protos, + and easy field editing capabilities. + + * `SearchSpace` and `SearchSpaceSelector` classes deals with Vizier search + spaces. 
Both flat spaces and conditional parameters are supported. +""" +import collections +import copy +import enum +from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union + +import attr +from google.cloud.aiplatform.vizier.pyvizier import automated_stopping +from google.cloud.aiplatform.vizier.pyvizier import metadata_util +from google.cloud.aiplatform.vizier.pyvizier import proto_converters +from google.cloud.aiplatform.vizier.pyvizier import base_study_config +from google.cloud.aiplatform.vizier.pyvizier import common +from google.cloud.aiplatform.vizier.pyvizier import parameter_config +from google.cloud.aiplatform.vizier.pyvizier import trial +from google.cloud.aiplatform.compat.types import study as study_pb2 + +################### PyTypes ################### +ScaleType = parameter_config.ScaleType +ExternalType = parameter_config.ExternalType +# A sequence of possible internal parameter values. +MonotypeParameterSequence = parameter_config.MonotypeParameterSequence +# Possible types for trial parameter values after cast to external types. +ParameterValueSequence = Union[trial.ParameterValueTypes, Sequence[int], + Sequence[float], Sequence[str], Sequence[bool]] + +################### Enums ################### + +# Values should NEVER be removed from ObjectiveMetricGoal, only added. 
+ObjectiveMetricGoal = base_study_config.ObjectiveMetricGoal + + +class Algorithm(enum.Enum): + """Valid Values for StudyConfig.Algorithm.""" + ALGORITHM_UNSPECIFIED = study_pb2.StudySpec.Algorithm.ALGORITHM_UNSPECIFIED + #GAUSSIAN_PROCESS_BANDIT = study_pb2.StudySpec.Algorithm.GAUSSIAN_PROCESS_BANDIT + GRID_SEARCH = study_pb2.StudySpec.Algorithm.GRID_SEARCH + RANDOM_SEARCH = study_pb2.StudySpec.Algorithm.RANDOM_SEARCH + #NSGA2 = study_pb2.StudySpec.Algorithm.NSGA2 + #EMUKIT_GP_EI = study_pb2.StudySpec.Algorithm.EMUKIT_GP_EI + + +class ObservationNoise(enum.Enum): + """Valid Values for StudyConfig.ObservationNoise.""" + OBSERVATION_NOISE_UNSPECIFIED = study_pb2.StudySpec.ObservationNoise.OBSERVATION_NOISE_UNSPECIFIED + LOW = study_pb2.StudySpec.ObservationNoise.LOW + HIGH = study_pb2.StudySpec.ObservationNoise.HIGH + + +################### Classes For Various Config Protos ################### +@attr.define(frozen=False, init=True, slots=True, kw_only=True) +class MetricInformationConverter: + """A wrapper for vizier_pb2.MetricInformation.""" + + @classmethod + def from_proto( + cls, proto: study_pb2.StudySpec.MetricSpec + ) -> base_study_config.MetricInformation: + """Converts a MetricInformation proto to a MetricInformation object.""" + if proto.goal not in list(ObjectiveMetricGoal): + raise ValueError('Unknown MetricInformation.goal: {}'.format(proto.goal)) + + return base_study_config.MetricInformation( + name=proto.metric_id, + goal=proto.goal, + safety_threshold=None, + safety_std_threshold=None, + min_value=None, + max_value=None) + + @classmethod + def to_proto( + cls, obj: base_study_config.MetricInformation + ) -> study_pb2.StudySpec.MetricSpec: + """Returns this object as a proto.""" + return study_pb2.StudySpec.MetricSpec( + metric_id=obj.name, goal=obj.goal.value) + + +class MetricsConfig(base_study_config.MetricsConfig): + """Metrics config.""" + + @classmethod + def from_proto( + cls, protos: Iterable[study_pb2.StudySpec.MetricSpec]) -> 
'MetricsConfig': + return cls(MetricInformationConverter.from_proto(m) for m in protos) + + def to_proto(self) -> List[study_pb2.StudySpec.MetricSpec]: + return [MetricInformationConverter.to_proto(metric) for metric in self] + + +SearchSpaceSelector = base_study_config.SearchSpaceSelector + + +@attr.define(frozen=True, init=True, slots=True, kw_only=True) +class SearchSpace(base_study_config.SearchSpace): + """A Selector for all, or part of a SearchSpace.""" + + @classmethod + def from_proto(cls, proto: study_pb2.StudySpec) -> 'SearchSpace': + """Extracts a SearchSpace object from a StudyConfig proto.""" + parameter_configs = [] + for pc in proto.parameters: + parameter_configs.append( + proto_converters.ParameterConfigConverter.from_proto(pc)) + return cls._factory(parameter_configs=parameter_configs) + + @property + def parameter_protos(self) -> List[study_pb2.StudySpec.ParameterSpec]: + """Returns the search space as a List of ParameterConfig protos.""" + return [ + proto_converters.ParameterConfigConverter.to_proto(pc) + for pc in self._parameter_configs + ] + + +################### Main Class ################### +# +# A StudyConfig object can be initialized: +# (1) From a StudyConfig proto using StudyConfig.from_proto(): +# study_config_proto = study_pb2.StudySpec(...) +# study_config = pyvizier.StudyConfig.from_proto(study_config_proto) +# # Attributes can be modified. 
+# study_config.metadata['metadata_key'] = 'metadata_value' +# new_proto = study_config.to_proto() +# +# (2) By directly calling __init__ and setting attributes: +# study_config = pyvizier.StudyConfig( +# metric_information=[pyvizier.MetricInformation( +# name='accuracy', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)], +# search_space=SearchSpace.from_proto(proto), +# ) +# # OR: +# study_config = pyvizier.StudyConfig() +# study_config.metric_information.append( +# pyvizier.MetricInformation( +# name='accuracy', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) +# +# # Since building a search space is more involved, get a reference to the +# # search space, and add parameters to it. +# root = study_config.search_space.select_root() +# root.add_float_param('learning_rate', 0.001, 1.0, +# scale_type=pyvizier.ScaleType.LOG) +# +@attr.define(frozen=False, init=True, slots=True, kw_only=True) +class StudyConfig(base_study_config.ProblemStatement): + """A builder and wrapper for study_pb2.StudySpec proto.""" + + search_space: SearchSpace = attr.field( + init=True, + factory=SearchSpace, + validator=attr.validators.instance_of(SearchSpace), + on_setattr=attr.setters.validate) + + algorithm: Algorithm = attr.field( + init=True, + validator=attr.validators.instance_of(Algorithm), + on_setattr=[attr.setters.convert, attr.setters.validate], + default=Algorithm.ALGORITHM_UNSPECIFIED, + kw_only=True) + + metric_information: MetricsConfig = attr.field( + init=True, + factory=MetricsConfig, + converter=MetricsConfig, + validator=attr.validators.instance_of(MetricsConfig), + kw_only=True) + + observation_noise: ObservationNoise = attr.field( + init=True, + validator=attr.validators.instance_of(ObservationNoise), + on_setattr=attr.setters.validate, + default=ObservationNoise.OBSERVATION_NOISE_UNSPECIFIED, + kw_only=True) + + automated_stopping_config: Optional[ + automated_stopping.AutomatedStoppingConfig] = attr.field( + init=True, + default=None, + validator=attr.validators.optional( + 
attr.validators.instance_of( + automated_stopping.AutomatedStoppingConfig)), + on_setattr=attr.setters.validate, + kw_only=True) + + metadata: common.Metadata = attr.field( + init=True, + kw_only=True, + factory=common.Metadata, + validator=attr.validators.instance_of(common.Metadata), + on_setattr=[attr.setters.convert, attr.setters.validate]) + + # An internal representation as a StudyConfig proto. + # If this object was created from a StudyConfig proto, a copy of the original + # proto is kept, to make sure that unknown proto fields are preserved in + # round trip serialization. + # TODO: Fix the broken proto validation. + _study_config: study_pb2.StudySpec = attr.field( + init=True, + factory=study_pb2.StudySpec, + kw_only=True) + + # Public attributes, methods and properties. + @classmethod + def from_proto(cls, proto: study_pb2.StudySpec) -> 'StudyConfig': + """Converts a StudyConfig proto to a StudyConfig object. + + Args: + proto: StudyConfig proto. + + Returns: + A StudyConfig object. 
+ """ + metric_information = MetricsConfig( + sorted( + [MetricInformationConverter.from_proto(m) for m in proto.metrics], + key=lambda x: x.name)) + + oneof_name = proto.WhichOneof('automated_stopping_spec') + if not oneof_name: + automated_stopping_config = None + else: + automated_stopping_config = automated_stopping.AutomatedStoppingConfig.from_proto( + getattr(proto, oneof_name)) + + metadata = common.Metadata() + for kv in proto.metadata: + metadata.abs_ns(common.Namespace.decode(kv.ns))[kv.key] = ( + kv.proto if kv.HasField('proto') else kv.value) + + return cls( + search_space=SearchSpace.from_proto(proto), + algorithm=Algorithm(proto.algorithm), + metric_information=metric_information, + observation_noise=ObservationNoise(proto.observation_noise), + automated_stopping_config=automated_stopping_config, + study_config=copy.deepcopy(proto), + metadata=metadata) + + def to_proto(self) -> study_pb2.StudySpec: + """Serializes this object to a StudyConfig proto.""" + proto = copy.deepcopy(self._study_config) + proto.algorithm = self.algorithm.value + proto.observation_noise = self.observation_noise.value + + del proto.metrics[:] + proto.metrics.extend(self.metric_information.to_proto()) + + del proto.parameters[:] + proto.parameters.extend(self.search_space.parameter_protos) + + if self.automated_stopping_config is not None: + auto_stop_proto = self.automated_stopping_config.to_proto() + if isinstance(auto_stop_proto, + study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec): + for method_name in dir(proto.decay_curve_stopping_spec): + if callable(getattr(proto.decay_curve_stopping_spec, method_name)): + print(method_name) + proto.decay_curve_stopping_spec = copy.deepcopy(auto_stop_proto) + elif isinstance(auto_stop_proto, + study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec): + for method_name in dir(proto.decay_curve_stopping_spec): + if callable(getattr(proto.median_automated_stopping_spec, method_name)): + print(method_name) + 
proto.median_automated_stopping_spec = copy.deepcopy(auto_stop_proto) + + for ns in self.metadata.namespaces(): + ns_string = ns.encode() + ns_layer = self.metadata.abs_ns(ns) + for key, value in ns_layer.items(): + metadata_util.assign(proto, key=key, ns=ns_string, value=value) + return proto + + @property + def is_single_objective(self) -> bool: + """Returns True if only one objective metric is configured.""" + return len(self.metric_information) == 1 + + @property + def single_objective_metric_name(self) -> Optional[str]: + """Returns the name of the single-objective metric, if set. + + Returns: + String: name of the single-objective metric. + None: if this is not a single-objective study. + """ + if len(self.metric_information) == 1: + return list(self.metric_information)[0].name + return None + + def _trial_to_external_values( + self, pytrial: trial.Trial) -> Dict[str, Union[float, int, str, bool]]: + """Returns the trial paremeter values cast to external types.""" + parameter_values: Dict[str, Union[float, int, str]] = {} + external_values: Dict[str, Union[float, int, str, bool]] = {} + # parameter_configs is a list of Tuple[parent_name, ParameterConfig]. + parameter_configs: List[Tuple[Optional[str], + parameter_config.ParameterConfig]] = [ + (None, p) + for p in self.search_space.parameters + ] + remaining_parameters = copy.deepcopy(pytrial.parameters) + # Traverse the conditional tree using a BFS. + while parameter_configs and remaining_parameters: + parent_name, pc = parameter_configs.pop(0) + parameter_configs.extend( + (pc.name, child) for child in pc.child_parameter_configs) + if pc.name not in remaining_parameters: + continue + if parent_name is not None: + # This is a child parameter. If the parent was not seen, + # skip this parameter config. 
+ if parent_name not in parameter_values: + continue + parent_value = parameter_values[parent_name] + if parent_value not in pc.matching_parent_values: + continue + parameter_values[pc.name] = remaining_parameters[pc.name].value + if pc.external_type is None: + external_value = remaining_parameters[pc.name].value + else: + external_value = remaining_parameters[pc.name].cast(pc.external_type) # pytype: disable=wrong-arg-types + external_values[pc.name] = external_value + remaining_parameters.pop(pc.name) + return external_values + + def trial_parameters( + self, proto: study_pb2.Trial) -> Dict[str, ParameterValueSequence]: + """Returns the trial values, cast to external types, if they exist. + + Args: + proto: + + Returns: + Parameter values dict: cast to each parameter's external_type, if exists. + NOTE that the values in the dict may be a Sequence as opposed to a single + element. + + Raises: + ValueError: If the trial parameters do not exist in this search space. + ValueError: If the trial contains duplicate parameters. + """ + pytrial = proto_converters.TrialConverter.from_proto(proto) + return self._pytrial_parameters(pytrial) + + def _pytrial_parameters( + self, pytrial: trial.Trial) -> Dict[str, ParameterValueSequence]: + """Returns the trial values, cast to external types, if they exist. + + Args: + pytrial: + + Returns: + Parameter values dict: cast to each parameter's external_type, if exists. + NOTE that the values in the dict may be a Sequence as opposed to a single + element. + + Raises: + ValueError: If the trial parameters do not exist in this search space. + ValueError: If the trial contains duplicate parameters. 
+ """ + trial_external_values: Dict[str, Union[float, int, str, bool]] = ( + self._trial_to_external_values(pytrial)) + if len(trial_external_values) != len(pytrial.parameters): + raise ValueError('Invalid trial for this search space: failed to convert ' + 'all trial parameters: {}'.format(pytrial)) + + # Combine multi-dimensional parameter values to a list of values. + trial_final_values: Dict[str, ParameterValueSequence] = {} + # multi_dim_params: Dict[str, List[Tuple[int, ParameterValueSequence]]] + multi_dim_params = collections.defaultdict(list) + for name in trial_external_values: + base_index = SearchSpaceSelector.parse_multi_dimensional_parameter_name( + name) + if base_index is None: + trial_final_values[name] = trial_external_values[name] + else: + base_name, index = base_index + multi_dim_params[base_name].append((index, trial_external_values[name])) + for name in multi_dim_params: + multi_dim_params[name].sort(key=lambda x: x[0]) + trial_final_values[name] = [x[1] for x in multi_dim_params[name]] + + return trial_final_values + + def trial_metrics(self, + proto: study_pb2.Trial, + *, + include_all_metrics=False) -> Dict[str, float]: + """Returns the trial's final measurement metric values. + + If the trial is not completed, or infeasible, no metrics are returned. + By default, only metrics configured in the StudyConfig are returned + (e.g. only objective and safety metrics). + + Args: + proto: + include_all_metrics: If True, all metrics in the final measurements are + returned. If False, only metrics configured in the StudyConfig are + returned. + + Returns: + Dict[metric name, metric value] + """ + pytrial = proto_converters.TrialConverter.from_proto(proto) + return self._pytrial_metrics( + pytrial, include_all_metrics=include_all_metrics) + + def _pytrial_metrics(self, + pytrial: trial.Trial, + *, + include_all_metrics=False) -> Dict[str, float]: + """Returns the trial's final measurement metric values. 
+ + If the trial is not completed, or infeasible, no metrics are returned. + By default, only metrics configured in the StudyConfig are returned + (e.g. only objective and safety metrics). + + Args: + pytrial: + include_all_metrics: If True, all metrics in the final measurements are + returned. If False, only metrics configured in the StudyConfig are + returned. + + Returns: + Dict[metric name, metric value] + """ + configured_metrics = [m.name for m in self.metric_information] + + metrics: Dict[str, float] = {} + if pytrial.is_completed and not pytrial.infeasible: + for name in pytrial.final_measurement.metrics: + if (include_all_metrics or + (not include_all_metrics and name in configured_metrics)): + # Special case: Measurement always adds an empty metric by default. + # If there is a named single objective in study_config, drop the empty + # metric. + if not name and self.single_objective_metric_name != name: + continue + metrics[name] = pytrial.final_measurement.metrics[name].value + return metrics From 46cbe4aa00b6f0cc655d9dfe13e5ca0a28ecf54a Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 3 Jun 2022 13:27:33 -0700 Subject: [PATCH 05/36] imported the objects from the pyvizier. 
--- google/cloud/aiplatform/vizier/pyvizier/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index 708302d393..073e10c4c2 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -10,6 +10,11 @@ from google.cloud.aiplatform.vizier.pyvizier.common import Metadata from google.cloud.aiplatform.vizier.pyvizier.common import MetadataValue from google.cloud.aiplatform.vizier.pyvizier.common import Namespace +from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ParameterConfigConverter +from google.cloud.aiplatform.vizier.pyvizier.proto_converters import MeasurementConverter +from google.cloud.aiplatform.vizier.pyvizier.study_config import StudyConfig +from google.cloud.aiplatform.vizier.pyvizier.study_config import Algorithm +from google.cloud.aiplatform.vizier.pyvizier.automated_stopping import AutomatedStoppingConfig from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ExternalType from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterConfig from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterType @@ -24,4 +29,3 @@ from google.cloud.aiplatform.vizier.pyvizier.trial import TrialStatus from google.cloud.aiplatform.vizier.pyvizier.trial import TrialSuggestion -StudyConfig = ProblemStatement # To be deprecated. From 8122807e683d2381283d60271cc4d7cd450b92f1 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 14 Jun 2022 13:56:43 -0700 Subject: [PATCH 06/36] Added the Vizier client and types in the aiplatform init file. 
--- google/cloud/aiplatform/compat/__init__.py | 4 ++++ google/cloud/aiplatform/compat/types/__init__.py | 2 ++ google/cloud/aiplatform/vizier/pyvizier/__init__.py | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/compat/__init__.py b/google/cloud/aiplatform/compat/__init__.py index 6aea51d133..88ed87c5cd 100644 --- a/google/cloud/aiplatform/compat/__init__.py +++ b/google/cloud/aiplatform/compat/__init__.py @@ -44,6 +44,7 @@ services.index_endpoint_service_client = ( services.index_endpoint_service_client_v1beta1 ) + services.vizier_service_client = services.vizier_service_client_v1beta1 types.accelerator_type = types.accelerator_type_v1beta1 types.annotation = types.annotation_v1beta1 @@ -110,6 +111,7 @@ types.tensorboard_time_series = types.tensorboard_time_series_v1beta1 types.training_pipeline = types.training_pipeline_v1beta1 types.types = types.types_v1beta1 + types.vizier_service = types.vizier_service_v1beta1 if DEFAULT_VERSION == V1: @@ -127,6 +129,7 @@ services.tensorboard_service_client = services.tensorboard_service_client_v1 services.index_service_client = services.index_service_client_v1 services.index_endpoint_service_client = services.index_endpoint_service_client_v1 + services.vizier_service_client = services.vizier_service_client_v1 types.accelerator_type = types.accelerator_type_v1 types.annotation = types.annotation_v1 @@ -192,6 +195,7 @@ types.tensorboard_time_series = types.tensorboard_time_series_v1 types.training_pipeline = types.training_pipeline_v1 types.types = types.types_v1 + types.vizier_service = types.vizier_service_v1 __all__ = ( DEFAULT_VERSION, diff --git a/google/cloud/aiplatform/compat/types/__init__.py b/google/cloud/aiplatform/compat/types/__init__.py index 14ff93f011..2d0380879a 100644 --- a/google/cloud/aiplatform/compat/types/__init__.py +++ b/google/cloud/aiplatform/compat/types/__init__.py @@ -76,6 +76,7 @@ tensorboard_time_series as tensorboard_time_series_v1beta1, 
training_pipeline as training_pipeline_v1beta1, types as types_v1beta1, + vizier_service as vizier_service_v1beta1, ) from google.cloud.aiplatform_v1.types import ( accelerator_type as accelerator_type_v1, @@ -137,6 +138,7 @@ tensorboard_time_series as tensorboard_time_series_v1, training_pipeline as training_pipeline_v1, types as types_v1, + vizier_service as vizier_service_v1, ) __all__ = ( diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index 073e10c4c2..a2d85c8a7c 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -12,6 +12,7 @@ from google.cloud.aiplatform.vizier.pyvizier.common import Namespace from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ParameterConfigConverter from google.cloud.aiplatform.vizier.pyvizier.proto_converters import MeasurementConverter +from google.cloud.aiplatform.vizier.pyvizier.proto_converters import TrialConverter from google.cloud.aiplatform.vizier.pyvizier.study_config import StudyConfig from google.cloud.aiplatform.vizier.pyvizier.study_config import Algorithm from google.cloud.aiplatform.vizier.pyvizier.automated_stopping import AutomatedStoppingConfig @@ -28,4 +29,3 @@ from google.cloud.aiplatform.vizier.pyvizier.trial import TrialFilter from google.cloud.aiplatform.vizier.pyvizier.trial import TrialStatus from google.cloud.aiplatform.vizier.pyvizier.trial import TrialSuggestion - From bb7c73113fba8846e5177e24a09c50e3566869d2 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 14 Jun 2022 15:07:35 -0700 Subject: [PATCH 07/36] Made the pyvizier converters compatible with the Vertex SDK proto. 
--- .../vizier/pyvizier/proto_converters.py | 40 ++++++------------- .../vizier/pyvizier/study_config.py | 26 ++---------- .../cloud/aiplatform/vizier/pyvizier/trial.py | 16 +------- 3 files changed, 17 insertions(+), 65 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py index 5cfe28f337..b4d05d2d01 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py +++ b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py @@ -105,7 +105,7 @@ def from_proto( ValueError: See the "strict_validtion" arg documentation. """ feasible_values = [] - oneof_name = proto.WhichOneof('parameter_value_spec') + oneof_name = proto._pb.WhichOneof('parameter_value_spec') if oneof_name == 'integer_value_spec': bounds = (int(proto.integer_value_spec.min_value), int(proto.integer_value_spec.max_value)) @@ -120,8 +120,8 @@ def from_proto( feasible_values = proto.categorical_value_spec.values default_value = None - if getattr(proto, oneof_name).default_value.value: - default_value = getattr(proto, oneof_name).default_value.value + if getattr(proto, oneof_name).default_value: + default_value = getattr(proto, oneof_name).default_value if proto.conditional_parameter_specs: children = [] @@ -230,9 +230,7 @@ class ParameterValueConverter: def from_proto( cls, proto: study_pb2.Trial.Parameter) -> Optional[trial.ParameterValue]: """Returns whichever value that is populated, or None.""" - value_proto = proto.value - oneof_name = value_proto.WhichOneof('kind') - potential_value = getattr(value_proto, oneof_name) + potential_value = proto.value if isinstance(potential_value, float) or isinstance( potential_value, str) or isinstance(potential_value, bool): return trial.ParameterValue(potential_value) @@ -366,46 +364,40 @@ def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: parameter) final_measurement = None - if proto.HasField('final_measurement'): + if proto.final_measurement: 
final_measurement = MeasurementConverter.from_proto( proto.final_measurement) completion_time = None infeasibility_reason = None if proto.state == study_pb2.Trial.State.SUCCEEDED: - if proto.HasField('end_time'): - completion_ts = proto.end_time.seconds + 1e-9 * proto.end_time.nanos + if proto.end_time: + completion_ts = proto.end_time.nanosecond / 1e9 completion_time = datetime.datetime.fromtimestamp(completion_ts) elif proto.state == study_pb2.Trial.State.INFEASIBLE: infeasibility_reason = proto.infeasible_reason - metadata = trial.Metadata() - for kv in proto.metadata: - metadata.abs_ns(common.Namespace.decode(kv.ns))[kv.key] = ( - kv.proto if kv.HasField('proto') else kv.value) - measurements = [] for measure in proto.measurements: measurements.append(MeasurementConverter.from_proto(measure)) creation_time = None - if proto.HasField('start_time'): - creation_ts = proto.start_time.seconds + 1e-9 * proto.start_time.nanos + if proto.start_time: + creation_ts = proto.start_time.nanosecond / 1e9 creation_time = datetime.datetime.fromtimestamp(creation_ts) return trial.Trial( - id=int(proto.id), + id=int(proto.name.split('/')[-1]), description=proto.name, assigned_worker=proto.client_id or None, - is_requested=proto.state == proto.REQUESTED, + is_requested=proto.state == study_pb2.Trial.State.REQUESTED, stopping_reason=('stopping reason not supported yet' - if proto.state == proto.STOPPING else None), + if proto.state == study_pb2.Trial.State.STOPPING else None), parameters=parameters, creation_time=creation_time, completion_time=completion_time, infeasibility_reason=infeasibility_reason, final_measurement=final_measurement, - measurements=measurements, - metadata=metadata) # pytype: disable=wrong-arg-types + measurements=measurements) # pytype: disable=wrong-arg-types @classmethod def from_protos(cls, protos: Sequence[study_pb2.Trial]) -> List[trial.Trial]: @@ -449,10 +441,4 @@ def to_proto(cls, pytrial: trial.Trial) -> study_pb2.Trial: proto.end_time.nanos = 
int(1e9 * (completion_secs - int(completion_secs))) if pytrial.infeasibility_reason is not None: proto.infeasible_reason = pytrial.infeasibility_reason - if pytrial.metadata is not None: - for ns in pytrial.metadata.namespaces(): - ns_string = ns.encode() - ns_layer = pytrial.metadata.abs_ns(ns) - for key, value in ns_layer.items(): - metadata_util.assign(proto, key=key, ns=ns_string, value=value) return proto diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index c1c3ebdffe..3ce50e910c 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -19,7 +19,6 @@ import attr from google.cloud.aiplatform.vizier.pyvizier import automated_stopping -from google.cloud.aiplatform.vizier.pyvizier import metadata_util from google.cloud.aiplatform.vizier.pyvizier import proto_converters from google.cloud.aiplatform.vizier.pyvizier import base_study_config from google.cloud.aiplatform.vizier.pyvizier import common @@ -133,7 +132,6 @@ def parameter_protos(self) -> List[study_pb2.StudySpec.ParameterSpec]: # study_config_proto = study_pb2.StudySpec(...) # study_config = pyvizier.StudyConfig.from_proto(study_config_proto) # # Attributes can be modified. -# study_config.metadata['metadata_key'] = 'metadata_value' # new_proto = study_config.to_proto() # # (2) By directly calling __init__ and setting attributes: @@ -195,13 +193,6 @@ class StudyConfig(base_study_config.ProblemStatement): on_setattr=attr.setters.validate, kw_only=True) - metadata: common.Metadata = attr.field( - init=True, - kw_only=True, - factory=common.Metadata, - validator=attr.validators.instance_of(common.Metadata), - on_setattr=[attr.setters.convert, attr.setters.validate]) - # An internal representation as a StudyConfig proto. 
# If this object was created from a StudyConfig proto, a copy of the original # proto is kept, to make sure that unknown proto fields are preserved in @@ -227,27 +218,21 @@ def from_proto(cls, proto: study_pb2.StudySpec) -> 'StudyConfig': sorted( [MetricInformationConverter.from_proto(m) for m in proto.metrics], key=lambda x: x.name)) - - oneof_name = proto.WhichOneof('automated_stopping_spec') + + oneof_name = proto._pb.WhichOneof('automated_stopping_spec') if not oneof_name: automated_stopping_config = None else: automated_stopping_config = automated_stopping.AutomatedStoppingConfig.from_proto( getattr(proto, oneof_name)) - metadata = common.Metadata() - for kv in proto.metadata: - metadata.abs_ns(common.Namespace.decode(kv.ns))[kv.key] = ( - kv.proto if kv.HasField('proto') else kv.value) - return cls( search_space=SearchSpace.from_proto(proto), algorithm=Algorithm(proto.algorithm), metric_information=metric_information, observation_noise=ObservationNoise(proto.observation_noise), automated_stopping_config=automated_stopping_config, - study_config=copy.deepcopy(proto), - metadata=metadata) + study_config=copy.deepcopy(proto)) def to_proto(self) -> study_pb2.StudySpec: """Serializes this object to a StudyConfig proto.""" @@ -276,11 +261,6 @@ def to_proto(self) -> study_pb2.StudySpec: print(method_name) proto.median_automated_stopping_spec = copy.deepcopy(auto_stop_proto) - for ns in self.metadata.namespaces(): - ns_string = ns.encode() - ns_layer = self.metadata.abs_ns(ns) - for key, value in ns_layer.items(): - metadata_util.assign(proto, key=key, ns=ns_string, value=value) return proto @property diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py index 476691e52b..b86364ba21 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/trial.py +++ b/google/cloud/aiplatform/vizier/pyvizier/trial.py @@ -22,7 +22,6 @@ ParameterValueTypes = Union[str, int, float, bool] OrderedDict = collections.OrderedDict 
-Metadata = common.Metadata class ExternalType(enum.Enum): @@ -332,13 +331,6 @@ class Trial: converter=ParameterDict, validator=attr.validators.instance_of(ParameterDict)) - metadata: Metadata = attr.ib( - init=True, - kw_only=True, - default=Metadata(), - validator=attr.validators.instance_of(Metadata), - ) - related_links: Dict[str, str] = attr.ib( init=True, kw_only=True, @@ -485,12 +477,6 @@ class TrialSuggestion: converter=ParameterDict, validator=attr.validators.instance_of(ParameterDict)) # pytype: disable=wrong-arg-types - metadata: Metadata = attr.field( - init=True, - kw_only=True, - factory=Metadata, - validator=attr.validators.instance_of(Metadata)) - def to_trial(self, uid: int) -> Trial: """Assign an id and make it a Trial object. @@ -504,7 +490,7 @@ def to_trial(self, uid: int) -> Trial: Returns: Trial object. """ - return Trial(id=uid, parameters=self.parameters, metadata=self.metadata) + return Trial(id=uid, parameters=self.parameters) @attr.define class TrialFilter: From 24425ae3c6583f3cd28e4257fcf2cdcd6a27c133 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 14 Jun 2022 15:12:59 -0700 Subject: [PATCH 08/36] Forked the framework for the Vertex Vizier from the Open Source Vizier. --- google/cloud/aiplatform/vizier/client_abc.py | 180 +++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 google/cloud/aiplatform/vizier/client_abc.py diff --git a/google/cloud/aiplatform/vizier/client_abc.py b/google/cloud/aiplatform/vizier/client_abc.py new file mode 100644 index 0000000000..8e36a0b4cd --- /dev/null +++ b/google/cloud/aiplatform/vizier/client_abc.py @@ -0,0 +1,180 @@ +"""Cross-platform Vizier client interfaces. + +Aside from "materialize_" methods, code written using these interfaces are +compatible with OSS and Cloud Vertex Vizier. Note importantly that subclasses +may have more methods than what is required by interfaces, and such methods +are not cross compatible. 
Our recommendation is to explicitly type your objects +to be `StudyInterface` or `TrialInterface` when you want to guarantee that +a code block is cross-platform. + +Keywords: + +#Materialize: The method returns a deep copy of the underlying pyvizier object. +Modifying the returned object does not update the Vizier service. +""" + +# TODO(b/182496749): Add a dedicated .md file with more code examples. +from __future__ import annotations + +import abc + +from typing import Optional, Collection, Type, TypeVar, Mapping, Any + +from google.cloud.aiplatform.vizier import pyvizier as vz + +_T = TypeVar('_T') + + +class ResourceNotFoundError(LookupError): + """Error raised by Vizier clients when resource is not found.""" + pass + + +class TrialInterface(abc.ABC): + """Responsible for trial-level operations.""" + + @property + @abc.abstractmethod + def uid(self) -> int: + """Unique identifier of the trial.""" + + @property + @abc.abstractmethod + def parameters(self) -> Mapping[str, Any]: + """Parameters of the trial.""" + + @property + @abc.abstractmethod + def status(self) -> vz.TrialStatus: + """Trial's status.""" + + @abc.abstractmethod + def delete(self) -> None: + """Delete the Trial in Vizier service. + + There is currently no promise on how this object behaves after `delete()`. + If you are sharing a Trial object in parallel processes, proceed with + caution. + """ + + @abc.abstractmethod + def complete( + self, + measurement: Optional[vz.Measurement] = None, + *, + infeasible_reason: Optional[str] = None) -> Optional[vz.Measurement]: + """Completes the trial and #materializes the measurement. + + * If `measurement` is provided, then Vizier writes it as the trial's final + measurement and returns it. + * If `infeasible_reason` is provided, `measurement` is not needed. + * If neither is provided, then Vizier selects an existing (intermediate) + measurement to be the final measurement and returns it. + + Args: + measurement: Final measurement. 
+ infeasible_reason: Infeasible reason for missing final measurement. + + Returns: + The final measurement of the trial, or None if the trial is marked + infeasible. + + Raises: + ValueError: If neither `measurement` nor `infeasible_reason` is provided + but the trial does not contain any intermediate measurements. + """ + + @abc.abstractmethod + def should_stop(self) -> bool: + """Returns true if the trial should stop.""" + + @abc.abstractmethod + def add_measurement(self, measurement: vz.Measurement) -> None: + """Adds an intermediate measurement.""" + + @abc.abstractmethod + def materialize(self, *, include_all_measurements: bool = True) -> vz.Trial: + """#Materializes the Trial. + + Args: + include_all_measurements: If True, returned Trial includes all + intermediate measurements. The final measurement is always provided. + + Returns: + Trial object. + """ + + +class StudyInterface(abc.ABC): + """Responsible for study-level operations.""" + + @abc.abstractmethod + def create_or_load(self, + display_name: str, + problem: vz.ProblemStatement) -> StudyInterface: + """ + """ + + @abc.abstractmethod + def suggest(self, + *, + count: Optional[int] = None, + worker: str = '') -> Collection[TrialInterface]: + """Returns Trials to be evaluated by worker. + + Args: + count: Number of suggestions. + worker: When new Trials are generated, their `assigned_worker` field is + populated with this worker. suggest() first looks for existing Trials + that are assigned to `worker`, before generating new ones. + + Returns: + Trials. + """ + + @abc.abstractmethod + def delete(self) -> None: + """Deletes the study.""" + + @abc.abstractmethod + def trials( + self, + trial_filter: Optional[vz.TrialFilter] = None + ) -> Collection[TrialInterface]: + """Fetches a collection of trials.""" + + @abc.abstractmethod + def get_trial(self, uid: int) -> TrialInterface: + """Fetches a single trial. + + Args: + uid: Unique identifier of the trial within study. + + Returns: + Trial. 
+ + Raises: + ResourceNotFoundError: If trial does not exist. + """ + + @abc.abstractmethod + def optimal_trials(self) -> Collection[TrialInterface]: + """Returns optimal trial(s).""" + + @abc.abstractmethod + def materialize_study_config(self) -> vz.StudyConfig: + """#Materializes the study config.""" + + @abc.abstractclassmethod + def from_uid(cls: Type[_T], uid: str) -> _T: + """Fetches an existing study from the Vizier service. + + Args: + uid: Unique identifier of the study. + + Returns: + Study. + + Raises: + ResourceNotFoundError: If study does not exist. + """ From 6f24f9f48f6d691e9b92d088ab1cd7cfbd9bbd34 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 14 Jun 2022 15:16:50 -0700 Subject: [PATCH 09/36] Implemented the interfaces for Study and Trial. Added the unit tests and system tests for them. --- google/cloud/aiplatform/vizier/study.py | 196 +++++++++ google/cloud/aiplatform/vizier/trial.py | 173 ++++++++ tests/system/aiplatform/test_vizier.py | 87 ++++ tests/unit/aiplatform/test_vizier.py | 513 ++++++++++++++++++++++++ 4 files changed, 969 insertions(+) create mode 100644 google/cloud/aiplatform/vizier/study.py create mode 100644 google/cloud/aiplatform/vizier/trial.py create mode 100644 tests/system/aiplatform/test_vizier.py create mode 100644 tests/unit/aiplatform/test_vizier.py diff --git a/google/cloud/aiplatform/vizier/study.py b/google/cloud/aiplatform/vizier/study.py new file mode 100644 index 0000000000..121aac70f3 --- /dev/null +++ b/google/cloud/aiplatform/vizier/study.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Study resource wrapper for Vertex AI Vizier."""

import copy

from typing import Collection, Optional, Type, TypeVar

from google.api_core import exceptions
from google.auth import credentials as auth_credentials
from google.cloud.aiplatform import base
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.compat.types import study as gca_study
from google.cloud.aiplatform.vizier import pyvizier as vz
from google.cloud.aiplatform.vizier.client_abc import StudyInterface
from google.cloud.aiplatform.vizier.client_abc import TrialInterface
from google.cloud.aiplatform.vizier.trial import Trial

_T = TypeVar("_T")
_LOGGER = base.Logger(__name__)


class Study(base.VertexAiResourceNounWithFutureManager, StudyInterface):
    """Manage a Study resource for Vertex AI Vizier."""

    client_class = utils.VizierClientWithOverride

    _resource_noun = "study"
    _getter_method = "get_study"
    _list_method = "list_studies"
    _delete_method = "delete_study"
    _parse_resource_name_method = "parse_study_path"
    _format_resource_name_method = "study_path"

    def __init__(
        self,
        study_id: str,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ):
        """Retrieves an existing managed study given a study resource name or study ID.

        Example Usage:
            study = aiplatform.Study(study_id='12345678')
            or
            study = aiplatform.Study(
                study_id='projects/123/locations/us-central1/studies/12345678')

        Args:
            study_id (str):
                Required. A fully-qualified study resource name or a study ID.
                Example: "projects/123/locations/us-central1/studies/12345678"
                or "12345678" when project and location are initialized or passed.
            project (str):
                Optional. Project to retrieve the study from. If not set, the
                project set in aiplatform.init will be used.
            location (str):
                Optional. Location to retrieve the study from. If not set, the
                location set in aiplatform.init will be used.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to retrieve this study.
                Overrides credentials set in aiplatform.init.
        """
        base.VertexAiResourceNounWithFutureManager.__init__(
            self,
            project=project,
            location=location,
            credentials=credentials,
            resource_name=study_id,
        )
        # Eagerly fetch the backing proto so an invalid study_id fails fast.
        self._gca_resource = self._get_gca_resource(resource_name=study_id)

    @classmethod
    @base.optional_sync()
    def create_or_load(
        cls, display_name: str, problem: vz.ProblemStatement
    ) -> StudyInterface:
        """Creates a Study resource, or loads the existing one with the same name.

        Example Usage:
            sc = pyvizier.StudyConfig()
            sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH
            sc.metric_information.append(
                pyvizier.MetricInformation(
                    name='pr-auc', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE))
            root = sc.search_space.select_root()
            root.add_float_param(
                'learning_rate', 0.00001, 1.0,
                scale_type=pyvizier.ScaleType.LINEAR)
            root.add_categorical_param(
                'optimizer', ['adagrad', 'adam', 'experimental'])
            study = aiplatform.Study.create_or_load(
                display_name='display_name', problem=sc)

        Args:
            display_name (str):
                A name to describe the Study.
            problem (vz.ProblemStatement):
                Configuration of the study. It defines the problem to create
                the study for.

        Returns:
            The created (or pre-existing) Study resource.
        """
        api_client = cls._instantiate_client(
            location=initializer.global_config.location,
            credentials=initializer.global_config.credentials,
        )
        # The parent path is needed both for creation and for lookup; compute once.
        parent = initializer.global_config.common_location_path(
            initializer.global_config.project, initializer.global_config.location
        )
        study = gca_study.Study(
            display_name=display_name, study_spec=problem.to_proto()
        )

        try:
            study = api_client.create_study(parent=parent, study=study)
        except exceptions.AlreadyExists:
            # A study with this display name already exists in this location;
            # fall back to looking it up and reusing it.
            _LOGGER.info("The study is already created. Using existing study.")
            study = api_client.lookup_study(
                request={"parent": parent, "display_name": display_name}
            )

        return Study(study.name)

    def get_trial(self, uid: int) -> TrialInterface:
        """Retrieves the trial under this study with the given trial ID.

        Args:
            uid: Unique identifier of the trial within the study.

        Returns:
            The Trial resource.
        """
        study_path_components = self._parse_resource_name(self.resource_name)
        trial_name = Trial._format_resource_name(
            project=study_path_components["project"],
            location=study_path_components["location"],
            study=study_path_components["study"],
            trial=uid,
        )
        return Trial(trial_name)

    def trials(
        self, trial_filter: Optional[vz.TrialFilter] = None
    ) -> Collection[TrialInterface]:
        """Fetches the collection of trials belonging to this study.

        NOTE(review): `trial_filter` is currently accepted but not forwarded to
        the service — all trials are returned regardless of the filter.
        """
        response = self.api_client.list_trials(
            request={"parent": self.resource_name}
        )
        return [Trial(trial.name) for trial in response.trials]

    def optimal_trials(self) -> Collection[TrialInterface]:
        """Returns the pareto-optimal trial(s) of this study."""
        response = self.api_client.list_optimal_trials(
            request={"parent": self.resource_name}
        )
        return [Trial(trial.name) for trial in response.optimal_trials]

    def materialize_study_config(self) -> vz.StudyConfig:
        """Materializes the study config from the service.

        Returns:
            A deep copy of the study's StudyConfig, safe for the caller to mutate.
        """
        study = self.api_client.get_study(name=self.resource_name)
        return copy.deepcopy(vz.StudyConfig.from_proto(study.study_spec))

    @classmethod
    def from_uid(cls: Type[_T], uid: str) -> _T:
        """Fetches an existing study from the Vizier service.

        Args:
            uid: Unique identifier of the study.

        Returns:
            The Study resource.
        """
        return Study(study_id=uid)

    def suggest(
        self, *, count: Optional[int] = None, worker: str = ""
    ) -> Collection[TrialInterface]:
        """Returns trials to be evaluated by `worker`.

        Blocks until the suggestion long-running operation completes.

        Args:
            count: Number of suggestions.
            worker: When new trials are generated, their `assigned_worker`
                field is populated with this worker. suggest() first looks for
                existing trials that are assigned to `worker`, before
                generating new ones.

        Returns:
            The suggested trials.
        """
        suggest_trials_lro = self.api_client.suggest_trials(
            request={
                "parent": self.resource_name,
                "suggestion_count": count,
                "client_id": worker,
            }
        )
        _LOGGER.log_action_started_against_resource_with_lro(
            "Suggest", "study", self.__class__, suggest_trials_lro
        )
        trials = suggest_trials_lro.result()
        _LOGGER.log_action_completed_against_resource("study", "suggested", self)
        return [Trial(trial.name) for trial in trials.trials]

    def delete(self) -> None:
        """Deletes the study from the Vizier service."""
        self.api_client.delete_study(name=self.resource_name)
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Trial resource wrapper for Vertex AI Vizier."""

import copy

from typing import Any, Mapping, Optional, TypeVar

from google.auth import credentials as auth_credentials
from google.cloud.aiplatform import base
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.vizier import study
from google.cloud.aiplatform.vizier import pyvizier as vz
from google.cloud.aiplatform.vizier.client_abc import TrialInterface

_T = TypeVar("_T")
_LOGGER = base.Logger(__name__)


class Trial(base.VertexAiResourceNounWithFutureManager, TrialInterface):
    """Manage a Trial resource for Vertex AI Vizier."""

    client_class = utils.VizierClientWithOverride

    _resource_noun = "trial"
    _getter_method = "get_trial"
    _list_method = "list_trials"
    _delete_method = "delete_trial"
    _parse_resource_name_method = "parse_trial_path"
    _format_resource_name_method = "trial_path"

    def __init__(
        self,
        trial_name: str,
        study_id: Optional[str] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ):
        """Retrieves an existing managed trial given a trial resource name or ID.

        Example Usage:
            trial = aiplatform.Trial(
                trial_name='projects/123/locations/us-central1/studies/12345678/trials/1')
            or
            trial = aiplatform.Trial(trial_name='1', study_id='12345678')

        Args:
            trial_name (str):
                Required. A fully-qualified trial resource name or a trial ID.
                Example:
                "projects/123/locations/us-central1/studies/12345678/trials/1"
                or "1" when study_id, project and location are initialized or
                passed.
            study_id (str):
                Optional. A fully-qualified study resource name or a study ID.
                Example: "projects/123/locations/us-central1/studies/12345678"
                or "12345678" when project and location are initialized or
                passed.
            project (str):
                Optional. Project to retrieve the trial from. If not set, the
                project set in aiplatform.init will be used.
            location (str):
                Optional. Location to retrieve the trial from. If not set, the
                location set in aiplatform.init will be used.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to retrieve this trial.
                Overrides credentials set in aiplatform.init.
        """
        base.VertexAiResourceNounWithFutureManager.__init__(
            self,
            project=project,
            location=location,
            credentials=credentials,
            resource_name=trial_name,
        )
        self._gca_resource = self._get_gca_resource(
            resource_name=trial_name,
            # When a study_id is given, trial_name may be a bare ID and the
            # study is supplied as the parent resource component.
            parent_resource_name_fields={
                study.Study._resource_noun: study_id,
            }
            if study_id
            else study_id,
        )

    @property
    def uid(self) -> int:
        """Unique integer identifier of the trial within its study."""
        trial_path_components = self._parse_resource_name(self.resource_name)
        return int(trial_path_components["trial"])

    @property
    def parameters(self) -> Mapping[str, Any]:
        """Parameters of the trial, fetched fresh from the service."""
        trial = self.api_client.get_trial(name=self.resource_name)
        return vz.TrialConverter.from_proto(trial).parameters

    @property
    def status(self) -> vz.TrialStatus:
        """Status of the trial, fetched fresh from the service."""
        trial = self.api_client.get_trial(name=self.resource_name)
        return vz.TrialConverter.from_proto(trial).status

    def delete(self) -> None:
        """Deletes the trial from the Vizier service."""
        self.api_client.delete_trial(name=self.resource_name)

    def complete(
        self,
        measurement: Optional[vz.Measurement] = None,
        *,
        infeasible_reason: Optional[str] = None,
    ) -> Optional[vz.Measurement]:
        """Completes the trial and materializes the final measurement.

        * If `measurement` is provided, then Vizier writes it as the trial's
          final measurement and returns it.
        * If `infeasible_reason` is provided, `measurement` is not needed and
          the trial is marked infeasible.
        * If neither is provided, then Vizier selects an existing
          (intermediate) measurement to be the final measurement and returns it.

        Args:
            measurement: Final measurement.
            infeasible_reason: Infeasible reason for missing final measurement.

        Returns:
            The final measurement of the trial, or None if the trial is marked
            infeasible.
        """
        request = {"name": self.resource_name}
        if infeasible_reason is not None:
            request["infeasible_reason"] = infeasible_reason
            request["trial_infeasible"] = True
        if measurement is not None:
            request["final_measurement"] = vz.MeasurementConverter.to_proto(
                measurement
            )
        trial = self.api_client.complete_trial(request=request)
        if not trial.final_measurement:
            return None
        return vz.MeasurementConverter.from_proto(trial.final_measurement)

    def should_stop(self) -> bool:
        """Returns True if the Vizier early-stopping check says to stop.

        Blocks until the early-stopping long-running operation completes.
        """
        should_stop_lro = self.api_client.check_trial_early_stopping_state(
            request={"trial_name": self.resource_name}
        )
        _LOGGER.log_action_started_against_resource_with_lro(
            "ShouldStop", "trial", self.__class__, should_stop_lro
        )
        # Resolve the LRO exactly once; the original code called result() twice.
        response = should_stop_lro.result()
        _LOGGER.log_action_completed_against_resource(
            "trial", "should_stop", self
        )
        return response.should_stop

    def add_measurement(self, measurement: vz.Measurement) -> None:
        """Adds an intermediate measurement to the trial."""
        self.api_client.add_trial_measurement(
            request={
                "trial_name": self.resource_name,
                "measurement": vz.MeasurementConverter.to_proto(measurement),
            }
        )

    def materialize(self, *, include_all_measurements: bool = True) -> vz.Trial:
        """Materializes the trial from the service.

        Args:
            include_all_measurements: If True, the returned Trial includes all
                intermediate measurements. The final measurement is always
                provided.
                NOTE(review): this flag is currently not forwarded to the
                service; all measurements are returned regardless.

        Returns:
            A deep copy of the Trial, safe for the caller to mutate.
        """
        trial = self.api_client.get_trial(name=self.resource_name)
        return copy.deepcopy(vz.TrialConverter.from_proto(trial))
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for the Vertex AI Vizier Study and Trial wrappers."""

from importlib import reload
from unittest import mock
from unittest.mock import ANY, patch

import pytest

from google.api_core import exceptions
from google.api_core import operation
from google.cloud import aiplatform
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform.compat.services import vizier_service_client
from google.cloud.aiplatform.compat.types import (
    study as gca_study,
    vizier_service as gca_vizier_service,
)
from google.cloud.aiplatform.vizier import pyvizier
from google.protobuf import duration_pb2


# Project / location identifiers shared across tests.
_TEST_PROJECT = "test-project"
_TEST_LOCATION = "us-central1"
_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}"

# Study identifiers.
_TEST_STUDY_ID = "12345"
_TEST_STUDY_NAME = f"{_TEST_PARENT}/studies/{_TEST_STUDY_ID}"

# Trial identifiers.
_TEST_TRIAL_ID = "1"
_TEST_TRIAL_NAME = f"{_TEST_STUDY_NAME}/trials/{_TEST_TRIAL_ID}"

_TEST_METRIC_ID = "pr-auc"
_TEST_DISPLAY_NAME = "test_study_python_aiplatform"

_TEST_PARAMETER_ID_1 = "learning_rate"
_TEST_PARAMETER_ID_MIN_VALUE_1 = 1e-05
_TEST_PARAMETER_ID_MAX_VALUE_1 = 1.0

_TEST_PARAMETER_ID_2 = "optimizer"
_TEST_PARAMETER_VALUE_2 = ["adagrad", "adam", "experimental"]

# The study proto that Study.create_or_load is expected to send to the service.
_TEST_STUDY = gca_study.Study(
    display_name=_TEST_DISPLAY_NAME,
    study_spec=gca_study.StudySpec(
        algorithm=gca_study.StudySpec.Algorithm.RANDOM_SEARCH,
        metrics=[
            gca_study.StudySpec.MetricSpec(
                metric_id=_TEST_METRIC_ID,
                goal=gca_study.StudySpec.MetricSpec.GoalType.MAXIMIZE,
            )
        ],
        parameters=[
            gca_study.StudySpec.ParameterSpec(
                parameter_id=_TEST_PARAMETER_ID_1,
                scale_type=gca_study.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
                double_value_spec=gca_study.StudySpec.ParameterSpec.DoubleValueSpec(
                    min_value=_TEST_PARAMETER_ID_MIN_VALUE_1,
                    max_value=_TEST_PARAMETER_ID_MAX_VALUE_1,
                ),
            ),
            gca_study.StudySpec.ParameterSpec(
                parameter_id=_TEST_PARAMETER_ID_2,
                categorical_value_spec=gca_study.StudySpec.ParameterSpec.CategoricalValueSpec(
                    values=_TEST_PARAMETER_VALUE_2
                ),
            ),
        ],
    ),
)

# The measurement proto expected by complete/add_measurement request asserts.
_EXPECTED_MEASUREMENT_PROTO = gca_study.Measurement(
    elapsed_duration=duration_pb2.Duration(),
    metrics=[gca_study.Measurement.Metric(metric_id="y", value=4)],
)


def _make_study_config() -> pyvizier.StudyConfig:
    """Builds the StudyConfig matching _TEST_STUDY, used by all Study tests."""
    sc = pyvizier.StudyConfig()
    sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH
    sc.metric_information.append(
        pyvizier.MetricInformation(
            name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE
        )
    )
    root = sc.search_space.select_root()
    root.add_float_param(
        _TEST_PARAMETER_ID_1,
        _TEST_PARAMETER_ID_MIN_VALUE_1,
        _TEST_PARAMETER_ID_MAX_VALUE_1,
        scale_type=pyvizier.ScaleType.LINEAR,
    )
    root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2)
    return sc


def _create_test_study() -> aiplatform.Study:
    """Creates a Study via create_or_load against the mocked Vizier client."""
    return aiplatform.Study.create_or_load(
        display_name=_TEST_DISPLAY_NAME, problem=_make_study_config()
    )


def _make_measurement() -> pyvizier.Measurement:
    """Builds the pyvizier Measurement used by the Trial tests."""
    measurement = pyvizier.Measurement()
    measurement.metrics["y"] = 4
    return measurement


@pytest.fixture
def get_study_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "get_study"
    ) as get_study_mock:
        get_study_mock.return_value = gca_study.Study(name=_TEST_STUDY_NAME)
        yield get_study_mock


@pytest.fixture
def get_trial_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "get_trial"
    ) as get_trial_mock:
        get_trial_mock.return_value = gca_study.Trial(
            name=_TEST_TRIAL_NAME,
            state=gca_study.Trial.State.ACTIVE,
            parameters=[
                gca_study.Trial.Parameter(
                    parameter_id=_TEST_PARAMETER_ID_1,
                    value=_TEST_PARAMETER_ID_MIN_VALUE_1,
                )
            ],
        )
        yield get_trial_mock


@pytest.fixture
def create_study_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "create_study"
    ) as create_study_mock:
        create_study_mock.return_value = gca_study.Study(name=_TEST_STUDY_NAME)
        yield create_study_mock


@pytest.fixture
def lookup_study_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "lookup_study"
    ) as lookup_study_mock:
        lookup_study_mock.return_value = gca_study.Study(name=_TEST_STUDY_NAME)
        yield lookup_study_mock


@pytest.fixture
def suggest_trials_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "suggest_trials"
    ) as suggest_trials_mock:
        # suggest_trials returns an LRO; mock its result() payload.
        suggest_trials_lro_mock = mock.Mock(operation.Operation)
        suggest_trials_lro_mock.result.return_value = (
            gca_vizier_service.SuggestTrialsResponse(
                trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)]
            )
        )
        suggest_trials_mock.return_value = suggest_trials_lro_mock
        yield suggest_trials_mock


@pytest.fixture
def list_optimal_trials_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "list_optimal_trials"
    ) as list_optimal_trials_mock:
        list_optimal_trials_mock.return_value = (
            gca_vizier_service.ListOptimalTrialsResponse(
                optimal_trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)]
            )
        )
        yield list_optimal_trials_mock


@pytest.fixture
def list_trials_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "list_trials"
    ) as list_trials_mock:
        list_trials_mock.return_value = gca_vizier_service.ListTrialsResponse(
            trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)]
        )
        yield list_trials_mock


@pytest.fixture
def delete_study_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "delete_study"
    ) as delete_study_mock:
        yield delete_study_mock


@pytest.fixture
def delete_trial_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "delete_trial"
    ) as delete_trial_mock:
        yield delete_trial_mock


@pytest.fixture
def complete_trial_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "complete_trial"
    ) as complete_trial_mock:
        complete_trial_mock.return_value = gca_study.Trial(
            name=_TEST_TRIAL_NAME,
            final_measurement=gca_study.Measurement(
                step_count=3,
                metrics=[gca_study.Measurement.Metric(metric_id="y", value=5)],
            ),
        )
        yield complete_trial_mock


@pytest.fixture
def complete_trial_empty_measurement_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "complete_trial"
    ) as complete_trial_empty_measurement_mock:
        # No final_measurement on the returned trial -> complete() returns None.
        complete_trial_empty_measurement_mock.return_value = gca_study.Trial(
            name=_TEST_TRIAL_NAME
        )
        yield complete_trial_empty_measurement_mock


@pytest.fixture
def should_stop_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient,
        "check_trial_early_stopping_state",
    ) as should_stop_mock:
        should_stop_lro_mock = mock.Mock(operation.Operation)
        should_stop_lro_mock.result.return_value = (
            gca_vizier_service.CheckTrialEarlyStoppingStateResponse(
                should_stop=True
            )
        )
        should_stop_mock.return_value = should_stop_lro_mock
        yield should_stop_mock


@pytest.fixture
def create_study_mock_already_exists():
    with patch.object(
        vizier_service_client.VizierServiceClient, "create_study"
    ) as create_study_mock:
        # First call raises AlreadyExists to exercise the lookup fallback.
        create_study_mock.side_effect = [
            exceptions.AlreadyExists("Study already exists."),
            gca_study.Study(name=_TEST_STUDY_NAME),
        ]
        yield create_study_mock


@pytest.fixture
def add_measurement_mock():
    with patch.object(
        vizier_service_client.VizierServiceClient, "add_trial_measurement"
    ) as add_measurement_mock:
        yield add_measurement_mock


class TestStudy:
    """Tests for aiplatform.Study against a mocked Vizier client."""

    def setup_method(self):
        reload(initializer)
        reload(aiplatform)

    def teardown_method(self):
        initializer.global_pool.shutdown(wait=True)

    @pytest.mark.usefixtures("get_study_mock")
    def test_create_study(self, create_study_mock):
        aiplatform.init(project=_TEST_PROJECT)

        study = _create_test_study()

        create_study_mock.assert_called_once_with(
            parent=_TEST_PARENT, study=_TEST_STUDY
        )
        assert isinstance(study, aiplatform.Study)

    @pytest.mark.usefixtures("get_study_mock")
    def test_create_study_already_exists(
        self, create_study_mock_already_exists, lookup_study_mock
    ):
        aiplatform.init(project=_TEST_PROJECT)

        study = _create_test_study()

        lookup_study_mock.assert_called_once_with(
            request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME}
        )
        assert isinstance(study, aiplatform.Study)

    @pytest.mark.usefixtures("get_study_mock")
    def test_materialize_study_config(self, create_study_mock):
        aiplatform.init(project=_TEST_PROJECT)
        study = _create_test_study()

        study_config = study.materialize_study_config()

        create_study_mock.assert_called_once_with(
            parent=_TEST_PARENT, study=_TEST_STUDY
        )
        assert isinstance(study_config, pyvizier.StudyConfig)

    @pytest.mark.usefixtures("get_study_mock", "get_trial_mock")
    def test_suggest(self, create_study_mock, suggest_trials_mock):
        aiplatform.init(project=_TEST_PROJECT)
        study = _create_test_study()

        trials = study.suggest(count=5, worker="test_worker")

        suggest_trials_mock.assert_called_once_with(
            request={
                "parent": _TEST_STUDY_NAME,
                "suggestion_count": 5,
                "client_id": "test_worker",
            }
        )
        assert isinstance(trials[0], aiplatform.Trial)

    @pytest.mark.usefixtures("get_study_mock")
    def test_from_uid(self):
        aiplatform.init(project=_TEST_PROJECT)

        study = aiplatform.Study.from_uid(uid=_TEST_STUDY_ID)

        assert isinstance(study, aiplatform.Study)
        assert study.name == _TEST_STUDY_ID

    @pytest.mark.usefixtures("get_study_mock")
    def test_delete(self, create_study_mock, delete_study_mock):
        aiplatform.init(project=_TEST_PROJECT)
        study = _create_test_study()

        study.delete()

        delete_study_mock.assert_called_once_with(name=_TEST_STUDY_NAME)

    @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock")
    def test_optimal_trials(self, list_optimal_trials_mock):
        aiplatform.init(project=_TEST_PROJECT)
        study = _create_test_study()

        trials = study.optimal_trials()

        list_optimal_trials_mock.assert_called_once_with(
            request={"parent": _TEST_STUDY_NAME}
        )
        assert isinstance(trials[0], aiplatform.Trial)

    @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock")
    def test_list_trials(self, list_trials_mock):
        aiplatform.init(project=_TEST_PROJECT)
        study = _create_test_study()

        trials = study.trials()

        list_trials_mock.assert_called_once_with(
            request={"parent": _TEST_STUDY_NAME}
        )
        assert isinstance(trials[0], aiplatform.Trial)

    @pytest.mark.usefixtures("get_study_mock", "create_study_mock")
    def test_get_trial(self, get_trial_mock):
        aiplatform.init(project=_TEST_PROJECT)
        study = _create_test_study()

        trial = study.get_trial(1)

        get_trial_mock.assert_called_once_with(name=_TEST_TRIAL_NAME, retry=ANY)
        assert isinstance(trial, aiplatform.Trial)


class TestTrial:
    """Tests for aiplatform.Trial against a mocked Vizier client."""

    def setup_method(self):
        reload(initializer)
        reload(aiplatform)

    def teardown_method(self):
        initializer.global_pool.shutdown(wait=True)

    @pytest.mark.usefixtures("get_trial_mock")
    def test_delete(self, delete_trial_mock):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        trial.delete()

        delete_trial_mock.assert_called_once_with(name=_TEST_TRIAL_NAME)
        assert isinstance(trial, aiplatform.Trial)

    @pytest.mark.usefixtures("get_trial_mock")
    def test_complete(self, complete_trial_mock):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        measurement = trial.complete(
            measurement=_make_measurement(), infeasible_reason="infeasible"
        )

        complete_trial_mock.assert_called_once_with(
            request={
                "name": _TEST_TRIAL_NAME,
                "infeasible_reason": "infeasible",
                "trial_infeasible": True,
                "final_measurement": _EXPECTED_MEASUREMENT_PROTO,
            }
        )
        assert isinstance(measurement, pyvizier.Measurement)

    @pytest.mark.usefixtures("get_trial_mock")
    def test_complete_empty_measurement(
        self, complete_trial_empty_measurement_mock
    ):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        measurement = trial.complete(
            measurement=_make_measurement(), infeasible_reason="infeasible"
        )

        complete_trial_empty_measurement_mock.assert_called_once_with(
            request={
                "name": _TEST_TRIAL_NAME,
                "infeasible_reason": "infeasible",
                "trial_infeasible": True,
                "final_measurement": _EXPECTED_MEASUREMENT_PROTO,
            }
        )
        assert measurement is None

    @pytest.mark.usefixtures("get_trial_mock")
    def test_should_stop(self, should_stop_mock):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        should_stop = trial.should_stop()

        should_stop_mock.assert_called_once_with(
            request={"trial_name": _TEST_TRIAL_NAME}
        )
        assert should_stop

    @pytest.mark.usefixtures("get_trial_mock")
    def test_add_measurement(self, add_measurement_mock):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        result = trial.add_measurement(measurement=_make_measurement())

        add_measurement_mock.assert_called_once_with(
            request={
                "trial_name": _TEST_TRIAL_NAME,
                "measurement": _EXPECTED_MEASUREMENT_PROTO,
            }
        )
        assert result is None

    @pytest.mark.usefixtures("get_trial_mock")
    def test_properties(self):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        assert trial.uid == 1
        assert trial.status == pyvizier.TrialStatus.ACTIVE
        assert (
            trial.parameters.get_value(_TEST_PARAMETER_ID_1)
            == _TEST_PARAMETER_ID_MIN_VALUE_1
        )

    @pytest.mark.usefixtures("get_trial_mock")
    def test_materialize(self):
        aiplatform.init(project=_TEST_PROJECT)
        trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME)

        materialized_trial = trial.materialize()

        assert materialized_trial.id == 1
        assert (
            materialized_trial.parameters.get_value(_TEST_PARAMETER_ID_1)
            == _TEST_PARAMETER_ID_MIN_VALUE_1
        )
--- setup.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/setup.py b/setup.py index d1d01a6bde..15fa8ab335 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,19 @@ datasets_extra_require = [ "pyarrow >= 3.0.0, < 8.0dev", ] +vizier_extra_require = [ + "attrs==21.4.0", + "absl-py>=0.7", + "numpy>=1.19.0", + "coverage>=4.5,<5.0", + "protobuf>=3.6,<4.0", + "pytype==2022.1.5", + "keras-tuner>=1.0,<2.0", + "portpicker==1.3.1", + "googleapis-common-protos==1.56.0", + "google-api-python-client==1.12.8", + "sqlalchemy==1.4", +] full_extra_require = list( set( tensorboard_extra_require @@ -67,6 +80,7 @@ + featurestore_extra_require + pipelines_extra_requires + datasets_extra_require + + vizier_extra_require ) ) testing_extra_require = ( @@ -119,6 +133,7 @@ "lit": lit_extra_require, "cloud_profiler": profiler_extra_require, "pipelines": pipelines_extra_requires, + "vizier": vizier_extra_require, }, python_requires=">=3.6", classifiers=[ From 101fc60c21af7c6052e87d8b9a99b1f340b4f662 Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 17 Jun 2022 14:37:34 -0700 Subject: [PATCH 11/36] Fix the lint error by running the nox -s blacken. 
--- google/cloud/aiplatform/utils/__init__.py | 3 +- google/cloud/aiplatform/vizier/client_abc.py | 293 +- .../aiplatform/vizier/pyvizier/__init__.py | 20 +- .../vizier/pyvizier/automated_stopping.py | 125 +- .../vizier/pyvizier/base_study_config.py | 2775 +++++++++-------- .../vizier/pyvizier/base_study_config_test.py | 943 +++--- .../aiplatform/vizier/pyvizier/common.py | 892 +++--- .../aiplatform/vizier/pyvizier/common_test.py | 643 ++-- .../aiplatform/vizier/pyvizier/context.py | 75 +- .../vizier/pyvizier/context_test.py | 17 +- .../vizier/pyvizier/metadata_util.py | 176 +- .../vizier/pyvizier/parameter_config.py | 1023 +++--- .../vizier/pyvizier/parameter_config_test.py | 629 ++-- .../vizier/pyvizier/proto_converters.py | 887 +++--- .../vizier/pyvizier/study_config.py | 718 +++-- .../cloud/aiplatform/vizier/pyvizier/trial.py | 907 +++--- .../aiplatform/vizier/pyvizier/trial_test.py | 350 +-- google/cloud/aiplatform/vizier/study.py | 213 +- google/cloud/aiplatform/vizier/trial.py | 145 +- tests/system/aiplatform/test_vizier.py | 76 +- tests/unit/aiplatform/test_vizier.py | 411 ++- 21 files changed, 5947 insertions(+), 5374 deletions(-) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index ef1435cbab..8049c9376b 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -48,7 +48,6 @@ prediction_service_client_v1beta1, tensorboard_service_client_v1beta1, vizier_service_client_v1beta1, - ) from google.cloud.aiplatform.compat.services import ( dataset_service_client_v1, @@ -574,6 +573,7 @@ class TensorboardClientWithOverride(ClientWithOverride): (compat.V1BETA1, tensorboard_service_client_v1beta1.TensorboardServiceClient), ) + class VizierClientWithOverride(ClientWithOverride): _is_temporary = True _default_version = compat.DEFAULT_VERSION @@ -583,7 +583,6 @@ class VizierClientWithOverride(ClientWithOverride): ) - VertexAiServiceClientWithOverride = TypeVar( 
"VertexAiServiceClientWithOverride", DatasetClientWithOverride, diff --git a/google/cloud/aiplatform/vizier/client_abc.py b/google/cloud/aiplatform/vizier/client_abc.py index 8e36a0b4cd..54c6704aff 100644 --- a/google/cloud/aiplatform/vizier/client_abc.py +++ b/google/cloud/aiplatform/vizier/client_abc.py @@ -22,159 +22,158 @@ from google.cloud.aiplatform.vizier import pyvizier as vz -_T = TypeVar('_T') +_T = TypeVar("_T") class ResourceNotFoundError(LookupError): - """Error raised by Vizier clients when resource is not found.""" - pass + """Error raised by Vizier clients when resource is not found.""" + + pass class TrialInterface(abc.ABC): - """Responsible for trial-level operations.""" - - @property - @abc.abstractmethod - def uid(self) -> int: - """Unique identifier of the trial.""" - - @property - @abc.abstractmethod - def parameters(self) -> Mapping[str, Any]: - """Parameters of the trial.""" - - @property - @abc.abstractmethod - def status(self) -> vz.TrialStatus: - """Trial's status.""" - - @abc.abstractmethod - def delete(self) -> None: - """Delete the Trial in Vizier service. - - There is currently no promise on how this object behaves after `delete()`. - If you are sharing a Trial object in parallel processes, proceed with - caution. - """ - - @abc.abstractmethod - def complete( - self, - measurement: Optional[vz.Measurement] = None, - *, - infeasible_reason: Optional[str] = None) -> Optional[vz.Measurement]: - """Completes the trial and #materializes the measurement. - - * If `measurement` is provided, then Vizier writes it as the trial's final - measurement and returns it. - * If `infeasible_reason` is provided, `measurement` is not needed. - * If neither is provided, then Vizier selects an existing (intermediate) - measurement to be the final measurement and returns it. - - Args: - measurement: Final measurement. - infeasible_reason: Infeasible reason for missing final measurement. 
- - Returns: - The final measurement of the trial, or None if the trial is marked - infeasible. - - Raises: - ValueError: If neither `measurement` nor `infeasible_reason` is provided - but the trial does not contain any intermediate measurements. - """ - - @abc.abstractmethod - def should_stop(self) -> bool: - """Returns true if the trial should stop.""" - - @abc.abstractmethod - def add_measurement(self, measurement: vz.Measurement) -> None: - """Adds an intermediate measurement.""" - - @abc.abstractmethod - def materialize(self, *, include_all_measurements: bool = True) -> vz.Trial: - """#Materializes the Trial. - - Args: - include_all_measurements: If True, returned Trial includes all - intermediate measurements. The final measurement is always provided. - - Returns: - Trial object. - """ + """Responsible for trial-level operations.""" + + @property + @abc.abstractmethod + def uid(self) -> int: + """Unique identifier of the trial.""" + + @property + @abc.abstractmethod + def parameters(self) -> Mapping[str, Any]: + """Parameters of the trial.""" + + @property + @abc.abstractmethod + def status(self) -> vz.TrialStatus: + """Trial's status.""" + + @abc.abstractmethod + def delete(self) -> None: + """Delete the Trial in Vizier service. + + There is currently no promise on how this object behaves after `delete()`. + If you are sharing a Trial object in parallel processes, proceed with + caution. + """ + + @abc.abstractmethod + def complete( + self, + measurement: Optional[vz.Measurement] = None, + *, + infeasible_reason: Optional[str] = None, + ) -> Optional[vz.Measurement]: + """Completes the trial and #materializes the measurement. + + * If `measurement` is provided, then Vizier writes it as the trial's final + measurement and returns it. + * If `infeasible_reason` is provided, `measurement` is not needed. + * If neither is provided, then Vizier selects an existing (intermediate) + measurement to be the final measurement and returns it. 
+ + Args: + measurement: Final measurement. + infeasible_reason: Infeasible reason for missing final measurement. + + Returns: + The final measurement of the trial, or None if the trial is marked + infeasible. + + Raises: + ValueError: If neither `measurement` nor `infeasible_reason` is provided + but the trial does not contain any intermediate measurements. + """ + + @abc.abstractmethod + def should_stop(self) -> bool: + """Returns true if the trial should stop.""" + + @abc.abstractmethod + def add_measurement(self, measurement: vz.Measurement) -> None: + """Adds an intermediate measurement.""" + + @abc.abstractmethod + def materialize(self, *, include_all_measurements: bool = True) -> vz.Trial: + """#Materializes the Trial. + + Args: + include_all_measurements: If True, returned Trial includes all + intermediate measurements. The final measurement is always provided. + + Returns: + Trial object. + """ class StudyInterface(abc.ABC): - """Responsible for study-level operations.""" - - @abc.abstractmethod - def create_or_load(self, - display_name: str, - problem: vz.ProblemStatement) -> StudyInterface: - """ - """ - - @abc.abstractmethod - def suggest(self, - *, - count: Optional[int] = None, - worker: str = '') -> Collection[TrialInterface]: - """Returns Trials to be evaluated by worker. - - Args: - count: Number of suggestions. - worker: When new Trials are generated, their `assigned_worker` field is - populated with this worker. suggest() first looks for existing Trials - that are assigned to `worker`, before generating new ones. - - Returns: - Trials. - """ - - @abc.abstractmethod - def delete(self) -> None: - """Deletes the study.""" - - @abc.abstractmethod - def trials( - self, - trial_filter: Optional[vz.TrialFilter] = None - ) -> Collection[TrialInterface]: - """Fetches a collection of trials.""" - - @abc.abstractmethod - def get_trial(self, uid: int) -> TrialInterface: - """Fetches a single trial. 
- - Args: - uid: Unique identifier of the trial within study. - - Returns: - Trial. - - Raises: - ResourceNotFoundError: If trial does not exist. - """ - - @abc.abstractmethod - def optimal_trials(self) -> Collection[TrialInterface]: - """Returns optimal trial(s).""" - - @abc.abstractmethod - def materialize_study_config(self) -> vz.StudyConfig: - """#Materializes the study config.""" - - @abc.abstractclassmethod - def from_uid(cls: Type[_T], uid: str) -> _T: - """Fetches an existing study from the Vizier service. - - Args: - uid: Unique identifier of the study. - - Returns: - Study. - - Raises: - ResourceNotFoundError: If study does not exist. - """ + """Responsible for study-level operations.""" + + @abc.abstractmethod + def create_or_load( + self, display_name: str, problem: vz.ProblemStatement + ) -> StudyInterface: + """ """ + + @abc.abstractmethod + def suggest( + self, *, count: Optional[int] = None, worker: str = "" + ) -> Collection[TrialInterface]: + """Returns Trials to be evaluated by worker. + + Args: + count: Number of suggestions. + worker: When new Trials are generated, their `assigned_worker` field is + populated with this worker. suggest() first looks for existing Trials + that are assigned to `worker`, before generating new ones. + + Returns: + Trials. + """ + + @abc.abstractmethod + def delete(self) -> None: + """Deletes the study.""" + + @abc.abstractmethod + def trials( + self, trial_filter: Optional[vz.TrialFilter] = None + ) -> Collection[TrialInterface]: + """Fetches a collection of trials.""" + + @abc.abstractmethod + def get_trial(self, uid: int) -> TrialInterface: + """Fetches a single trial. + + Args: + uid: Unique identifier of the trial within study. + + Returns: + Trial. + + Raises: + ResourceNotFoundError: If trial does not exist. 
+ """ + + @abc.abstractmethod + def optimal_trials(self) -> Collection[TrialInterface]: + """Returns optimal trial(s).""" + + @abc.abstractmethod + def materialize_study_config(self) -> vz.StudyConfig: + """#Materializes the study config.""" + + @abc.abstractclassmethod + def from_uid(cls: Type[_T], uid: str) -> _T: + """Fetches an existing study from the Vizier service. + + Args: + uid: Unique identifier of the study. + + Returns: + Study. + + Raises: + ResourceNotFoundError: If study does not exist. + """ diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index a2d85c8a7c..2d1c810f72 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -3,19 +3,29 @@ from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricInformation from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricsConfig from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricType -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ObjectiveMetricGoal +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ( + ObjectiveMetricGoal, +) from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ProblemStatement from google.cloud.aiplatform.vizier.pyvizier.base_study_config import SearchSpace -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import SearchSpaceSelector +from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ( + SearchSpaceSelector, +) from google.cloud.aiplatform.vizier.pyvizier.common import Metadata from google.cloud.aiplatform.vizier.pyvizier.common import MetadataValue from google.cloud.aiplatform.vizier.pyvizier.common import Namespace -from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ParameterConfigConverter -from google.cloud.aiplatform.vizier.pyvizier.proto_converters import MeasurementConverter 
+from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ( + ParameterConfigConverter, +) +from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ( + MeasurementConverter, +) from google.cloud.aiplatform.vizier.pyvizier.proto_converters import TrialConverter from google.cloud.aiplatform.vizier.pyvizier.study_config import StudyConfig from google.cloud.aiplatform.vizier.pyvizier.study_config import Algorithm -from google.cloud.aiplatform.vizier.pyvizier.automated_stopping import AutomatedStoppingConfig +from google.cloud.aiplatform.vizier.pyvizier.automated_stopping import ( + AutomatedStoppingConfig, +) from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ExternalType from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterConfig from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterType diff --git a/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py b/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py index 5616493ad5..b46f2b12ec 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py +++ b/google/cloud/aiplatform/vizier/pyvizier/automated_stopping.py @@ -8,67 +8,72 @@ AutomatedStoppingConfigProto = Union[ study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec, - study_pb2.StudySpec.MedianAutomatedStoppingSpec] + study_pb2.StudySpec.MedianAutomatedStoppingSpec, +] @attr.s(frozen=True, init=True, slots=True, kw_only=True) class AutomatedStoppingConfig: - """A wrapper for study_pb2.automated_stopping_spec.""" - _proto: AutomatedStoppingConfigProto = attr.ib(init=True, kw_only=True) - - @classmethod - def decay_curve_stopping_config(cls, - use_steps: bool) -> 'AutomatedStoppingConfig': - """Create a DecayCurve automated stopping config. - - Vizier will early stop the Trial if it predicts the Trial objective value - will not be better than previous Trials. - - Args: - use_steps: Bool. 
If set, use Measurement.step_count as the measure of - training progress. Otherwise, use Measurement.elapsed_duration. - - Returns: - AutomatedStoppingConfig object. - - Raises: - ValueError: If more than one metric is configured. - Note that Vizier Early Stopping currently only supports single-objective - studies. - """ - config = study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec( - use_elapsed_duration=not use_steps) - return cls(proto=config) - - @classmethod - def median_automated_stopping_config( - cls, use_steps: bool) -> 'AutomatedStoppingConfig': - """Create a Median automated stopping config. - - Vizier will early stop the Trial if it predicts the Trial objective value - will not be better than previous Trials. - - Args: - use_steps: Bool. If set, use Measurement.step_count as the measure of - training progress. Otherwise, use Measurement.elapsed_duration. - - Returns: - AutomatedStoppingConfig object. - - Raises: - ValueError: If more than one metric is configured. - Note that Vizier Early Stopping currently only supports single-objective - studies. - """ - config = study_pb2.StudySpec.MedianAutomatedStoppingSpec( - use_elapsed_duration=not use_steps) - return cls(proto=config) - - @classmethod - def from_proto( - cls, proto: AutomatedStoppingConfigProto) -> 'AutomatedStoppingConfig': - return cls(proto=proto) - - def to_proto(self) -> AutomatedStoppingConfigProto: - """Returns this object as a proto.""" - return copy.deepcopy(self._proto) + """A wrapper for study_pb2.automated_stopping_spec.""" + + _proto: AutomatedStoppingConfigProto = attr.ib(init=True, kw_only=True) + + @classmethod + def decay_curve_stopping_config(cls, use_steps: bool) -> "AutomatedStoppingConfig": + """Create a DecayCurve automated stopping config. + + Vizier will early stop the Trial if it predicts the Trial objective value + will not be better than previous Trials. + + Args: + use_steps: Bool. If set, use Measurement.step_count as the measure of + training progress. 
Otherwise, use Measurement.elapsed_duration. + + Returns: + AutomatedStoppingConfig object. + + Raises: + ValueError: If more than one metric is configured. + Note that Vizier Early Stopping currently only supports single-objective + studies. + """ + config = study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec( + use_elapsed_duration=not use_steps + ) + return cls(proto=config) + + @classmethod + def median_automated_stopping_config( + cls, use_steps: bool + ) -> "AutomatedStoppingConfig": + """Create a Median automated stopping config. + + Vizier will early stop the Trial if it predicts the Trial objective value + will not be better than previous Trials. + + Args: + use_steps: Bool. If set, use Measurement.step_count as the measure of + training progress. Otherwise, use Measurement.elapsed_duration. + + Returns: + AutomatedStoppingConfig object. + + Raises: + ValueError: If more than one metric is configured. + Note that Vizier Early Stopping currently only supports single-objective + studies. 
+ """ + config = study_pb2.StudySpec.MedianAutomatedStoppingSpec( + use_elapsed_duration=not use_steps + ) + return cls(proto=config) + + @classmethod + def from_proto( + cls, proto: AutomatedStoppingConfigProto + ) -> "AutomatedStoppingConfig": + return cls(proto=proto) + + def to_proto(self) -> AutomatedStoppingConfigProto: + """Returns this object as a proto.""" + return copy.deepcopy(self._proto) diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py index 91ba16bd94..c593b44418 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py @@ -4,7 +4,19 @@ import enum import math import re -from typing import Callable, Iterable, Iterator, List, Optional, Sequence, Tuple, Type, TypeVar, Union, overload +from typing import ( + Callable, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, + overload, +) import attr import numpy as np @@ -17,1387 +29,1502 @@ ExternalType = parameter_config.ExternalType # A sequence of possible internal parameter values. MonotypeParameterSequence = parameter_config.MonotypeParameterSequence -_T = TypeVar('_T') +_T = TypeVar("_T") ################### Helper Classes ################### -def _min_leq_max(instance: 'MetricInformation', _, value: float): - if value > instance.max_value: - raise ValueError( - f'min_value={value} cannot exceed max_value={instance.max_value}.') +def _min_leq_max(instance: "MetricInformation", _, value: float): + if value > instance.max_value: + raise ValueError( + f"min_value={value} cannot exceed max_value={instance.max_value}." 
+ ) -def _max_geq_min(instance: 'MetricInformation', _, value: float): - if value < instance.min_value: - raise ValueError( - f'min_value={instance.min_value} cannot exceed max_value={value}.') +def _max_geq_min(instance: "MetricInformation", _, value: float): + if value < instance.min_value: + raise ValueError( + f"min_value={instance.min_value} cannot exceed max_value={value}." + ) # Values should NEVER be removed from ObjectiveMetricGoal, only added. class ObjectiveMetricGoal(enum.IntEnum): - """Valid Values for MetricInformation.Goal.""" - MAXIMIZE = 1 - MINIMIZE = 2 + """Valid Values for MetricInformation.Goal.""" - # pylint: disable=comparison-with-callable - @property - def is_maximize(self) -> bool: - return self == self.MAXIMIZE + MAXIMIZE = 1 + MINIMIZE = 2 - @property - def is_minimize(self) -> bool: - return self == self.MINIMIZE + # pylint: disable=comparison-with-callable + @property + def is_maximize(self) -> bool: + return self == self.MAXIMIZE + + @property + def is_minimize(self) -> bool: + return self == self.MINIMIZE class MetricType(enum.Enum): - """Type of the metric. + """Type of the metric. + + OBJECTIVE: Objective to be maximized / minimized. + SAFETY: Objective to be kept above / below a certain threshold. + """ - OBJECTIVE: Objective to be maximized / minimized. - SAFETY: Objective to be kept above / below a certain threshold. 
- """ - OBJECTIVE = 'OBJECTIVE' - SAFETY = 'SAFETY' # Soft constraint + OBJECTIVE = "OBJECTIVE" + SAFETY = "SAFETY" # Soft constraint - # pylint: disable=comparison-with-callable - @property - def is_safety(self) -> bool: - return self == MetricType.SAFETY + # pylint: disable=comparison-with-callable + @property + def is_safety(self) -> bool: + return self == MetricType.SAFETY - @property - def is_objective(self) -> bool: - return self == MetricType.OBJECTIVE + @property + def is_objective(self) -> bool: + return self == MetricType.OBJECTIVE @attr.define(frozen=False, init=True, slots=True) class MetricInformation: - """MetricInformation provides optimization metrics configuration.""" - - # The name of this metric. An empty string is allowed for single-metric - # optimizations. - name: str = attr.field( - init=True, default='', validator=attr.validators.instance_of(str)) - - goal: ObjectiveMetricGoal = attr.field( - init=True, - # pylint: disable=g-long-lambda - converter=ObjectiveMetricGoal, - validator=attr.validators.instance_of(ObjectiveMetricGoal), - on_setattr=[attr.setters.convert, attr.setters.validate], - kw_only=True) - - # The following are only valid for Safety metrics. - # safety_threshold should always be set to a float (default 0.0), for safety - # metrics. - safety_threshold: Optional[float] = attr.field( - init=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(float)), - kw_only=True) - safety_std_threshold: Optional[float] = attr.field( - init=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(float)), - kw_only=True) - percentage_unsafe_trials_threshold: Optional[float] = attr.field( - init=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(float)), - kw_only=True) - - # Minimum value of this metric can be optionally specified. - min_value: float = attr.field( - init=True, - default=None, - # FYI: Converter is applied before validator. 
- converter=lambda x: float(x) if x is not None else -np.inf, - validator=[attr.validators.instance_of(float), _min_leq_max], - kw_only=True) - - # Maximum value of this metric can be optionally specified. - max_value: float = attr.field( - init=True, - default=None, - # FYI: Converter is applied before validator. - converter=lambda x: float(x) if x is not None else np.inf, - validator=[attr.validators.instance_of(float), _max_geq_min], - on_setattr=attr.setters.validate, - kw_only=True) - - def min_value_or(self, default_value_fn: Callable[[], float]) -> float: - """Returns the minimum value if finite, or default_value_fn(). - - Avoids the common pitfalls of using - `metric.min_value or default_value` - which would incorrectly use the default_value when min_value == 0, and - requires default_value to have been computed. - - Args: - default_value_fn: Default value if min_value is not finite. - """ - if np.isfinite(self.min_value): - return self.min_value - else: - return default_value_fn() + """MetricInformation provides optimization metrics configuration.""" + + # The name of this metric. An empty string is allowed for single-metric + # optimizations. + name: str = attr.field( + init=True, default="", validator=attr.validators.instance_of(str) + ) + + goal: ObjectiveMetricGoal = attr.field( + init=True, + # pylint: disable=g-long-lambda + converter=ObjectiveMetricGoal, + validator=attr.validators.instance_of(ObjectiveMetricGoal), + on_setattr=[attr.setters.convert, attr.setters.validate], + kw_only=True, + ) + + # The following are only valid for Safety metrics. + # safety_threshold should always be set to a float (default 0.0), for safety + # metrics. 
+ safety_threshold: Optional[float] = attr.field( + init=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(float)), + kw_only=True, + ) + safety_std_threshold: Optional[float] = attr.field( + init=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(float)), + kw_only=True, + ) + percentage_unsafe_trials_threshold: Optional[float] = attr.field( + init=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(float)), + kw_only=True, + ) + + # Minimum value of this metric can be optionally specified. + min_value: float = attr.field( + init=True, + default=None, + # FYI: Converter is applied before validator. + converter=lambda x: float(x) if x is not None else -np.inf, + validator=[attr.validators.instance_of(float), _min_leq_max], + kw_only=True, + ) + + # Maximum value of this metric can be optionally specified. + max_value: float = attr.field( + init=True, + default=None, + # FYI: Converter is applied before validator. + converter=lambda x: float(x) if x is not None else np.inf, + validator=[attr.validators.instance_of(float), _max_geq_min], + on_setattr=attr.setters.validate, + kw_only=True, + ) + + def min_value_or(self, default_value_fn: Callable[[], float]) -> float: + """Returns the minimum value if finite, or default_value_fn(). + + Avoids the common pitfalls of using + `metric.min_value or default_value` + which would incorrectly use the default_value when min_value == 0, and + requires default_value to have been computed. + + Args: + default_value_fn: Default value if min_value is not finite. + """ + if np.isfinite(self.min_value): + return self.min_value + else: + return default_value_fn() - def max_value_or(self, default_value_fn: Callable[[], float]) -> float: - """Returns the minimum value if finite, or default_value_fn(). 
+ def max_value_or(self, default_value_fn: Callable[[], float]) -> float: + """Returns the minimum value if finite, or default_value_fn(). - Avoids the common pitfalls of using - `metric.max_value or default_value` - which would incorrectly use the default_value when max_value == 0, and - requires default_value to have been computed. + Avoids the common pitfalls of using + `metric.max_value or default_value` + which would incorrectly use the default_value when max_value == 0, and + requires default_value to have been computed. - Args: - default_value_fn: Default value if max_value is not configured. - """ - if np.isfinite(self.max_value): - return self.max_value - else: - return default_value_fn() - - @property - def range(self) -> float: - """Range of the metric. Can be infinite.""" - return self.max_value - self.min_value - - @property - def type(self) -> MetricType: - if (self.safety_threshold is not None or - self.safety_std_threshold is not None): - return MetricType.SAFETY - else: - return MetricType.OBJECTIVE - - def flip_goal(self) -> 'MetricInformation': - """Flips the goal in-place and returns the reference to self.""" - if self.goal == ObjectiveMetricGoal.MAXIMIZE: - self.goal = ObjectiveMetricGoal.MINIMIZE - else: - self.goal = ObjectiveMetricGoal.MAXIMIZE - return self + Args: + default_value_fn: Default value if max_value is not configured. + """ + if np.isfinite(self.max_value): + return self.max_value + else: + return default_value_fn() + + @property + def range(self) -> float: + """Range of the metric. 
Can be infinite.""" + return self.max_value - self.min_value + + @property + def type(self) -> MetricType: + if self.safety_threshold is not None or self.safety_std_threshold is not None: + return MetricType.SAFETY + else: + return MetricType.OBJECTIVE + + def flip_goal(self) -> "MetricInformation": + """Flips the goal in-place and returns the reference to self.""" + if self.goal == ObjectiveMetricGoal.MAXIMIZE: + self.goal = ObjectiveMetricGoal.MINIMIZE + else: + self.goal = ObjectiveMetricGoal.MAXIMIZE + return self @attr.define(frozen=False, init=True, slots=True) class MetricsConfig(collections_abc.Collection): - """Container for metrics. - - Metric names should be unique. - """ - _metrics: List[MetricInformation] = attr.ib( - init=True, - factory=list, - converter=list, - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(MetricInformation), - iterable_validator=attr.validators.instance_of(Iterable))) - - def item(self) -> MetricInformation: - if len(self._metrics) != 1: - raise ValueError('Can be called only when there is exactly one metric!') - return self._metrics[0] - - def _assert_names_are_unique(self) -> None: - counts = collections.Counter(metric.name for metric in self._metrics) - if len(counts) != len(self._metrics): - for name, count in counts.items(): - if count > 1: - raise ValueError(f'Duplicate metric name: {name} in {self._metrics}') - - def __attrs_post_init__(self): - self._assert_names_are_unique() - - def __iter__(self) -> Iterator[MetricInformation]: - return iter(self._metrics) - - def __contains__(self, x: object) -> bool: - return x in self._metrics - - def __len__(self) -> int: - return len(self._metrics) - - def __add__(self, metrics: Iterable[MetricInformation]) -> 'MetricsConfig': - return MetricsConfig(self._metrics + list(metrics)) - - def of_type( - self, include: Union[MetricType, - Iterable[MetricType]]) -> 'MetricsConfig': - """Filters the Metrics by type.""" - if isinstance(include, 
MetricType): - include = (include,) - return MetricsConfig(m for m in self._metrics if m.type in include) - - def append(self, metric: MetricInformation): - self._metrics.append(metric) - self._assert_names_are_unique() - - def extend(self, metrics: Iterable[MetricInformation]): - for metric in metrics: - self.append(metric) - - @property - def is_single_objective(self) -> bool: - """Returns True if only one objective metric is configured.""" - return len(self.of_type(MetricType.OBJECTIVE)) == 1 + """Container for metrics. + + Metric names should be unique. + """ + + _metrics: List[MetricInformation] = attr.ib( + init=True, + factory=list, + converter=list, + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of(MetricInformation), + iterable_validator=attr.validators.instance_of(Iterable), + ), + ) + + def item(self) -> MetricInformation: + if len(self._metrics) != 1: + raise ValueError("Can be called only when there is exactly one metric!") + return self._metrics[0] + + def _assert_names_are_unique(self) -> None: + counts = collections.Counter(metric.name for metric in self._metrics) + if len(counts) != len(self._metrics): + for name, count in counts.items(): + if count > 1: + raise ValueError( + f"Duplicate metric name: {name} in {self._metrics}" + ) + + def __attrs_post_init__(self): + self._assert_names_are_unique() + + def __iter__(self) -> Iterator[MetricInformation]: + return iter(self._metrics) + + def __contains__(self, x: object) -> bool: + return x in self._metrics + + def __len__(self) -> int: + return len(self._metrics) + + def __add__(self, metrics: Iterable[MetricInformation]) -> "MetricsConfig": + return MetricsConfig(self._metrics + list(metrics)) + + def of_type( + self, include: Union[MetricType, Iterable[MetricType]] + ) -> "MetricsConfig": + """Filters the Metrics by type.""" + if isinstance(include, MetricType): + include = (include,) + return MetricsConfig(m for m in self._metrics if m.type in include) + + 
def append(self, metric: MetricInformation): + self._metrics.append(metric) + self._assert_names_are_unique() + + def extend(self, metrics: Iterable[MetricInformation]): + for metric in metrics: + self.append(metric) + + @property + def is_single_objective(self) -> bool: + """Returns True if only one objective metric is configured.""" + return len(self.of_type(MetricType.OBJECTIVE)) == 1 @attr.s(frozen=True, init=True, slots=True, kw_only=True) class _PathSegment: - """Selection of a parameter name and one of its values.""" - # A ParameterConfig name. - name: str = attr.ib( - init=True, validator=attr.validators.instance_of(str), kw_only=True) + """Selection of a parameter name and one of its values.""" - # A ParameterConfig value. - value: Union[int, float, str] = attr.ib( - init=True, - validator=attr.validators.instance_of((int, float, str)), - kw_only=True) + # A ParameterConfig name. + name: str = attr.ib( + init=True, validator=attr.validators.instance_of(str), kw_only=True + ) + + # A ParameterConfig value. + value: Union[int, float, str] = attr.ib( + init=True, + validator=attr.validators.instance_of((int, float, str)), + kw_only=True, + ) class _PathSelector(Sequence[_PathSegment]): - """Immutable sequence of path segments.""" + """Immutable sequence of path segments.""" - def __init__(self, iterable: Iterable[_PathSegment] = tuple()): - self._paths = tuple(iterable) + def __init__(self, iterable: Iterable[_PathSegment] = tuple()): + self._paths = tuple(iterable) - @overload - def __getitem__(self, s: slice) -> '_PathSelector': - ... + @overload + def __getitem__(self, s: slice) -> "_PathSelector": + ... - @overload - def __getitem__(self, i: int) -> _PathSegment: - ... + @overload + def __getitem__(self, i: int) -> _PathSegment: + ... 
- def __getitem__(self, index): - item = self._paths[index] - if isinstance(item, _PathSegment): - return item - else: - return _PathSelector(item) + def __getitem__(self, index): + item = self._paths[index] + if isinstance(item, _PathSegment): + return item + else: + return _PathSelector(item) - def __len__(self) -> int: - """Returns the number of elements in the container.""" - return len(self._paths) + def __len__(self) -> int: + """Returns the number of elements in the container.""" + return len(self._paths) - def __add__( - self, other: Union[Sequence[_PathSegment], - _PathSegment]) -> '_PathSelector': - if isinstance(other, _PathSegment): - other = [other] - return _PathSelector(self._paths + tuple(other)) + def __add__( + self, other: Union[Sequence[_PathSegment], _PathSegment] + ) -> "_PathSelector": + if isinstance(other, _PathSegment): + other = [other] + return _PathSelector(self._paths + tuple(other)) - def __str__(self) -> str: - """Returns the path as a string.""" - return '/'.join(['{}={}'.format(p.name, p.value) for p in self._paths]) + def __str__(self) -> str: + """Returns the path as a string.""" + return "/".join(["{}={}".format(p.name, p.value) for p in self._paths]) class InvalidParameterError(Exception): - """Error thrown when parameter values are invalid.""" + """Error thrown when parameter values are invalid.""" ################### Main Classes ################### @attr.s(frozen=True, init=True, slots=True, kw_only=True) class SearchSpaceSelector: - """A Selector for all, or part of a SearchSpace.""" - - # List of ParameterConfig objects referenced by this selector. - # This is a reference to a list of objects owned by SearchSpace (and will - # typically include the entire SearchSpace). - _configs: List[parameter_config.ParameterConfig] = attr.ib( - init=True, - factory=list, - # Verify that this is a list of ParameterConfig objects. 
- validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of( - parameter_config.ParameterConfig), - iterable_validator=attr.validators.instance_of(list)), - kw_only=True) - - # _selected_path and _selected_name control how parameters are added to the - # search space. - # - # 1) If _selected_path is empty, and _selected_name is empty, parameters - # are added to the root of the search space. - # 2) If _selected_path is empty, and _selected_name is non-empty, parameters - # will be added as child parameters to all root and child parameters - # with name ==_selected_name. - # 3) If both _selected_path and _selected_name are specified, parameters will - # be added as child parameters to the parameter specified by the path and - # the name. - # 4) If _selected_path is non-empty, and _selected_name is empty, this is an - # error. - - # An ordered list of _PathSelector objects which uniquely identifies a path - # in a conditional tree. - _selected_path: _PathSelector = attr.ib( - init=True, - default=_PathSelector(), - converter=_PathSelector, - # Verify that this is a list of _PathSegment objects. - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(_PathSegment), - iterable_validator=attr.validators.instance_of(Iterable)), - kw_only=True) - - # A ParameterConfig name. - # If there is a _selected_name, then there have to also be _selected_values - # below, and new parameters are added to the parent(s) selected by - # _selected_path and _selected_name. - _selected_name: str = attr.ib( - init=True, - default='', - validator=attr.validators.instance_of(str), - kw_only=True) - - # List of ParameterConfig values from _configs. - # If there are _selected_values, then there have to also be _selected_name - # above. 
- _selected_values: MonotypeParameterSequence = attr.ib( - init=True, - factory=list, - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float, str)), - iterable_validator=attr.validators.instance_of(list)), - kw_only=True) - - @property - def parameter_name(self) -> str: - """Returns the selected parameter name.""" - return self._selected_name - - @property - def parameter_values(self) -> MonotypeParameterSequence: - """Returns the selected parameter values.""" - return copy.deepcopy(self._selected_values) - - def add_float_param(self, - name: str, - min_value: float, - max_value: float, - *, - default_value: Optional[float] = None, - scale_type: Optional[ScaleType] = ScaleType.LINEAR, - index: Optional[int] = None) -> 'SearchSpaceSelector': - """Adds floating point parameter config(s) to the search space. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - min_value: Inclusive lower bound for the parameter. - max_value: Inclusive upper bound for the parameter. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='rate' and index=0, then a single ParameterConfig with name - 'rate[0]' is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector(s) for the newly added parameter(s): - One SearchSpaceSelector if one parameter was added, or a list of - SearchSpaceSelector if multiple parameters were added. 
- - Raises: - ValueError: If `index` is invalid (e.g. negative). - """ - bounds = (float(min_value), float(max_value)) - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - bounds=bounds, - scale_type=scale_type, - default_value=default_value) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_int_param(self, - name: str, - min_value: int, - max_value: int, - *, - default_value: Optional[int] = None, - scale_type: Optional[ScaleType] = None, - index: Optional[int] = None) -> 'SearchSpaceSelector': - """Adds integer parameter config(s) to the search space. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - min_value: Inclusive lower bound for the parameter. - max_value: Inclusive upper bound for the parameter. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='hidden_units' and index=0, then a single ParameterConfig with name - 'hidden_units[0]' is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If min_value or max_value are not integers. - ValueError: If `index` is invalid (e.g. negative). 
- """ - int_min_value = int(min_value) - if not math.isclose(min_value, int_min_value): - raise ValueError('min_value for an INTEGER parameter should be an integer' - ', got: [{}]'.format(min_value)) - int_max_value = int(max_value) - if not math.isclose(max_value, int_max_value): - raise ValueError('max_value for an INTEGER parameter should be an integer' - ', got: [{}]'.format(min_value)) - bounds = (int_min_value, int_max_value) - - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - bounds=bounds, - scale_type=scale_type, - default_value=default_value) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_discrete_param( - self, - name: str, - feasible_values: Union[Sequence[float], Sequence[int]], - *, - default_value: Optional[Union[float, int]] = None, - scale_type: Optional[ScaleType] = ScaleType.LINEAR, - index: Optional[int] = None, - auto_cast: Optional[bool] = True) -> 'SearchSpaceSelector': - """Adds ordered numeric parameter config(s) with a finite set of values. - - IMPORTANT: If a parameter is discrete, its values are assumed to have - ordered semantics. Thus, you should not use discrete parameters for - unordered values such as ids. In this case, see add_categorical_param() - below. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - feasible_values: The set of feasible values for this parameter. 
- default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='batch_size' and index=0, then a single ParameterConfig with name - 'batch_size[0]' is added. `index` should be >= 0. - auto_cast: If False, the external type will be set to INTEGER if all - values are castable to an integer without losing precision. If True, the - external type will be set to float. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If `index` is invalid (e.g. negative). - """ - param_names = self._get_parameter_names_to_create(name=name, index=index) - - external_type = ExternalType.FLOAT - if auto_cast: - # If all feasible values are convertible to ints without loss of - # precision, annotate the external type as INTEGER. This will cast - # [0., 1., 2.] into [0, 1, 2] when parameter values are returned in - # clients. - if all([v == round(v) for v in feasible_values]): - external_type = ExternalType.INTEGER - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - feasible_values=sorted(feasible_values), - scale_type=scale_type, - default_value=default_value, - external_type=external_type) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_categorical_param( - self, - name: str, - feasible_values: Sequence[str], - *, - default_value: Optional[str] = None, - scale_type: Optional[ScaleType] = None, - index: Optional[int] = None) -> 'SearchSpaceSelector': - """Adds unordered string-valued parameter config(s) to the search space. - - IMPORTANT: If a parameter is categorical, its values are assumed to be - unordered. If the `feasible_values` have ordering, use add_discrete_param() - above, since it will improve Vizier's model quality. 
- - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - feasible_values: The set of feasible values for this parameter. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='id' and index=0, then a single ParameterConfig with name 'id[0]' - is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If `index` is invalid (e.g. negative). - """ - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - feasible_values=sorted(feasible_values), - scale_type=scale_type, - default_value=default_value) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_bool_param(self, - name: str, - feasible_values: Optional[Sequence[bool]] = None, - *, - default_value: Optional[bool] = None, - scale_type: Optional[ScaleType] = None, - index: Optional[int] = None) -> 'SearchSpaceSelector': - """Adds boolean-valued parameter config(s) to the search space. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. 
- - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - feasible_values: An optional list of feasible boolean values, i.e. one of - the following: [True], [False], [True, False], [False, True]. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='match' and index=0, then a single ParameterConfig with name - 'match[0]' is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If `feasible_values` has invalid values. - ValueError: If `index` is invalid (e.g. negative). - """ - allowed_values = (None, (True, False), (False, True), (True,), (False,)) - if feasible_values not in allowed_values: - raise ValueError('feasible_values must be one of %s; got: %s.' % - (allowed_values, feasible_values)) - # Boolean parameters are represented as categorical parameters internally. 
- bool_to_string = lambda x: 'True' if x else 'False' - if feasible_values is None: - categories = ('True', 'False') - else: - categories = [bool_to_string(x) for x in feasible_values] - feasible_values = sorted(categories, reverse=True) - - if default_value is not None: - default_value = bool_to_string(default_value) - - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - feasible_values=sorted(feasible_values), - scale_type=scale_type, - default_value=default_value, - external_type=ExternalType.BOOLEAN) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def select( - self, - parameter_name: str, - parameter_values: Optional[MonotypeParameterSequence] = None - ) -> 'SearchSpaceSelector': - """Selects a single parameter specified by path and parameter_name. - - This method should be called to select a parent parameter, before calling - `add_*_param` methods to create child parameters. - - Given a selector to the root of the search space: - root = pyvizier.SearchSpace().select_root() - - 1) To select a parameter at the root of the search space, with parent values - for child parameters: - model = root.select('model_type', ['dnn']) - model.add_float_param('hidden_units', ...) - 2) To select a parameter at the root of the search space, and defer parent - value selection to later calls: - model = root.select('model_type') - # Add `hidden_units` and `optimizer_type` as `dnn` children. - model.select_values(['dnn']).add_float_param('hidden_units', ...) - model.select_values(['dnn']).add_categorical_param( - 'optimizer_type', ['adam', 'adagrad']) - # Add `optimizer_type` and `activation` as `linear` children. - model.select_values(['linear']).add_categorical_param( - 'optimizer_type', ['adam', 'ftrl']) - model.select_values(['linear']).add_categorical_param('activation', ...) 
- 3) To select a parameter in a conditional search space, specify a path, by - chaining select() calls: - optimizer = root.select('model_type', ['linear']).select('optimizer_type') - optimizer.select_values('adam').add_float_param('learning_rate', 0.001,..) - optimizer.select_values('ftrl').add_float_param('learning_rate', 0.1,..) - - # OR pre-select the parent parameter values: - optimizer = root.select('model_type', ['linear']).select( - 'optimizer_type', ['adam']) - optimizer.add_float_param('learning_rate', 0.001,...) - 4) If there is *only one* parameter with the given name, then it is possible - to select it without specifying the path, using: - selectors = root.select_all('activation') - # 'activation' exists only under model_type='linear'. - assert len(selectors) == 1 - activation = selectors[0] - - Args: - parameter_name: - parameter_values: Optional parameter values for this selector, which will - be used to add child parameters, or traverse a conditional tree. - - Returns: - A new SearchSpaceSelector. - """ - # Make sure parameter_name exists in the conditional parameters tree. - # parameter_values will be validated only when a child parameter is added. - if not self._parameter_exists(parameter_name): - raise ValueError('No parameter with name {} exists in this SearchSpace') - - path = [] - selected_values = [] - if parameter_values is not None: - if not isinstance(parameter_values, (list, tuple)): - raise ValueError('parameter_values should be a list or tuple, given ' - '{} with type {}'.format(parameter_values, - type(parameter_values))) - selected_values = parameter_values - - if self._selected_name: - # There is already a parameter name selected, so this is a chained select - # call. 
- if not self._selected_values: - raise ValueError('Cannot call select() again before parameter values ' - 'are selected: parameter {} was previously selected, ' - ' with the path: {}, but no values were selected for ' - 'it'.format(self.parameter_name, self.path_string)) - # Return a new selector, with the currently selected parameter added to - # the path. - new_path_segment = [ - _PathSegment( - name=self._selected_name, value=self._selected_values[0]) - ] - path = self._selected_path + new_path_segment - if not self._path_exists(path): - raise ValueError('Path {} does not exist in this SearchSpace: ' - '{}'.format((path), self)) - - return SearchSpaceSelector( - configs=self._configs, - selected_path=path, - selected_name=parameter_name, - selected_values=selected_values) - - def select_values( - self, - parameter_values: MonotypeParameterSequence) -> 'SearchSpaceSelector': - """Selects values for a pre-selected parameter. - - This method should be called to select parent parameter(s) value(s), before - calling `add_*_param` methods to create child parameters. - - This method must be called AFTER select(). - This method mutates this selector. - - Args: - parameter_values: Parameter values for this selector, which will be used - to add child parameters. - - Returns: - SearchSpaceSelector - """ - if not self._selected_name: - raise ValueError('No parameter is selected. Call select() first.') - if not parameter_values: - raise ValueError( - 'parameter_values cannot be empty. Specify at least one value.') - if not isinstance(parameter_values, (list, tuple)): - raise ValueError('parameter_values should be a list or tuple, given ' - '{} with type {}'.format(parameter_values, - type(parameter_values))) - # TODO: Allow to directly select boolean parent parameters. 
- object.__setattr__(self, '_selected_values', parameter_values) - return self - - def select_all( - self, parameter_name: str, parameter_values: MonotypeParameterSequence - ) -> List['SearchSpaceSelector']: - """Select one or more parent parameters, with the same name. - - This method should be called to select parent parameter(s), before calling - `add_*_param` methods to create child parameters. - Multiple parent parameters with the same name are possible in a conditional - search space. See go/conditional-parameters for more details. - - 1) If the conditional search space has two parameters with the same - name, 'optimizer_type', given a selector to the root of the search space, - select_all() can be used to simultaneously add child parameters to both - 'optimizer_type` parameters: - - root = pyvizier.SearchSpace().select_root() - model.select_values(['dnn']).add_categorical_param( - 'optimizer_type', ['adam', 'adagrad']) - model.select_values(['linear']).add_categorical_param( - 'optimizer_type', ['adam', 'ftrl']) - # Add a 'learning_rate' parameter to both 'adam' optimizers: - optimizers = model.select_all('optimizer_type', parent_values=['adam']) - optimizers.add_float_param('learning_rate', ...) - - 2) If there is *only one* parameter with the given name, then it is also - possible to use select_all() to select it: - root = pyvizier.SearchSpace().select_root() - model.select_values(['dnn']).add_categorical_param('activation', ...) - # Select the single parameter with the name 'activation': - selectors = root.select_all('activation') - assert len(selectors) == 1 - activation = selector[0] - - Args: - parameter_name: - parameter_values: Optional parameter values for this selector, which will - be used to add child parameters. - - Returns: - List of SearchSpaceSelector - """ - # TODO: Raise an error if this selector already has selected_name. - # Make sure parameter_name exists in the conditional parameters tree. 
- if not self._parameter_exists(parameter_name): - raise ValueError('No parameter with name {} exists in this SearchSpace') - - if parameter_values is not None: - if not isinstance(parameter_values, (list, tuple)): - raise ValueError('parameter_values should be a list or tuple, given ' - '{} with type {}'.format(parameter_values, - type(parameter_values))) - # TODO: Complete this method. - raise NotImplementedError() - - def _path_exists(self, path: _PathSelector) -> bool: - """Checks if the path exists in the conditional tree.""" - for parent in self._configs: - if (path[0].name == parent.name and - path[0].value in parent.feasible_values): - if len(path) == 1: - # No need to recurse. - return True - return self._path_exists_inner(parent, path[1:]) - return False - - @classmethod - def _path_exists_inner(cls, current_root: parameter_config.ParameterConfig, - current_path: _PathSelector) -> bool: - """Returns true if the path exists, starting at root_parameter.""" - child_idx = None - for idx, child in enumerate(current_root.child_parameter_configs): - if (current_path[0].name == child.name and - current_path[0].value in child.feasible_values): - child_idx = idx - break - if child_idx is None: - # No match is found. This path does not exist. - return False - if len(current_path) == 1: - # This is the end of the path. - return True - # Keep traversing. - return cls._path_exists_inner( - current_root.child_parameter_configs[child_idx], current_path[1:]) - - def _parameter_exists(self, parameter_name: str) -> bool: - """Checks if there exists at least one parameter with this name. - - Note that this method checks existence in the entire search space. - - Args: - parameter_name: - - Returns: - bool: Exists. 
- """ - found = False - for parent in self._configs: - for pc in parent.traverse(show_children=False): - if pc.name == parameter_name: - found = True - break - return found - - @classmethod - def _get_parameter_names_to_create(cls, - *, - name: str, - length: Optional[int] = None, - index: Optional[int] = None) -> List[str]: - """Returns the names of all parameters which should be created. - - Args: - name: The base parameter name. - length: Specifies the length of a multi-dimensional parameters. If larger - than 1, then multiple ParameterConfigs are added. E.g. if name='rate' - and length=2, then two ParameterConfigs with names 'rate[0]', 'rate[1]' - are added. Cannot be specified together with `index`. - index: Specifies the multi-dimensional index for this parameter. Cannot be - specified together with `length`. E.g. if name='rate' and index=1, then - a single ParameterConfig with name 'rate[1]' is added. - - Returns: - List of parameter names to create. - - Raises: - ValueError: If `length` or `index` are invalid. - """ - if length is not None and index is not None: - raise ValueError('Only one of `length` and `index` can be specified. Got' - ' length={}, index={}'.format(length, index)) - if length is not None and length < 1: - raise ValueError('length must be >= 1. Got length={}'.format(length)) - if index is not None and index < 0: - raise ValueError('index must be >= 0. Got index={}'.format(index)) - - param_names = [] - if length is None and index is None: - # Add one parameter with no multi-dimensional index. - param_names.append(name) - elif index is not None: - # Add one parameter with a multi-dimensional index. - param_names.append(cls._multi_dimensional_parameter_name(name, index)) - elif length is not None: - # `length > 0' is synthatic sugar for multi multi-dimensional parameter. - # Each multi-dimensional parameter is encoded as a list of separate - # parameters with names equal to `name[index]` (index is zero based). 
- for i in range(length): - param_names.append(cls._multi_dimensional_parameter_name(name, i)) - return param_names - - @classmethod - def _multi_dimensional_parameter_name(cls, name: str, index: int) -> str: - """Returns the indexed parameter name.""" - return '{}[{}]'.format(name, index) - - @classmethod - def parse_multi_dimensional_parameter_name( - cls, name: str) -> Optional[Tuple[str, int]]: - """Returns the base name for a multi-dimensional parameter name. - - Args: - name: A parameter name. - - Returns: - (base_name, index): if name='hidden_units[10]', base_name='hidden_units' - and index=10. - Returns None if name is not in the format 'base_name[idx]'. - """ - regex = r'(?P[^()]*)\[(?P\d+)\]$' - pattern = re.compile(regex) - matches = pattern.match(name) - if matches is None: - return None - return (matches.groupdict()['name'], int(matches.groupdict()['index'])) - - @property - def path_string(self) -> str: - """Returns the selected path as a string.""" - return str(self._selected_path) - - def _add_parameters( - self, parameters: List[parameter_config.ParameterConfig] - ) -> List['SearchSpaceSelector']: - """Adds ParameterConfigs either to the root, or as child parameters. - - Args: - parameters: The parameters to add to the search space. - - Returns: - A list of SearchSpaceSelectors, one for each parameters added. - """ - if self._selected_name and not self._selected_values: - raise ValueError( - 'Cannot add child parameters to parameter {}: parent values were ' - 'not selected. Call select_values() first.'.format( - self._selected_name)) - if not self._selected_name and self._selected_values: - raise ValueError( - 'Cannot add child parameters: no parent name is selected.' - ' Call select() or select_all() first.') - if self._selected_path and not self._selected_name: - raise ValueError( - 'Cannot add child parameters: path is specified ({}), but no parent' - ' name is specified. 
Call select() or select_all() first'.format( - self.path_string)) - - selectors: List['SearchSpaceSelector'] = [] - if not self._selected_path and not self._selected_name: - # If _selected_path is empty, and _selected_name is empty, parameters - # are added to the root of the search space. - self._configs.extend(parameters) - # Return Selectors for the newly added parameters. - for param in parameters: - selectors.append( - SearchSpaceSelector( - configs=self._configs, - selected_path=[], - selected_name=param.name, - selected_values=[])) - elif not self._selected_path and self._selected_name: - # If _selected_path is empty, and _selected_name is not empty, parameters - # will be added as child parameters to *all* root and child parameters - # with name ==_selected_name. - for idx, root_param in enumerate(self._configs): - updated_param, new_selectors = self._recursive_add_child_parameters( - self._configs, _PathSelector(), root_param, self._selected_name, - self._selected_values, parameters) - # Update the root ParameterConfig in place. - self._configs[idx] = updated_param - selectors.extend(new_selectors) - else: - # If both _selected_path and _selected_name are specified, parameters will - # be added as child parameters to the parameter specified by the path and - # the name. - idx, updated_param, new_selectors = self._add_parameters_at_selected_path( - root_configs=self._configs, - complete_path=self._selected_path, - parent_name=self._selected_name, - parent_values=self._selected_values, - new_children=parameters) - # Update the root ParameterConfig in place. 
- self._configs[idx] = updated_param - selectors.extend(new_selectors) - - if not selectors: - raise ValueError( - 'Cannot add child parameters: the path ({}), is not valid.'.format( - self.path_string)) - return selectors - - @classmethod - def _recursive_add_child_parameters( - cls, configs: List[parameter_config.ParameterConfig], path: _PathSelector, - root: parameter_config.ParameterConfig, parent_name: str, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig] - ) -> Tuple[parameter_config.ParameterConfig, List['SearchSpaceSelector']]: - """Recursively adds new children to all matching parameters. - - new_children are potentially added to root, and all matching child - parameters with name==parent_name. - - Args: - configs: A list of configs to include in returned SearchSpaceSelectors, - this list is not modified or used for anything else. - path: The path to include in returned SearchSpaceSelectors. - root: Parent parameter to start the recursion at. - parent_name: new_children are added to all parameter with this name. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. - - Returns: - (An updated root with all of its children updated, list of selectors to - any parameters which may have been added) - """ - updated_children: List[Tuple[MonotypeParameterSequence, - parameter_config.ParameterConfig]] = [] - selectors: List['SearchSpaceSelector'] = [] - if root.name == parent_name: - # Add new children to this root. If this is a leaf parameter, - # e.g. it has no children, this is where the recursion ends. - for child in new_children: - updated_children.append((parent_values, child)) - # For the path, select one parent value, since for the path, the exact - # value does not matter, as long as it's valid. 
- root_path_fragment = [ - _PathSegment(name=root.name, value=parent_values[0]) - ] - selectors.append( - SearchSpaceSelector( - configs=configs, - selected_path=path + root_path_fragment, - selected_name=child.name, - selected_values=[])) - # Recursively update existing children, if any. - for child in root.child_parameter_configs: - # For the path, select one parent value, since for the path, the exact - # value does not matter, as long as it's valid. - root_path_fragment = [ - _PathSegment(name=root.name, value=child.matching_parent_values[0]) - ] - updated_child, new_selectors = cls._recursive_add_child_parameters( - configs, path + root_path_fragment, child, parent_name, parent_values, - new_children) - updated_children.append( - (updated_child.matching_parent_values, updated_child)) - selectors += new_selectors - # Update all children (existing and potentially new) in the root. - return root.clone_without_children.add_children(updated_children), selectors - - @classmethod - def _add_parameters_at_selected_path( - cls, root_configs: List[parameter_config.ParameterConfig], - complete_path: _PathSelector, parent_name: str, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig] - ) -> Tuple[int, parameter_config.ParameterConfig, - List['SearchSpaceSelector']]: - """Adds new children to the parameter specified by the path and parent_name. - - Args: - root_configs: A list of configs to include in returned - SearchSpaceSelectors, this list is not modified. These are expected to - be the configs at the root of the search space. - complete_path: The path to include in the returned SearchSpaceSelectors. - parent_name: new_children are added to all parameter with this name. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. 
- - Returns: - (Root index in root_configs, - an updated root with all of its children updated, - list of selectors to any parameters which may have been added) - - Raises: - RuntimeError: - ValueError: - """ - if not complete_path: - # This is an internal error, since the caller should never specify an - # empty current_path. - raise RuntimeError('Internal error: got empty complete_path') - - # This is the beginning of the recursion. Select a root to recurse at. - current_root: Optional[parameter_config.ParameterConfig] = None - root_idx: int = 0 - for root_idx, root_param in enumerate(root_configs): - if complete_path[0].name == root_param.name: - current_root = root_param - break - if current_root is None: - raise ValueError('Invalid path: {}: failed to traverse the path: failed' - ' to find a matching root for parameter name "{}".' - ' Root parameter names: {}'.format( - (complete_path), complete_path[0].name, - [pc.name for pc in root_configs])) - - updated_root, selectors = cls._add_parameters_at_selected_path_inner( - root_configs=root_configs, - complete_path=complete_path, - current_root=current_root, - current_path=complete_path[1:], - parent_name=parent_name, - parent_values=parent_values, - new_children=new_children) - return (root_idx, updated_root, selectors) - - @classmethod - def _add_parameters_at_selected_path_inner( - cls, root_configs: List[parameter_config.ParameterConfig], - complete_path: _PathSelector, - current_root: parameter_config.ParameterConfig, - current_path: _PathSelector, parent_name: str, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig] - ) -> Tuple[parameter_config.ParameterConfig, List['SearchSpaceSelector']]: - """Adds new children to the parameter specified by the path and parent_name. - - Args: - root_configs: A list of configs to include in returned - SearchSpaceSelectors, this list is not modified. These are expected to - be the configs at the root of the search space. 
- complete_path: The path to include in the returned SearchSpaceSelectors. - current_root: Parent parameter to start the recursion at. - current_path: The path to the parent parameter from current_root. This is - used in the recursion. - parent_name: new_children are added to all parameter with this name. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. - - Returns: - (An updated root with all of its children updated, - List of selectors to all added parameters) - - Raises: - RuntimeError: - ValueError: - """ - updated_children: List[Tuple[MonotypeParameterSequence, - parameter_config.ParameterConfig]] = [] - selectors: List['SearchSpaceSelector'] = [] - - if not current_path: - # This is the end of the path. End the recursion. - # parent_name should be a child of current_root - child_idx = None - for idx, child in enumerate(current_root.child_parameter_configs): - if parent_name == child.name: - child_idx = idx - last_parent_path = [ - _PathSegment(name=parent_name, value=parent_values[0]) - ] - new_path = complete_path + last_parent_path - updated_child, selectors = cls._add_child_parameters( - root_configs, new_path, child, parent_values, new_children) - break - if child_idx is None: - raise ValueError('Invalid parent_name: after traversing the path "{}", ' - 'failed to find a child parameter with name "{}".' - ' Current root="{}"'.format((complete_path), - parent_name, current_root)) - - # Update current_root with the updated child. - for idx, child in enumerate(current_root.child_parameter_configs): - if idx == child_idx: - updated_children.append( - (updated_child.matching_parent_values, updated_child)) + """A Selector for all, or part of a SearchSpace.""" + + # List of ParameterConfig objects referenced by this selector. + # This is a reference to a list of objects owned by SearchSpace (and will + # typically include the entire SearchSpace). 
+ _configs: List[parameter_config.ParameterConfig] = attr.ib( + init=True, + factory=list, + # Verify that this is a list of ParameterConfig objects. + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of( + parameter_config.ParameterConfig + ), + iterable_validator=attr.validators.instance_of(list), + ), + kw_only=True, + ) + + # _selected_path and _selected_name control how parameters are added to the + # search space. + # + # 1) If _selected_path is empty, and _selected_name is empty, parameters + # are added to the root of the search space. + # 2) If _selected_path is empty, and _selected_name is non-empty, parameters + # will be added as child parameters to all root and child parameters + # with name ==_selected_name. + # 3) If both _selected_path and _selected_name are specified, parameters will + # be added as child parameters to the parameter specified by the path and + # the name. + # 4) If _selected_path is non-empty, and _selected_name is empty, this is an + # error. + + # An ordered list of _PathSelector objects which uniquely identifies a path + # in a conditional tree. + _selected_path: _PathSelector = attr.ib( + init=True, + default=_PathSelector(), + converter=_PathSelector, + # Verify that this is a list of _PathSegment objects. + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of(_PathSegment), + iterable_validator=attr.validators.instance_of(Iterable), + ), + kw_only=True, + ) + + # A ParameterConfig name. + # If there is a _selected_name, then there have to also be _selected_values + # below, and new parameters are added to the parent(s) selected by + # _selected_path and _selected_name. + _selected_name: str = attr.ib( + init=True, default="", validator=attr.validators.instance_of(str), kw_only=True + ) + + # List of ParameterConfig values from _configs. + # If there are _selected_values, then there have to also be _selected_name + # above. 
+ _selected_values: MonotypeParameterSequence = attr.ib( + init=True, + factory=list, + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float, str)), + iterable_validator=attr.validators.instance_of(list), + ), + kw_only=True, + ) + + @property + def parameter_name(self) -> str: + """Returns the selected parameter name.""" + return self._selected_name + + @property + def parameter_values(self) -> MonotypeParameterSequence: + """Returns the selected parameter values.""" + return copy.deepcopy(self._selected_values) + + def add_float_param( + self, + name: str, + min_value: float, + max_value: float, + *, + default_value: Optional[float] = None, + scale_type: Optional[ScaleType] = ScaleType.LINEAR, + index: Optional[int] = None, + ) -> "SearchSpaceSelector": + """Adds floating point parameter config(s) to the search space. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + min_value: Inclusive lower bound for the parameter. + max_value: Inclusive upper bound for the parameter. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='rate' and index=0, then a single ParameterConfig with name + 'rate[0]' is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector(s) for the newly added parameter(s): + One SearchSpaceSelector if one parameter was added, or a list of + SearchSpaceSelector if multiple parameters were added. 
+ + Raises: + ValueError: If `index` is invalid (e.g. negative). + """ + bounds = (float(min_value), float(max_value)) + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + bounds=bounds, + scale_type=scale_type, + default_value=default_value, + ) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_int_param( + self, + name: str, + min_value: int, + max_value: int, + *, + default_value: Optional[int] = None, + scale_type: Optional[ScaleType] = None, + index: Optional[int] = None, + ) -> "SearchSpaceSelector": + """Adds integer parameter config(s) to the search space. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + min_value: Inclusive lower bound for the parameter. + max_value: Inclusive upper bound for the parameter. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='hidden_units' and index=0, then a single ParameterConfig with name + 'hidden_units[0]' is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If min_value or max_value are not integers. + ValueError: If `index` is invalid (e.g. negative). 
+ """ + int_min_value = int(min_value) + if not math.isclose(min_value, int_min_value): + raise ValueError( + "min_value for an INTEGER parameter should be an integer" + ", got: [{}]".format(min_value) + ) + int_max_value = int(max_value) + if not math.isclose(max_value, int_max_value): + raise ValueError( + "max_value for an INTEGER parameter should be an integer" + ", got: [{}]".format(min_value) + ) + bounds = (int_min_value, int_max_value) + + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + bounds=bounds, + scale_type=scale_type, + default_value=default_value, + ) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_discrete_param( + self, + name: str, + feasible_values: Union[Sequence[float], Sequence[int]], + *, + default_value: Optional[Union[float, int]] = None, + scale_type: Optional[ScaleType] = ScaleType.LINEAR, + index: Optional[int] = None, + auto_cast: Optional[bool] = True, + ) -> "SearchSpaceSelector": + """Adds ordered numeric parameter config(s) with a finite set of values. + + IMPORTANT: If a parameter is discrete, its values are assumed to have + ordered semantics. Thus, you should not use discrete parameters for + unordered values such as ids. In this case, see add_categorical_param() + below. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + feasible_values: The set of feasible values for this parameter. 
+ default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='batch_size' and index=0, then a single ParameterConfig with name + 'batch_size[0]' is added. `index` should be >= 0. + auto_cast: If False, the external type will be set to INTEGER if all + values are castable to an integer without losing precision. If True, the + external type will be set to float. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If `index` is invalid (e.g. negative). + """ + param_names = self._get_parameter_names_to_create(name=name, index=index) + + external_type = ExternalType.FLOAT + if auto_cast: + # If all feasible values are convertible to ints without loss of + # precision, annotate the external type as INTEGER. This will cast + # [0., 1., 2.] into [0, 1, 2] when parameter values are returned in + # clients. + if all([v == round(v) for v in feasible_values]): + external_type = ExternalType.INTEGER + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + feasible_values=sorted(feasible_values), + scale_type=scale_type, + default_value=default_value, + external_type=external_type, + ) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_categorical_param( + self, + name: str, + feasible_values: Sequence[str], + *, + default_value: Optional[str] = None, + scale_type: Optional[ScaleType] = None, + index: Optional[int] = None, + ) -> "SearchSpaceSelector": + """Adds unordered string-valued parameter config(s) to the search space. + + IMPORTANT: If a parameter is categorical, its values are assumed to be + unordered. If the `feasible_values` have ordering, use add_discrete_param() + above, since it will improve Vizier's model quality. 
+ + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. + + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + feasible_values: The set of feasible values for this parameter. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='id' and index=0, then a single ParameterConfig with name 'id[0]' + is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If `index` is invalid (e.g. negative). + """ + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + feasible_values=sorted(feasible_values), + scale_type=scale_type, + default_value=default_value, + ) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def add_bool_param( + self, + name: str, + feasible_values: Optional[Sequence[bool]] = None, + *, + default_value: Optional[bool] = None, + scale_type: Optional[ScaleType] = None, + index: Optional[int] = None, + ) -> "SearchSpaceSelector": + """Adds boolean-valued parameter config(s) to the search space. + + If select_all() was previously called for this selector, so it contains + selected parent values, the parameter configs will be added as child + parameters to the selected parameter configs, and a reference to this + selector is returned. 
+ + If no parent values are selected, the parameter config(s) will be added at + the same level as currently selected parameters, and a reference to the + newly added parameters is returned. + + Args: + name: The parameter's name. Cannot be empty. + feasible_values: An optional list of feasible boolean values, i.e. one of + the following: [True], [False], [True, False], [False, True]. + default_value: A default value for the Parameter. + scale_type: Scaling to be applied. NOT VALIDATED. + index: Specifies the multi-dimensional index for this parameter. E.g. if + name='match' and index=0, then a single ParameterConfig with name + 'match[0]' is added. `index` should be >= 0. + + Returns: + SearchSpaceSelector for the newly added parameter. + + Raises: + ValueError: If `feasible_values` has invalid values. + ValueError: If `index` is invalid (e.g. negative). + """ + allowed_values = (None, (True, False), (False, True), (True,), (False,)) + if feasible_values not in allowed_values: + raise ValueError( + "feasible_values must be one of %s; got: %s." + % (allowed_values, feasible_values) + ) + # Boolean parameters are represented as categorical parameters internally. + bool_to_string = lambda x: "True" if x else "False" + if feasible_values is None: + categories = ("True", "False") else: - updated_children.append((child.matching_parent_values, child)) - return ( - current_root.clone_without_children.add_children(updated_children), - selectors) - - # Traverse the path: find which child matches the next path selection. - child_idx = None - for idx, child in enumerate(current_root.child_parameter_configs): - if (current_path[0].name == child.name and - current_path[0].value in child.feasible_values): - child_idx = idx - break - if child_idx is None: - raise ValueError('Invalid path: "{}": failed to traverse the path: failed' - ' to find a matching child for path selector "{}".' 
- ' Current root="{}", current_path="{}"'.format( - (complete_path), (current_path[:1]), - current_root.name, (current_path))) - - updated_child, selectors = cls._add_parameters_at_selected_path_inner( - root_configs=root_configs, - complete_path=complete_path, - current_root=current_root.child_parameter_configs[child_idx], - current_path=current_path[1:], - parent_name=parent_name, - parent_values=parent_values, - new_children=new_children) - # Update current_root with the updated child, leave the selectors untouched. - for idx, child in enumerate(current_root.child_parameter_configs): - if idx == child_idx: - updated_children.append( - (updated_child.matching_parent_values, updated_child)) - else: - updated_children.append((child.matching_parent_values, child)) - return (current_root.clone_without_children.add_children(updated_children), - selectors) - - @classmethod - def _add_child_parameters( - cls, selector_configs: List[parameter_config.ParameterConfig], - selector_path: _PathSelector, parent: parameter_config.ParameterConfig, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig] - ) -> Tuple[parameter_config.ParameterConfig, List['SearchSpaceSelector']]: - """Adds new children to the parent parameter and returns selectors. - - Args: - selector_configs: A list of configs to include in returned - SearchSpaceSelectors, this list is not modified. These are expected to - be the configs at the root of the search space. - selector_path: The path to include in the returned SearchSpaceSelectors. - parent: Parent parameter to add children to. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. 
- - Returns: - (An updated root with all of its children updated, - List of selectors to all added parameters) - - Raises: - RuntimeError: - ValueError: - """ - updated_children: List[Tuple[MonotypeParameterSequence, - parameter_config.ParameterConfig]] = [] - selectors: List['SearchSpaceSelector'] = [] - - # Add existing children. - for child in parent.child_parameter_configs: - updated_children.append((child.matching_parent_values, child)) - # Add new child parameter configs. - for child in new_children: - updated_children.append((parent_values, child)) - selectors.append( - SearchSpaceSelector( - configs=selector_configs, - selected_path=selector_path, - selected_name=child.name, - selected_values=[])) - # Add all children (existing and potentially new) to the parent. - return (parent.clone_without_children.add_children(updated_children), - selectors) + categories = [bool_to_string(x) for x in feasible_values] + feasible_values = sorted(categories, reverse=True) + + if default_value is not None: + default_value = bool_to_string(default_value) + + param_names = self._get_parameter_names_to_create(name=name, index=index) + + new_params = [] + for param_name in param_names: + new_pc = parameter_config.ParameterConfig.factory( + name=param_name, + feasible_values=sorted(feasible_values), + scale_type=scale_type, + default_value=default_value, + external_type=ExternalType.BOOLEAN, + ) + new_params.append(new_pc) + return self._add_parameters(new_params)[0] + + def select( + self, + parameter_name: str, + parameter_values: Optional[MonotypeParameterSequence] = None, + ) -> "SearchSpaceSelector": + """Selects a single parameter specified by path and parameter_name. + + This method should be called to select a parent parameter, before calling + `add_*_param` methods to create child parameters. 
+ + Given a selector to the root of the search space: + root = pyvizier.SearchSpace().select_root() + + 1) To select a parameter at the root of the search space, with parent values + for child parameters: + model = root.select('model_type', ['dnn']) + model.add_float_param('hidden_units', ...) + 2) To select a parameter at the root of the search space, and defer parent + value selection to later calls: + model = root.select('model_type') + # Add `hidden_units` and `optimizer_type` as `dnn` children. + model.select_values(['dnn']).add_float_param('hidden_units', ...) + model.select_values(['dnn']).add_categorical_param( + 'optimizer_type', ['adam', 'adagrad']) + # Add `optimizer_type` and `activation` as `linear` children. + model.select_values(['linear']).add_categorical_param( + 'optimizer_type', ['adam', 'ftrl']) + model.select_values(['linear']).add_categorical_param('activation', ...) + 3) To select a parameter in a conditional search space, specify a path, by + chaining select() calls: + optimizer = root.select('model_type', ['linear']).select('optimizer_type') + optimizer.select_values('adam').add_float_param('learning_rate', 0.001,..) + optimizer.select_values('ftrl').add_float_param('learning_rate', 0.1,..) + + # OR pre-select the parent parameter values: + optimizer = root.select('model_type', ['linear']).select( + 'optimizer_type', ['adam']) + optimizer.add_float_param('learning_rate', 0.001,...) + 4) If there is *only one* parameter with the given name, then it is possible + to select it without specifying the path, using: + selectors = root.select_all('activation') + # 'activation' exists only under model_type='linear'. + assert len(selectors) == 1 + activation = selectors[0] + + Args: + parameter_name: + parameter_values: Optional parameter values for this selector, which will + be used to add child parameters, or traverse a conditional tree. + + Returns: + A new SearchSpaceSelector. 
+ """ + # Make sure parameter_name exists in the conditional parameters tree. + # parameter_values will be validated only when a child parameter is added. + if not self._parameter_exists(parameter_name): + raise ValueError("No parameter with name {} exists in this SearchSpace") + + path = [] + selected_values = [] + if parameter_values is not None: + if not isinstance(parameter_values, (list, tuple)): + raise ValueError( + "parameter_values should be a list or tuple, given " + "{} with type {}".format(parameter_values, type(parameter_values)) + ) + selected_values = parameter_values + + if self._selected_name: + # There is already a parameter name selected, so this is a chained select + # call. + if not self._selected_values: + raise ValueError( + "Cannot call select() again before parameter values " + "are selected: parameter {} was previously selected, " + " with the path: {}, but no values were selected for " + "it".format(self.parameter_name, self.path_string) + ) + # Return a new selector, with the currently selected parameter added to + # the path. + new_path_segment = [ + _PathSegment(name=self._selected_name, value=self._selected_values[0]) + ] + path = self._selected_path + new_path_segment + if not self._path_exists(path): + raise ValueError( + "Path {} does not exist in this SearchSpace: " + "{}".format((path), self) + ) + + return SearchSpaceSelector( + configs=self._configs, + selected_path=path, + selected_name=parameter_name, + selected_values=selected_values, + ) + + def select_values( + self, parameter_values: MonotypeParameterSequence + ) -> "SearchSpaceSelector": + """Selects values for a pre-selected parameter. + + This method should be called to select parent parameter(s) value(s), before + calling `add_*_param` methods to create child parameters. + + This method must be called AFTER select(). + This method mutates this selector. + + Args: + parameter_values: Parameter values for this selector, which will be used + to add child parameters. 
+ + Returns: + SearchSpaceSelector + """ + if not self._selected_name: + raise ValueError("No parameter is selected. Call select() first.") + if not parameter_values: + raise ValueError( + "parameter_values cannot be empty. Specify at least one value." + ) + if not isinstance(parameter_values, (list, tuple)): + raise ValueError( + "parameter_values should be a list or tuple, given " + "{} with type {}".format(parameter_values, type(parameter_values)) + ) + # TODO: Allow to directly select boolean parent parameters. + object.__setattr__(self, "_selected_values", parameter_values) + return self + + def select_all( + self, parameter_name: str, parameter_values: MonotypeParameterSequence + ) -> List["SearchSpaceSelector"]: + """Select one or more parent parameters, with the same name. + + This method should be called to select parent parameter(s), before calling + `add_*_param` methods to create child parameters. + Multiple parent parameters with the same name are possible in a conditional + search space. See go/conditional-parameters for more details. + + 1) If the conditional search space has two parameters with the same + name, 'optimizer_type', given a selector to the root of the search space, + select_all() can be used to simultaneously add child parameters to both + 'optimizer_type` parameters: + + root = pyvizier.SearchSpace().select_root() + model.select_values(['dnn']).add_categorical_param( + 'optimizer_type', ['adam', 'adagrad']) + model.select_values(['linear']).add_categorical_param( + 'optimizer_type', ['adam', 'ftrl']) + # Add a 'learning_rate' parameter to both 'adam' optimizers: + optimizers = model.select_all('optimizer_type', parent_values=['adam']) + optimizers.add_float_param('learning_rate', ...) + + 2) If there is *only one* parameter with the given name, then it is also + possible to use select_all() to select it: + root = pyvizier.SearchSpace().select_root() + model.select_values(['dnn']).add_categorical_param('activation', ...) 
+ # Select the single parameter with the name 'activation': + selectors = root.select_all('activation') + assert len(selectors) == 1 + activation = selectors[0] + + Args: + parameter_name: + parameter_values: Optional parameter values for this selector, which will + be used to add child parameters. + + Returns: + List of SearchSpaceSelector + """ + # TODO: Raise an error if this selector already has selected_name. + # Make sure parameter_name exists in the conditional parameters tree. + if not self._parameter_exists(parameter_name): + raise ValueError("No parameter with name {} exists in this SearchSpace".format(parameter_name)) + + if parameter_values is not None: + if not isinstance(parameter_values, (list, tuple)): + raise ValueError( + "parameter_values should be a list or tuple, given " + "{} with type {}".format(parameter_values, type(parameter_values)) + ) + # TODO: Complete this method. + raise NotImplementedError() + + def _path_exists(self, path: _PathSelector) -> bool: + """Checks if the path exists in the conditional tree.""" + for parent in self._configs: + if path[0].name == parent.name and path[0].value in parent.feasible_values: + if len(path) == 1: + # No need to recurse. + return True + return self._path_exists_inner(parent, path[1:]) + return False + + @classmethod + def _path_exists_inner( + cls, current_root: parameter_config.ParameterConfig, current_path: _PathSelector + ) -> bool: + """Returns true if the path exists, starting at root_parameter.""" + child_idx = None + for idx, child in enumerate(current_root.child_parameter_configs): + if ( + current_path[0].name == child.name + and current_path[0].value in child.feasible_values + ): + child_idx = idx + break + if child_idx is None: + # No match is found. This path does not exist. + return False + if len(current_path) == 1: + # This is the end of the path. + return True + # Keep traversing. 
+ return cls._path_exists_inner( + current_root.child_parameter_configs[child_idx], current_path[1:] + ) + + def _parameter_exists(self, parameter_name: str) -> bool: + """Checks if there exists at least one parameter with this name. + + Note that this method checks existence in the entire search space. + + Args: + parameter_name: + + Returns: + bool: Exists. + """ + found = False + for parent in self._configs: + for pc in parent.traverse(show_children=False): + if pc.name == parameter_name: + found = True + break + return found + + @classmethod + def _get_parameter_names_to_create( + cls, *, name: str, length: Optional[int] = None, index: Optional[int] = None + ) -> List[str]: + """Returns the names of all parameters which should be created. + + Args: + name: The base parameter name. + length: Specifies the length of a multi-dimensional parameters. If larger + than 1, then multiple ParameterConfigs are added. E.g. if name='rate' + and length=2, then two ParameterConfigs with names 'rate[0]', 'rate[1]' + are added. Cannot be specified together with `index`. + index: Specifies the multi-dimensional index for this parameter. Cannot be + specified together with `length`. E.g. if name='rate' and index=1, then + a single ParameterConfig with name 'rate[1]' is added. + + Returns: + List of parameter names to create. + + Raises: + ValueError: If `length` or `index` are invalid. + """ + if length is not None and index is not None: + raise ValueError( + "Only one of `length` and `index` can be specified. Got" + " length={}, index={}".format(length, index) + ) + if length is not None and length < 1: + raise ValueError("length must be >= 1. Got length={}".format(length)) + if index is not None and index < 0: + raise ValueError("index must be >= 0. Got index={}".format(index)) + + param_names = [] + if length is None and index is None: + # Add one parameter with no multi-dimensional index. 
+ param_names.append(name) + elif index is not None: + # Add one parameter with a multi-dimensional index. + param_names.append(cls._multi_dimensional_parameter_name(name, index)) + elif length is not None: + # `length > 0` is syntactic sugar for a multi-dimensional parameter. + # Each multi-dimensional parameter is encoded as a list of separate + # parameters with names equal to `name[index]` (index is zero based). + for i in range(length): + param_names.append(cls._multi_dimensional_parameter_name(name, i)) + return param_names + + @classmethod + def _multi_dimensional_parameter_name(cls, name: str, index: int) -> str: + """Returns the indexed parameter name.""" + return "{}[{}]".format(name, index) + + @classmethod + def parse_multi_dimensional_parameter_name( + cls, name: str + ) -> Optional[Tuple[str, int]]: + """Returns the base name for a multi-dimensional parameter name. + + Args: + name: A parameter name. + + Returns: + (base_name, index): if name='hidden_units[10]', base_name='hidden_units' + and index=10. + Returns None if name is not in the format 'base_name[idx]'. + """ + regex = r"(?P<name>[^()]*)\[(?P<index>\d+)\]$" + pattern = re.compile(regex) + matches = pattern.match(name) + if matches is None: + return None + return (matches.groupdict()["name"], int(matches.groupdict()["index"])) + + @property + def path_string(self) -> str: + """Returns the selected path as a string.""" + return str(self._selected_path) + + def _add_parameters( + self, parameters: List[parameter_config.ParameterConfig] + ) -> List["SearchSpaceSelector"]: + """Adds ParameterConfigs either to the root, or as child parameters. + + Args: + parameters: The parameters to add to the search space. + + Returns: + A list of SearchSpaceSelectors, one for each parameter added. + """ + if self._selected_name and not self._selected_values: + raise ValueError( + "Cannot add child parameters to parameter {}: parent values were " + "not selected. 
Call select_values() first.".format(self._selected_name) + ) + if not self._selected_name and self._selected_values: + raise ValueError( + "Cannot add child parameters: no parent name is selected." + " Call select() or select_all() first." + ) + if self._selected_path and not self._selected_name: + raise ValueError( + "Cannot add child parameters: path is specified ({}), but no parent" + " name is specified. Call select() or select_all() first".format( + self.path_string + ) + ) + + selectors: List["SearchSpaceSelector"] = [] + if not self._selected_path and not self._selected_name: + # If _selected_path is empty, and _selected_name is empty, parameters + # are added to the root of the search space. + self._configs.extend(parameters) + # Return Selectors for the newly added parameters. + for param in parameters: + selectors.append( + SearchSpaceSelector( + configs=self._configs, + selected_path=[], + selected_name=param.name, + selected_values=[], + ) + ) + elif not self._selected_path and self._selected_name: + # If _selected_path is empty, and _selected_name is not empty, parameters + # will be added as child parameters to *all* root and child parameters + # with name ==_selected_name. + for idx, root_param in enumerate(self._configs): + updated_param, new_selectors = self._recursive_add_child_parameters( + self._configs, + _PathSelector(), + root_param, + self._selected_name, + self._selected_values, + parameters, + ) + # Update the root ParameterConfig in place. + self._configs[idx] = updated_param + selectors.extend(new_selectors) + else: + # If both _selected_path and _selected_name are specified, parameters will + # be added as child parameters to the parameter specified by the path and + # the name. 
+ idx, updated_param, new_selectors = self._add_parameters_at_selected_path( + root_configs=self._configs, + complete_path=self._selected_path, + parent_name=self._selected_name, + parent_values=self._selected_values, + new_children=parameters, + ) + # Update the root ParameterConfig in place. + self._configs[idx] = updated_param + selectors.extend(new_selectors) + + if not selectors: + raise ValueError( + "Cannot add child parameters: the path ({}), is not valid.".format( + self.path_string + ) + ) + return selectors + + @classmethod + def _recursive_add_child_parameters( + cls, + configs: List[parameter_config.ParameterConfig], + path: _PathSelector, + root: parameter_config.ParameterConfig, + parent_name: str, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig], + ) -> Tuple[parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: + """Recursively adds new children to all matching parameters. + + new_children are potentially added to root, and all matching child + parameters with name==parent_name. + + Args: + configs: A list of configs to include in returned SearchSpaceSelectors, + this list is not modified or used for anything else. + path: The path to include in returned SearchSpaceSelectors. + root: Parent parameter to start the recursion at. + parent_name: new_children are added to all parameter with this name. + parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (An updated root with all of its children updated, list of selectors to + any parameters which may have been added) + """ + updated_children: List[ + Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] + ] = [] + selectors: List["SearchSpaceSelector"] = [] + if root.name == parent_name: + # Add new children to this root. If this is a leaf parameter, + # e.g. it has no children, this is where the recursion ends. 
+ for child in new_children: + updated_children.append((parent_values, child)) + # For the path, select one parent value, since for the path, the exact + # value does not matter, as long as it's valid. + root_path_fragment = [ + _PathSegment(name=root.name, value=parent_values[0]) + ] + selectors.append( + SearchSpaceSelector( + configs=configs, + selected_path=path + root_path_fragment, + selected_name=child.name, + selected_values=[], + ) + ) + # Recursively update existing children, if any. + for child in root.child_parameter_configs: + # For the path, select one parent value, since for the path, the exact + # value does not matter, as long as it's valid. + root_path_fragment = [ + _PathSegment(name=root.name, value=child.matching_parent_values[0]) + ] + updated_child, new_selectors = cls._recursive_add_child_parameters( + configs, + path + root_path_fragment, + child, + parent_name, + parent_values, + new_children, + ) + updated_children.append( + (updated_child.matching_parent_values, updated_child) + ) + selectors += new_selectors + # Update all children (existing and potentially new) in the root. + return root.clone_without_children.add_children(updated_children), selectors + + @classmethod + def _add_parameters_at_selected_path( + cls, + root_configs: List[parameter_config.ParameterConfig], + complete_path: _PathSelector, + parent_name: str, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig], + ) -> Tuple[int, parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: + """Adds new children to the parameter specified by the path and parent_name. + + Args: + root_configs: A list of configs to include in returned + SearchSpaceSelectors, this list is not modified. These are expected to + be the configs at the root of the search space. + complete_path: The path to include in the returned SearchSpaceSelectors. + parent_name: new_children are added to all parameter with this name. 
+ parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (Root index in root_configs, + an updated root with all of its children updated, + list of selectors to any parameters which may have been added) + + Raises: + RuntimeError: + ValueError: + """ + if not complete_path: + # This is an internal error, since the caller should never specify an + # empty current_path. + raise RuntimeError("Internal error: got empty complete_path") + + # This is the beginning of the recursion. Select a root to recurse at. + current_root: Optional[parameter_config.ParameterConfig] = None + root_idx: int = 0 + for root_idx, root_param in enumerate(root_configs): + if complete_path[0].name == root_param.name: + current_root = root_param + break + if current_root is None: + raise ValueError( + "Invalid path: {}: failed to traverse the path: failed" + ' to find a matching root for parameter name "{}".' + " Root parameter names: {}".format( + (complete_path), + complete_path[0].name, + [pc.name for pc in root_configs], + ) + ) + + updated_root, selectors = cls._add_parameters_at_selected_path_inner( + root_configs=root_configs, + complete_path=complete_path, + current_root=current_root, + current_path=complete_path[1:], + parent_name=parent_name, + parent_values=parent_values, + new_children=new_children, + ) + return (root_idx, updated_root, selectors) + + @classmethod + def _add_parameters_at_selected_path_inner( + cls, + root_configs: List[parameter_config.ParameterConfig], + complete_path: _PathSelector, + current_root: parameter_config.ParameterConfig, + current_path: _PathSelector, + parent_name: str, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig], + ) -> Tuple[parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: + """Adds new children to the parameter specified by the path and parent_name. 
+ + Args: + root_configs: A list of configs to include in returned + SearchSpaceSelectors, this list is not modified. These are expected to + be the configs at the root of the search space. + complete_path: The path to include in the returned SearchSpaceSelectors. + current_root: Parent parameter to start the recursion at. + current_path: The path to the parent parameter from current_root. This is + used in the recursion. + parent_name: new_children are added to all parameter with this name. + parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (An updated root with all of its children updated, + List of selectors to all added parameters) + + Raises: + RuntimeError: + ValueError: + """ + updated_children: List[ + Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] + ] = [] + selectors: List["SearchSpaceSelector"] = [] + + if not current_path: + # This is the end of the path. End the recursion. + # parent_name should be a child of current_root + child_idx = None + for idx, child in enumerate(current_root.child_parameter_configs): + if parent_name == child.name: + child_idx = idx + last_parent_path = [ + _PathSegment(name=parent_name, value=parent_values[0]) + ] + new_path = complete_path + last_parent_path + updated_child, selectors = cls._add_child_parameters( + root_configs, new_path, child, parent_values, new_children + ) + break + if child_idx is None: + raise ValueError( + 'Invalid parent_name: after traversing the path "{}", ' + 'failed to find a child parameter with name "{}".' + ' Current root="{}"'.format( + (complete_path), parent_name, current_root + ) + ) + + # Update current_root with the updated child. 
+ for idx, child in enumerate(current_root.child_parameter_configs): + if idx == child_idx: + updated_children.append( + (updated_child.matching_parent_values, updated_child) + ) + else: + updated_children.append((child.matching_parent_values, child)) + return ( + current_root.clone_without_children.add_children(updated_children), + selectors, + ) + + # Traverse the path: find which child matches the next path selection. + child_idx = None + for idx, child in enumerate(current_root.child_parameter_configs): + if ( + current_path[0].name == child.name + and current_path[0].value in child.feasible_values + ): + child_idx = idx + break + if child_idx is None: + raise ValueError( + 'Invalid path: "{}": failed to traverse the path: failed' + ' to find a matching child for path selector "{}".' + ' Current root="{}", current_path="{}"'.format( + (complete_path), + (current_path[:1]), + current_root.name, + (current_path), + ) + ) + + updated_child, selectors = cls._add_parameters_at_selected_path_inner( + root_configs=root_configs, + complete_path=complete_path, + current_root=current_root.child_parameter_configs[child_idx], + current_path=current_path[1:], + parent_name=parent_name, + parent_values=parent_values, + new_children=new_children, + ) + # Update current_root with the updated child, leave the selectors untouched. 
+ for idx, child in enumerate(current_root.child_parameter_configs): + if idx == child_idx: + updated_children.append( + (updated_child.matching_parent_values, updated_child) + ) + else: + updated_children.append((child.matching_parent_values, child)) + return ( + current_root.clone_without_children.add_children(updated_children), + selectors, + ) + + @classmethod + def _add_child_parameters( + cls, + selector_configs: List[parameter_config.ParameterConfig], + selector_path: _PathSelector, + parent: parameter_config.ParameterConfig, + parent_values: MonotypeParameterSequence, + new_children: List[parameter_config.ParameterConfig], + ) -> Tuple[parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: + """Adds new children to the parent parameter and returns selectors. + + Args: + selector_configs: A list of configs to include in returned + SearchSpaceSelectors, this list is not modified. These are expected to + be the configs at the root of the search space. + selector_path: The path to include in the returned SearchSpaceSelectors. + parent: Parent parameter to add children to. + parent_values: new_children are added with these parent values. + new_children: Child parameter configs to add. + + Returns: + (An updated root with all of its children updated, + List of selectors to all added parameters) + + Raises: + RuntimeError: + ValueError: + """ + updated_children: List[ + Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] + ] = [] + selectors: List["SearchSpaceSelector"] = [] + + # Add existing children. + for child in parent.child_parameter_configs: + updated_children.append((child.matching_parent_values, child)) + # Add new child parameter configs. 
+ for child in new_children: + updated_children.append((parent_values, child)) + selectors.append( + SearchSpaceSelector( + configs=selector_configs, + selected_path=selector_path, + selected_name=child.name, + selected_values=[], + ) + ) + # Add all children (existing and potentially new) to the parent. + return (parent.clone_without_children.add_children(updated_children), selectors) @attr.s(frozen=True, init=True, slots=True, kw_only=True) class SearchSpace: - """A builder and wrapper for StudyConfig.parameter_configs.""" - - _parameter_configs: List[parameter_config.ParameterConfig] = attr.ib( - init=False, factory=list) - - @classmethod - def _factory( - cls: Type[_T], - parameter_configs: Optional[List[parameter_config.ParameterConfig]] = None - ) -> _T: - """Creates a new SearchSpace containing the provided parameter configs. - - Args: - parameter_configs: - - Returns: - SearchSpace - """ - if parameter_configs is None: - parameter_configs = [] - space = cls() - object.__setattr__(space, '_parameter_configs', list(parameter_configs)) - return space - - @property - def parameters(self) -> List[parameter_config.ParameterConfig]: - """Returns COPIES of the parameter configs in this Space.""" - return copy.deepcopy(self._parameter_configs) - - def select_root(self) -> SearchSpaceSelector: - """Returns a selector for the root of the search space. - - Parameters can be added to the search space using the returned - SearchSpaceSelector. 
- """ - return SearchSpaceSelector(configs=self._parameter_configs) - - @property - def is_conditional(self) -> bool: - """Returns True if search_space contains any conditional parameters.""" - return any([p.child_parameter_configs for p in self._parameter_configs]) - - def contains(self, parameters: trial.ParameterDict) -> bool: - try: - self.assert_contains(parameters) - return True - except InvalidParameterError: - return False - - def assert_contains(self, parameters: trial.ParameterDict) -> bool: - """Throws an error if parameters is not a valid point in the space. - - Args: - parameters: - - Returns: - Always returns True unless an exception is Raised. - - Raises: - InvalidParameterError: If parameters are invalid. - NotImplementedError: If parameter type is unknown - """ - if self.is_conditional: - raise NotImplementedError('Not implemented for conditional space.') - if len(parameters) != len(self._parameter_configs): - set1 = set(pc.name for pc in self._parameter_configs) - set2 = set(parameters) - raise InvalidParameterError( - f'Search space has {len(self._parameter_configs)} parameters ' - f'but only {len(parameters)} were given. ' - f'Missing in search space: {set2 - set1}. ' - f'Missing in parameters: {set1 - set2}.') - for pc in self._parameter_configs: - if pc.name not in parameters: - raise InvalidParameterError(f'{pc.name} is missing in {parameters}.') - elif not pc.contains(parameters[pc.name]): - raise InvalidParameterError( - f'{parameters[pc.name]} is not feasible in {pc}') - return True + """A builder and wrapper for StudyConfig.parameter_configs.""" + + _parameter_configs: List[parameter_config.ParameterConfig] = attr.ib( + init=False, factory=list + ) + + @classmethod + def _factory( + cls: Type[_T], + parameter_configs: Optional[List[parameter_config.ParameterConfig]] = None, + ) -> _T: + """Creates a new SearchSpace containing the provided parameter configs. 
+ + Args: + parameter_configs: + + Returns: + SearchSpace + """ + if parameter_configs is None: + parameter_configs = [] + space = cls() + object.__setattr__(space, "_parameter_configs", list(parameter_configs)) + return space + + @property + def parameters(self) -> List[parameter_config.ParameterConfig]: + """Returns COPIES of the parameter configs in this Space.""" + return copy.deepcopy(self._parameter_configs) + + def select_root(self) -> SearchSpaceSelector: + """Returns a selector for the root of the search space. + + Parameters can be added to the search space using the returned + SearchSpaceSelector. + """ + return SearchSpaceSelector(configs=self._parameter_configs) + + @property + def is_conditional(self) -> bool: + """Returns True if search_space contains any conditional parameters.""" + return any([p.child_parameter_configs for p in self._parameter_configs]) + + def contains(self, parameters: trial.ParameterDict) -> bool: + try: + self.assert_contains(parameters) + return True + except InvalidParameterError: + return False + + def assert_contains(self, parameters: trial.ParameterDict) -> bool: + """Throws an error if parameters is not a valid point in the space. + + Args: + parameters: + + Returns: + Always returns True unless an exception is Raised. + + Raises: + InvalidParameterError: If parameters are invalid. + NotImplementedError: If parameter type is unknown + """ + if self.is_conditional: + raise NotImplementedError("Not implemented for conditional space.") + if len(parameters) != len(self._parameter_configs): + set1 = set(pc.name for pc in self._parameter_configs) + set2 = set(parameters) + raise InvalidParameterError( + f"Search space has {len(self._parameter_configs)} parameters " + f"but only {len(parameters)} were given. " + f"Missing in search space: {set2 - set1}. " + f"Missing in parameters: {set1 - set2}." 
+ ) + for pc in self._parameter_configs: + if pc.name not in parameters: + raise InvalidParameterError(f"{pc.name} is missing in {parameters}.") + elif not pc.contains(parameters[pc.name]): + raise InvalidParameterError( + f"{parameters[pc.name]} is not feasible in {pc}" + ) + return True ################### Main Class ################### @attr.define(frozen=False, init=True, slots=True) class ProblemStatement: - """A builder and wrapper for core StudyConfig functionality.""" - - search_space: SearchSpace = attr.ib( - init=True, - factory=SearchSpace, - validator=attr.validators.instance_of(SearchSpace)) - - metric_information: MetricsConfig = attr.ib( - init=True, - factory=MetricsConfig, - converter=MetricsConfig, - validator=attr.validators.instance_of(MetricsConfig), - kw_only=True) - - metadata: common.Metadata = attr.field( - init=True, - kw_only=True, - factory=common.Metadata, - validator=attr.validators.instance_of(common.Metadata)) - - @property - def debug_info(self) -> str: - return '' + """A builder and wrapper for core StudyConfig functionality.""" + + search_space: SearchSpace = attr.ib( + init=True, + factory=SearchSpace, + validator=attr.validators.instance_of(SearchSpace), + ) + + metric_information: MetricsConfig = attr.ib( + init=True, + factory=MetricsConfig, + converter=MetricsConfig, + validator=attr.validators.instance_of(MetricsConfig), + kw_only=True, + ) + + metadata: common.Metadata = attr.field( + init=True, + kw_only=True, + factory=common.Metadata, + validator=attr.validators.instance_of(common.Metadata), + ) + + @property + def debug_info(self) -> str: + return "" diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py index 5ef52dad05..18f491d437 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py @@ -9,482 +9,503 @@ class 
ObjectiveMetricGoalTest(absltest.TestCase): - - def test_basics(self): - self.assertTrue(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_maximize) - self.assertFalse(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_minimize) - self.assertTrue(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_minimize) - self.assertFalse(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_maximize) + def test_basics(self): + self.assertTrue(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_maximize) + self.assertFalse(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_minimize) + self.assertTrue(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_minimize) + self.assertFalse(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_maximize) class MetricTypeTest(absltest.TestCase): - - def test_basics(self): - self.assertTrue(base_study_config.MetricType.SAFETY.is_safety) - self.assertTrue(base_study_config.MetricType.OBJECTIVE.is_objective) + def test_basics(self): + self.assertTrue(base_study_config.MetricType.SAFETY.is_safety) + self.assertTrue(base_study_config.MetricType.OBJECTIVE.is_objective) class MetricInformationTest(absltest.TestCase): - - def testMinMaxValueDefault(self): - info = base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE) - self.assertEqual(info.min_value, -np.inf) - self.assertEqual(info.max_value, np.inf) - - def testMinMaxValueSet(self): - info = base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - min_value=-1., - max_value=1.) - self.assertEqual(info.min_value, -1.) - self.assertEqual(info.max_value, 1.) - - def testMinMaxBadValueInit(self): - with self.assertRaises(ValueError): - base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - min_value=1., - max_value=-1.) - - def testMinMaxBadValueSet(self): - info = base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - min_value=-1., - max_value=1.) 
- with self.assertRaises(ValueError): - info.min_value = 2. - with self.assertRaises(ValueError): - info.max_value = -2. + def testMinMaxValueDefault(self): + info = base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE + ) + self.assertEqual(info.min_value, -np.inf) + self.assertEqual(info.max_value, np.inf) + + def testMinMaxValueSet(self): + info = base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + min_value=-1.0, + max_value=1.0, + ) + self.assertEqual(info.min_value, -1.0) + self.assertEqual(info.max_value, 1.0) + + def testMinMaxBadValueInit(self): + with self.assertRaises(ValueError): + base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + min_value=1.0, + max_value=-1.0, + ) + + def testMinMaxBadValueSet(self): + info = base_study_config.MetricInformation( + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + min_value=-1.0, + max_value=1.0, + ) + with self.assertRaises(ValueError): + info.min_value = 2.0 + with self.assertRaises(ValueError): + info.max_value = -2.0 class MetricsConfigTest(parameterized.TestCase): - - def testBasics(self): - config = base_study_config.MetricsConfig() - config.append( - base_study_config.MetricInformation( - name='max1', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE)) - config.extend([ - base_study_config.MetricInformation( - name='max_safe1', - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - safety_threshold=0.0), - base_study_config.MetricInformation( - name='max2', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE), - base_study_config.MetricInformation( - name='min1', goal=base_study_config.ObjectiveMetricGoal.MINIMIZE), - base_study_config.MetricInformation( - name='min_safe2', - goal=base_study_config.ObjectiveMetricGoal.MINIMIZE, - safety_threshold=0.0) - ]) - self.assertLen(config, 5) - self.assertLen(config.of_type(base_study_config.MetricType.OBJECTIVE), 3) - 
self.assertLen(config.of_type(base_study_config.MetricType.SAFETY), 2) - - def testDuplicateNames(self): - config = base_study_config.MetricsConfig() - config.append( - base_study_config.MetricInformation( - name='max1', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE)) - with self.assertRaises(ValueError): - config.append( - base_study_config.MetricInformation( - name='max1', goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE)) + def testBasics(self): + config = base_study_config.MetricsConfig() + config.append( + base_study_config.MetricInformation( + name="max1", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE + ) + ) + config.extend( + [ + base_study_config.MetricInformation( + name="max_safe1", + goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, + safety_threshold=0.0, + ), + base_study_config.MetricInformation( + name="max2", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE + ), + base_study_config.MetricInformation( + name="min1", goal=base_study_config.ObjectiveMetricGoal.MINIMIZE + ), + base_study_config.MetricInformation( + name="min_safe2", + goal=base_study_config.ObjectiveMetricGoal.MINIMIZE, + safety_threshold=0.0, + ), + ] + ) + self.assertLen(config, 5) + self.assertLen(config.of_type(base_study_config.MetricType.OBJECTIVE), 3) + self.assertLen(config.of_type(base_study_config.MetricType.SAFETY), 2) + + def testDuplicateNames(self): + config = base_study_config.MetricsConfig() + config.append( + base_study_config.MetricInformation( + name="max1", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE + ) + ) + with self.assertRaises(ValueError): + config.append( + base_study_config.MetricInformation( + name="max1", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE + ) + ) class SearchSpaceTest(parameterized.TestCase): - - def testAddFloatParamMinimal(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - selector = space.select_root().add_float_param('f1', 1.0, 15.0) - # Test the returned selector. 
- self.assertEqual(selector.path_string, '') - self.assertEqual(selector.parameter_name, 'f1') - self.assertEqual(selector.parameter_values, []) - # Test the search space. - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, 'f1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[0].feasible_values - self.assertIsNone(space.parameters[0].default_value) - - _ = space.select_root().add_float_param('f2', 2.0, 16.0) - self.assertLen(space.parameters, 2) - self.assertEqual(space.parameters[0].name, 'f1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[1].name, 'f2') - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) - - def testAddFloatParam(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_float_param( - 'f1', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, 'f1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[0].feasible_values - 
self.assertEqual(space.parameters[0].default_value, 3.0) - - def testAddDiscreteParamIntegerFeasibleValues(self): - """Test a Discrete parameter with integer feasible values.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_discrete_param( - 'd1', [101, 15.0, 21.0], default_value=15.0) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, 'd1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DISCRETE) - self.assertEqual(space.parameters[0].bounds, (15.0, 101.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - self.assertEqual(space.parameters[0].feasible_values, [15.0, 21.0, 101]) - self.assertEqual(space.parameters[0].default_value, 15.0) - self.assertEqual(space.parameters[0].external_type, pc.ExternalType.INTEGER) - - def testAddDiscreteParamFloatFeasibleValues(self): - """Test a Discrete parameter with float feasible values.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_discrete_param( - 'd1', [15.1, 21.0, 101], default_value=15.1) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].external_type, pc.ExternalType.FLOAT) - - def testAddBooleanParam(self): - """Test a Boolean parameter.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_bool_param('b1', default_value=True) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, 'b1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) - with self.assertRaisesRegex(ValueError, - 'Accessing bounds of a categorical.*'): - _ = space.parameters[0].bounds - self.assertIsNone(space.parameters[0].scale_type) - 
self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - self.assertEqual(space.parameters[0].feasible_values, ['False', 'True']) - self.assertEqual(space.parameters[0].default_value, 'True') - self.assertEqual(space.parameters[0].external_type, pc.ExternalType.BOOLEAN) - - def testAddBooleanParamWithFalseDefault(self): - """Test a Boolean parameter.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_bool_param('b1', default_value=False) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].default_value, 'False') - - def testAddTwoFloatParams(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_float_param( - 'f1', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG) - _ = space.select_root().add_float_param( - 'f2', 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG) - - self.assertLen(space.parameters, 2) - - self.assertEqual(space.parameters[0].name, 'f1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - self.assertEqual(space.parameters[1].name, 'f2') - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) - self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) - self.assertEmpty(space.parameters[1].matching_parent_values) - self.assertEmpty(space.parameters[1].child_parameter_configs) 
- with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[1].feasible_values - self.assertEqual(space.parameters[1].default_value, 4.0) - - def testChainAddTwoFloatParams(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - root = space.select_root() - root.add_float_param( - 'f1', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG) - root.add_float_param( - 'f2', 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG) - - self.assertLen(space.parameters, 2) - - self.assertEqual(space.parameters[0].name, 'f1') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - self.assertEqual(space.parameters[1].name, 'f2') - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) - self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) - self.assertEmpty(space.parameters[1].matching_parent_values) - self.assertEmpty(space.parameters[1].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[1].feasible_values - self.assertEqual(space.parameters[1].default_value, 4.0) - - def testMultidimensionalParameters(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - selector0 = space.select_root().add_float_param( - 'f', 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG, index=0) - selector1 = space.select_root().add_float_param( - 'f', - 2.0, - 10.0, - 
default_value=4.0, - scale_type=pc.ScaleType.LINEAR, - index=1) - # Test the returned selectors. - self.assertEqual(selector0.path_string, '') - self.assertEqual(selector0.parameter_name, 'f[0]') - self.assertEqual(selector0.parameter_values, []) - self.assertEqual(selector1.path_string, '') - self.assertEqual(selector1.parameter_name, 'f[1]') - self.assertEqual(selector1.parameter_values, []) - # Test the search space. - self.assertLen(space.parameters, 2) - self.assertEqual(space.parameters[0].name, 'f[0]') - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - self.assertEqual(space.parameters[1].name, 'f[1]') - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 10.0)) - self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.LINEAR) - self.assertEmpty(space.parameters[1].matching_parent_values) - self.assertEmpty(space.parameters[1].child_parameter_configs) - with self.assertRaisesRegex(ValueError, 'feasible_values is invalid.*'): - _ = space.parameters[1].feasible_values - self.assertEqual(space.parameters[1].default_value, 4.0) - - def testConditionalParameters(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - root = space.select_root() - root.add_categorical_param( - 'model_type', ['linear', 'dnn'], default_value='dnn') - # Test the selector. - self.assertEqual(root.path_string, '') - self.assertEqual(root.parameter_name, '') - self.assertEqual(root.parameter_values, []) - # Test the search space. 
- self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, 'model_type') - self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) - with self.assertRaisesRegex(ValueError, - 'Accessing bounds of a categorical.*'): - _ = space.parameters[0].bounds - self.assertIsNone(space.parameters[0].scale_type) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - self.assertEqual(space.parameters[0].feasible_values, ['dnn', 'linear']) - self.assertEqual(space.parameters[0].default_value, 'dnn') - - dnn = root.select('model_type', ['dnn']) - # Test the selector. - self.assertEqual(dnn.path_string, '') - self.assertEqual(dnn.parameter_name, 'model_type') - self.assertEqual(dnn.parameter_values, ['dnn']) - dnn.add_float_param( - 'learning_rate', - 0.0001, - 1.0, - default_value=0.001, - scale_type=base_study_config.ScaleType.LOG) - # Test the search space. - self.assertLen(space.parameters, 1) - - linear = root.select('model_type', ['linear']) - # Test the selector. - self.assertEqual(linear.path_string, '') - self.assertEqual(linear.parameter_name, 'model_type') - self.assertEqual(linear.parameter_values, ['linear']) - linear.add_float_param( - 'learning_rate', - 0.1, - 1.0, - default_value=0.1, - scale_type=base_study_config.ScaleType.LOG) - # Test the search space. - self.assertLen(space.parameters, 1) - - dnn_optimizer = dnn.add_categorical_param('optimizer_type', - ['adam', 'adagrad']) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(dnn_optimizer.path_string, 'model_type=dnn') - self.assertEqual(dnn_optimizer.parameter_name, 'optimizer_type') - self.assertEqual(dnn_optimizer.parameter_values, []) - - # Chained select() calls, path length of 1. 
- lr = root.select('model_type', ['dnn']).select( - 'optimizer_type', ['adam']).add_float_param( - 'learning_rate', + def testAddFloatParamMinimal(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + selector = space.select_root().add_float_param("f1", 1.0, 15.0) + # Test the returned selector. + self.assertEqual(selector.path_string, "") + self.assertEqual(selector.parameter_name, "f1") + self.assertEqual(selector.parameter_values, []) + # Test the search space. + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, "f1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[0].feasible_values + self.assertIsNone(space.parameters[0].default_value) + + _ = space.select_root().add_float_param("f2", 2.0, 16.0) + self.assertLen(space.parameters, 2) + self.assertEqual(space.parameters[0].name, "f1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[1].name, "f2") + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) + + def testAddFloatParam(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_float_param( + "f1", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG + ) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, "f1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + 
self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + def testAddDiscreteParamIntegerFeasibleValues(self): + """Test a Discrete parameter with integer feasible values.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_discrete_param( + "d1", [101, 15.0, 21.0], default_value=15.0 + ) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, "d1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DISCRETE) + self.assertEqual(space.parameters[0].bounds, (15.0, 101.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + self.assertEqual(space.parameters[0].feasible_values, [15.0, 21.0, 101]) + self.assertEqual(space.parameters[0].default_value, 15.0) + self.assertEqual(space.parameters[0].external_type, pc.ExternalType.INTEGER) + + def testAddDiscreteParamFloatFeasibleValues(self): + """Test a Discrete parameter with float feasible values.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_discrete_param( + "d1", [15.1, 21.0, 101], default_value=15.1 + ) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].external_type, pc.ExternalType.FLOAT) + + def testAddBooleanParam(self): + """Test a Boolean parameter.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_bool_param("b1", default_value=True) + self.assertLen(space.parameters, 1) + 
self.assertEqual(space.parameters[0].name, "b1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) + with self.assertRaisesRegex(ValueError, "Accessing bounds of a categorical.*"): + _ = space.parameters[0].bounds + self.assertIsNone(space.parameters[0].scale_type) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + self.assertEqual(space.parameters[0].feasible_values, ["False", "True"]) + self.assertEqual(space.parameters[0].default_value, "True") + self.assertEqual(space.parameters[0].external_type, pc.ExternalType.BOOLEAN) + + def testAddBooleanParamWithFalseDefault(self): + """Test a Boolean parameter.""" + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_bool_param("b1", default_value=False) + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].default_value, "False") + + def testAddTwoFloatParams(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + _ = space.select_root().add_float_param( + "f1", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG + ) + _ = space.select_root().add_float_param( + "f2", 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG + ) + + self.assertLen(space.parameters, 2) + + self.assertEqual(space.parameters[0].name, "f1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + self.assertEqual(space.parameters[1].name, "f2") + 
self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) + self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) + self.assertEmpty(space.parameters[1].matching_parent_values) + self.assertEmpty(space.parameters[1].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[1].feasible_values + self.assertEqual(space.parameters[1].default_value, 4.0) + + def testChainAddTwoFloatParams(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + root = space.select_root() + root.add_float_param( + "f1", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG + ) + root.add_float_param( + "f2", 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG + ) + + self.assertLen(space.parameters, 2) + + self.assertEqual(space.parameters[0].name, "f1") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + self.assertEqual(space.parameters[1].name, "f2") + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) + self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) + self.assertEmpty(space.parameters[1].matching_parent_values) + self.assertEmpty(space.parameters[1].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[1].feasible_values + 
self.assertEqual(space.parameters[1].default_value, 4.0) + + def testMultidimensionalParameters(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + selector0 = space.select_root().add_float_param( + "f", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG, index=0 + ) + selector1 = space.select_root().add_float_param( + "f", 2.0, 10.0, default_value=4.0, scale_type=pc.ScaleType.LINEAR, index=1 + ) + # Test the returned selectors. + self.assertEqual(selector0.path_string, "") + self.assertEqual(selector0.parameter_name, "f[0]") + self.assertEqual(selector0.parameter_values, []) + self.assertEqual(selector1.path_string, "") + self.assertEqual(selector1.parameter_name, "f[1]") + self.assertEqual(selector1.parameter_values, []) + # Test the search space. + self.assertLen(space.parameters, 2) + self.assertEqual(space.parameters[0].name, "f[0]") + self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) + self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[0].feasible_values + self.assertEqual(space.parameters[0].default_value, 3.0) + + self.assertEqual(space.parameters[1].name, "f[1]") + self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) + self.assertEqual(space.parameters[1].bounds, (2.0, 10.0)) + self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.LINEAR) + self.assertEmpty(space.parameters[1].matching_parent_values) + self.assertEmpty(space.parameters[1].child_parameter_configs) + with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): + _ = space.parameters[1].feasible_values + self.assertEqual(space.parameters[1].default_value, 4.0) + + def testConditionalParameters(self): + 
space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + root = space.select_root() + root.add_categorical_param("model_type", ["linear", "dnn"], default_value="dnn") + # Test the selector. + self.assertEqual(root.path_string, "") + self.assertEqual(root.parameter_name, "") + self.assertEqual(root.parameter_values, []) + # Test the search space. + self.assertLen(space.parameters, 1) + self.assertEqual(space.parameters[0].name, "model_type") + self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) + with self.assertRaisesRegex(ValueError, "Accessing bounds of a categorical.*"): + _ = space.parameters[0].bounds + self.assertIsNone(space.parameters[0].scale_type) + self.assertEmpty(space.parameters[0].matching_parent_values) + self.assertEmpty(space.parameters[0].child_parameter_configs) + self.assertEqual(space.parameters[0].feasible_values, ["dnn", "linear"]) + self.assertEqual(space.parameters[0].default_value, "dnn") + + dnn = root.select("model_type", ["dnn"]) + # Test the selector. + self.assertEqual(dnn.path_string, "") + self.assertEqual(dnn.parameter_name, "model_type") + self.assertEqual(dnn.parameter_values, ["dnn"]) + dnn.add_float_param( + "learning_rate", + 0.0001, + 1.0, + default_value=0.001, + scale_type=base_study_config.ScaleType.LOG, + ) + # Test the search space. + self.assertLen(space.parameters, 1) + + linear = root.select("model_type", ["linear"]) + # Test the selector. + self.assertEqual(linear.path_string, "") + self.assertEqual(linear.parameter_name, "model_type") + self.assertEqual(linear.parameter_values, ["linear"]) + linear.add_float_param( + "learning_rate", 0.1, 1.0, default_value=0.1, - scale_type=base_study_config.ScaleType.LOG) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. 
- self.assertEqual(lr.parameter_name, 'learning_rate') - self.assertEqual(lr.parameter_values, []) - self.assertEqual(lr.path_string, 'model_type=dnn/optimizer_type=adam') - - # Chained select() calls, path length of 2. - ko = root.select('model_type', ['dnn']).select('optimizer_type', - ['adam']).add_bool_param( - 'use_keras_optimizer', - default_value=False) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(ko.parameter_name, 'use_keras_optimizer') - self.assertEqual(ko.parameter_values, []) - self.assertEqual(ko.path_string, 'model_type=dnn/optimizer_type=adam') - - ko.select_values(['True']) - self.assertEqual(ko.parameter_values, ['True']) - - selector = ko.add_float_param('keras specific', 1.3, 2.4, default_value=2.1) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(selector.parameter_name, 'keras specific') - self.assertEqual(selector.parameter_values, []) - self.assertEqual( - selector.path_string, - 'model_type=dnn/optimizer_type=adam/use_keras_optimizer=True') - - # Selects more than one node. - # selectors = dnn.select_all('optimizer_type', ['adam']) - # self.assertLen(selectors, 2) - - def testConditionalParametersWithReturnedSelectors(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - root = space.select_root() - model_type = root.add_categorical_param('model_type', ['linear', 'dnn']) - learning_rate = model_type.select_values(['dnn']).add_float_param( - 'learning_rate', - 0.1, - 1.0, - default_value=0.001, - scale_type=base_study_config.ScaleType.LOG) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selectors. 
- self.assertEqual(model_type.parameter_values, ['dnn']) - self.assertEqual(learning_rate.parameter_name, 'learning_rate') - self.assertEqual(learning_rate.parameter_values, []) - self.assertEqual(learning_rate.path_string, 'model_type=dnn') - - # It is possible to select different values for the same selector. - optimizer_type = model_type.select_values(['linear', - 'dnn']).add_categorical_param( - 'optimizer_type', - ['adam', 'adagrad']) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selectors. - self.assertEqual(model_type.parameter_values, ['linear', 'dnn']) - self.assertEqual(optimizer_type.parameter_name, 'optimizer_type') - self.assertEqual(optimizer_type.parameter_values, []) - self.assertEqual(optimizer_type.path_string, 'model_type=linear') - - @parameterized.named_parameters( - ('Multi', 'units[0]', ('units', 0)), - ('Multi2', 'with_underscore[1]', ('with_underscore', 1)), - ('NotMulti', 'units', None), - ('NotMulti2', 'with space', None), - ('NotMulti3', 'with[8]space', None), - ('NotMulti4', 'units[0][4]', ('units[0]', 4)), - ('GinStyle', '_gin.ambient_net_exp_from_vec.block_type[3]', - ('_gin.ambient_net_exp_from_vec.block_type', 3)), - ) - def testParseMultiDimensionalParameterName(self, name, expected): - base_name_index = base_study_config.SearchSpaceSelector.parse_multi_dimensional_parameter_name( - name) - self.assertEqual(base_name_index, expected) + scale_type=base_study_config.ScaleType.LOG, + ) + # Test the search space. + self.assertLen(space.parameters, 1) + + dnn_optimizer = dnn.add_categorical_param("optimizer_type", ["adam", "adagrad"]) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(dnn_optimizer.path_string, "model_type=dnn") + self.assertEqual(dnn_optimizer.parameter_name, "optimizer_type") + self.assertEqual(dnn_optimizer.parameter_values, []) + + # Chained select() calls, path length of 1. 
+ lr = ( + root.select("model_type", ["dnn"]) + .select("optimizer_type", ["adam"]) + .add_float_param( + "learning_rate", + 0.1, + 1.0, + default_value=0.1, + scale_type=base_study_config.ScaleType.LOG, + ) + ) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(lr.parameter_name, "learning_rate") + self.assertEqual(lr.parameter_values, []) + self.assertEqual(lr.path_string, "model_type=dnn/optimizer_type=adam") + + # Chained select() calls, path length of 2. + ko = ( + root.select("model_type", ["dnn"]) + .select("optimizer_type", ["adam"]) + .add_bool_param("use_keras_optimizer", default_value=False) + ) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(ko.parameter_name, "use_keras_optimizer") + self.assertEqual(ko.parameter_values, []) + self.assertEqual(ko.path_string, "model_type=dnn/optimizer_type=adam") + + ko.select_values(["True"]) + self.assertEqual(ko.parameter_values, ["True"]) + + selector = ko.add_float_param("keras specific", 1.3, 2.4, default_value=2.1) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selector. + self.assertEqual(selector.parameter_name, "keras specific") + self.assertEqual(selector.parameter_values, []) + self.assertEqual( + selector.path_string, + "model_type=dnn/optimizer_type=adam/use_keras_optimizer=True", + ) + + # Selects more than one node. + # selectors = dnn.select_all('optimizer_type', ['adam']) + # self.assertLen(selectors, 2) + + def testConditionalParametersWithReturnedSelectors(self): + space = base_study_config.SearchSpace() + self.assertEmpty(space.parameters) + root = space.select_root() + model_type = root.add_categorical_param("model_type", ["linear", "dnn"]) + learning_rate = model_type.select_values(["dnn"]).add_float_param( + "learning_rate", + 0.1, + 1.0, + default_value=0.001, + scale_type=base_study_config.ScaleType.LOG, + ) + # Test the search space. 
+ self.assertLen(space.parameters, 1) + # Test the selectors. + self.assertEqual(model_type.parameter_values, ["dnn"]) + self.assertEqual(learning_rate.parameter_name, "learning_rate") + self.assertEqual(learning_rate.parameter_values, []) + self.assertEqual(learning_rate.path_string, "model_type=dnn") + + # It is possible to select different values for the same selector. + optimizer_type = model_type.select_values( + ["linear", "dnn"] + ).add_categorical_param("optimizer_type", ["adam", "adagrad"]) + # Test the search space. + self.assertLen(space.parameters, 1) + # Test the selectors. + self.assertEqual(model_type.parameter_values, ["linear", "dnn"]) + self.assertEqual(optimizer_type.parameter_name, "optimizer_type") + self.assertEqual(optimizer_type.parameter_values, []) + self.assertEqual(optimizer_type.path_string, "model_type=linear") + + @parameterized.named_parameters( + ("Multi", "units[0]", ("units", 0)), + ("Multi2", "with_underscore[1]", ("with_underscore", 1)), + ("NotMulti", "units", None), + ("NotMulti2", "with space", None), + ("NotMulti3", "with[8]space", None), + ("NotMulti4", "units[0][4]", ("units[0]", 4)), + ( + "GinStyle", + "_gin.ambient_net_exp_from_vec.block_type[3]", + ("_gin.ambient_net_exp_from_vec.block_type", 3), + ), + ) + def testParseMultiDimensionalParameterName(self, name, expected): + base_name_index = base_study_config.SearchSpaceSelector.parse_multi_dimensional_parameter_name( + name + ) + self.assertEqual(base_name_index, expected) class SearchSpaceContainsTest(absltest.TestCase): - - def _space(self): - space = base_study_config.SearchSpace() - root = space.select_root() - root.add_float_param('learning-rate', 1e-4, 1e-2) - root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) - return space - - def testFloatCat1(self): - self._space().assert_contains( - trial.ParameterDict({ - 'optimizer': 'adagrad', - 'learning-rate': 1e-2 - })) - - def testFloatCat2(self): - self.assertFalse(self._space().contains( - 
trial.ParameterDict({ - 'optimizer': 'adagrad', - 'BADPARAM': 1e-2 - }))) - - def testFloatCat3(self): - self.assertFalse(self._space().contains( - trial.ParameterDict({ - 'optimizer': 'adagrad', - 'learning-rate': 1e-2, - 'BADPARAM': 1e-2 - }))) - - def testFloatCat4(self): - self.assertFalse(self._space().contains( - trial.ParameterDict({ - 'optimizer': 'adagrad', - 'learning-rate': 1e2 - }))) - - -if __name__ == '__main__': - absltest.main() + def _space(self): + space = base_study_config.SearchSpace() + root = space.select_root() + root.add_float_param("learning-rate", 1e-4, 1e-2) + root.add_categorical_param("optimizer", ["adagrad", "adam", "experimental"]) + return space + + def testFloatCat1(self): + self._space().assert_contains( + trial.ParameterDict({"optimizer": "adagrad", "learning-rate": 1e-2}) + ) + + def testFloatCat2(self): + self.assertFalse( + self._space().contains( + trial.ParameterDict({"optimizer": "adagrad", "BADPARAM": 1e-2}) + ) + ) + + def testFloatCat3(self): + self.assertFalse( + self._space().contains( + trial.ParameterDict( + {"optimizer": "adagrad", "learning-rate": 1e-2, "BADPARAM": 1e-2} + ) + ) + ) + + def testFloatCat4(self): + self.assertFalse( + self._space().contains( + trial.ParameterDict({"optimizer": "adagrad", "learning-rate": 1e2}) + ) + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/common.py b/google/cloud/aiplatform/vizier/pyvizier/common.py index 15c710fa0f..5d19fc364b 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/common.py +++ b/google/cloud/aiplatform/vizier/pyvizier/common.py @@ -9,8 +9,8 @@ from google.protobuf import any_pb2 from google.protobuf.message import Message -M = TypeVar('M', bound=Message) -T = TypeVar('T') +M = TypeVar("M", bound=Message) +T = TypeVar("T") MetadataValue = Union[str, any_pb2.Any, Message] # Namespace Encoding. 
def _parse(arg: str) -> Tuple[str, ...]:
    """Parses an encoded namespace string into a namespace tuple.

    The encoding separates components with ':' and escapes a literal colon
    inside a component as '\\:'.  An optional leading ':' carries no
    information and is ignored.

    Args:
      arg: the encoded namespace string.

    Returns:
      The decoded tuple of namespace components.
    """
    # ''.split(':') would yield [''], i.e. one bogus empty component, so the
    # empty encoding (the root namespace) is handled explicitly.
    if not arg:
        return ()
    # Every other encoded form may begin with a redundant colon; drop it.
    # TODO: Once we're on Python 3.9, use: arg = arg.removeprefix(':')
    if arg.startswith(":"):
        arg = arg[1:]
    # Split on *every* colon (escaped or not), then re-join each fragment
    # whose predecessor ended with the escape character '\\', dropping the
    # escape character as we go.
    components = []
    continued = False  # True when the previous fragment ended with an escape.
    for fragment in arg.split(":"):
        escaped = bool(fragment) and fragment.endswith("\\")
        text = fragment[:-1] if escaped else fragment
        if continued:
            components[-1] += ":" + text
        else:
            components.append(text)
        continued = escaped
    return tuple(components)
@attr.frozen(eq=True, order=True, hash=True, auto_attribs=True, init=False)
class Namespace(abc.Sequence):
    r"""An immutable, tuple-like namespace for the Metadata class.

    Namespaces form a tree; a particular namespace is a tuple of string
    components.

    Two ways to build one:
    * Namespace.decode(s) parses a string, splitting on colons, so ':' is a
      reserved character there: decode('a:b') == Namespace(('a', 'b')), and a
      literal colon must be escaped as r'\:' (decode('a\\:b') is the
      single-component Namespace(('a:b',))). A leading colon is ignored:
      decode(':a') == Namespace(('a',)).
    * Namespace(iterable) takes the components verbatim; nothing is parsed
      and colons are ordinary characters.

    Conversions: for any Namespace x,
    * Namespace.decode(x.encode()) == x; x.encode() joins the escaped
      components with colons.
    * Namespace behaves as a Sequence[str], so Namespace(tuple(x)) == x and
      Namespace(x) == x.
    """

    _as_tuple: Tuple[str, ...] = attr.field(hash=True, eq=True, order=True)

    def __init__(self, arg: Iterable[str] = ()):
        """Builds a Namespace directly from its component strings.

        Args:
          arg: an iterable of components; not parsed, so colons inside the
            components are ordinary characters.
        """
        # attrs generates no __init__ here (init=False); delegate to the
        # generated __attrs_init__ so the frozen field can still be set.
        self.__attrs_init__(as_tuple=tuple(arg))

    # Translation table used by encode() to escape literal colons.
    _ns_repr_table = str.maketrans({":": r"\:"})

    @classmethod
    def decode(cls, s: str) -> "Namespace":
        r"""Decodes an encoded string into a Namespace.

        Inverse of encode(): Namespace.decode(x.encode()) == x.

        Args:
          s: a string whose components are separated by ':', with literal
            colons escaped as r'\:'.

        Returns:
          The decoded Namespace.
        """
        return Namespace(_parse(s))

    def encode(self) -> str:
        """Encodes this Namespace into a string; inverse of decode().

        Returns:
          Each component with its colons escaped, prefixed by ':'.
        """
        return "".join(
            ":" + part.translate(self._ns_repr_table) for part in self._as_tuple
        )

    def __len__(self) -> int:
        """Number of components (elements of the tuple form)."""
        return len(self._as_tuple)

    def __add__(self, other: Iterable[str]) -> "Namespace":
        """Returns a new Namespace with other's components appended."""
        return Namespace((*self._as_tuple, *other))

    @overload
    def __getitem__(self, key: int) -> str:
        ...

    @overload
    def __getitem__(self, key: slice) -> "Namespace":
        ...

    def __getitem__(self, key):
        """Returns a component (int key) or a sub-Namespace (slice key)."""
        if isinstance(key, int):
            return self._as_tuple[key]
        return Namespace(self._as_tuple[key])

    def __str__(self) -> str:
        """The fully-escaped string form; same as encode()."""
        return self.encode()

    def __repr__(self) -> str:
        """The fully-escaped string form, wrapped for debugging."""
        return "Namespace({})".format(self.encode())

    def startswith(self, prefix: Iterable[str]) -> bool:
        """Returns True if this namespace's first components equal prefix."""
        needle = Namespace(prefix)
        return self[: len(needle)] == needle
class Metadata(abc.MutableMapping):
    """Metadata class.

    This is the main interface for reading metadata from a Trial (writing metadata
    should typically be done via the MetadataUpdateContext class.)

    This behaves like a str->str dict, within a given namespace.
      mm = Metadata({'foo': 'Foo'})
      mm.get('foo')  # Returns 'Foo'
      mm['foo']  # Returns 'Foo'
      mm['bar'] = 'Bar'
      mm.update({'a': 'A'}, gleep='Gleep')

    1. Keys are namespaced. Each Metadata object only interacts with one
    Namespace, but a metadata object and its children share a
    common set of (namespace, key, value) triplets.

    Namespaces form a tree, and you can walk down the tree. There are two
    namespace operators: ns(s) which adds component(s) on to the namespace, and
    abs_ns() which specifies the entire namespace.

    A Metadata() object is always created at the root of the namespace tree,
    and the root is special (it's the only namespace that Vizier users can write
    or conveniently read). Pythia algorithm developers should avoid the root
    namespace, unless they intend to pass data to/from Vizier users.

      mm = Metadata({'foo': 'foofoo'})
      mm.ns('NewName')['bar'] = 'Bar'
      mm['foo']  # Returns 'foofoo'
      mm['bar']  # Throws a KeyError
      mm.ns('NewName')['foo']  # Throws a KeyError
      mm.ns('NewName')['bar']  # Returns 'Bar'
      mm.ns('NewName').get('bar')  # Returns 'Bar'

      # Use of abs_ns().
      mm = Metadata()
      mm.abs_ns(Namespace(('NewName',)))['bar'] = 'Bar'
      mm.abs_ns(Namespace(('NewName',)))['bar']  # Returns 'Bar'

      # Multi-component namespaces.
      mm = Metadata()
      mm.ns('a').ns('b')['foo'] = 'AB-foo'
      mm.ns('a')['foo'] = 'A-foo'
      mm['foo']  # Throws a KeyError
      mm.ns('a')['foo']  # Returns 'A-foo'
      mm.ns('a').ns('b')['foo']  # Returns 'AB-foo'
      mm.abs_ns(Namespace(('a', 'b'))).get('foo')  # Returns 'AB-foo'
      mm.abs_ns(Namespace.decode('a:b')).get('foo')  # Returns 'AB-foo'

    2. Values can be protobufs. If `metadata['foo']` is an instance of `MyProto`
    proto message or `Any` proto that packs a `MyProto` message, then the proto
    can be recovered by calling:
      my_proto = metadata.get_proto('foo', cls=MyProto)
      isinstance(my_proto, MyProto)  # Returns `True`

    3. An iteration over a Metadata object only shows you the data in the current
    namespace. So,

      mm = Metadata({'foo': 'foofoo'})
      for k, v in mm.ns('gleep'):
        ...

    will not yield anything because there are no keys in the 'gleep' namespace.
    WARNING: Because of this behavior, Metadata(mm) will quietly drop metadata
    from all but mm's current namespace.

    Be aware that type(v) is MetadataValue, which includes protos in addition to
    strings.

    To iterate over all the keys in all the namespaces use the namespaces()
    method.

      mm : Metadata
      for ns in mm.namespaces():
        for k, v in mm.abs_ns(ns).items():
          ...
    """

    def __init__(
        self,
        *args: Union[Dict[str, MetadataValue], Iterable[Tuple[str, MetadataValue]]],
        **kwargs: MetadataValue,
    ):
        """Construct; this follows dict(), and puts data in the root namespace.

        You can pass it a dict, or an object that yields (key, value)
        pairs, and those pairs will be put in the root namespace.

        Args:
            *args: A dict or an iterable that yields key-value pairs.
            **kwargs: key=value pairs to be added to the specified namespace.
        """
        # All (namespace -> key/value store) data lives in _stores; _store is
        # just the entry for the current namespace (the root, at construction).
        self._stores: DefaultDict[
            Namespace, _MetadataSingleNameSpace
        ] = collections.defaultdict(_MetadataSingleNameSpace)
        self._namespace = Namespace()
        self._store = self._stores[self._namespace]
        self._store.update(*args, **kwargs)

    def abs_ns(self, namespace: Iterable[str] = ()) -> "Metadata":
        """Switches to a specified absolute namespace.

        All the Metadata object's data is shared between $self and the returned
        object, but the new Metadata object will have a different default
        namespace.

        Args:
            namespace: a list of Namespace components. (Defaults to the root, empty
              Namespace.)

        Returns:
            A new Metadata object in the specified namespace; the new object shares
            data (except the namespace) with $self.
        """
        return self._copy_core(Namespace(namespace))

    def ns(self, component: str) -> "Metadata":
        r"""Switches to a deeper namespace by appending a component.

        All the metadata is shared between $self and the returned value, but they
        have a different current namespace.

        Args:
            component: one component to be added to the current namespace.

        Returns:
            A new Metadata object in the specified namespace; the new object shares
            metadata (except the choice of namespace) with $self.
        """
        new_ns: Namespace = self._namespace + (component,)
        return self._copy_core(new_ns)

    def __repr__(self) -> str:
        itemlist: List[str] = []
        for namespace, store in self._stores.items():
            item_string = f"(namespace:{namespace}, items: {store})"
            itemlist.append(item_string)
        return "Metadata({}, current_namespace={})".format(
            ", ".join(itemlist), self._namespace.encode()
        )

    def __str__(self) -> str:
        return "namespace: {} items: {}".format(str(self._namespace), self._store)

    def get_proto(self, key: str, *, cls: Type[M]) -> Optional[M]:
        """Deprecated.

        Use get() instead.

        Gets the metadata as type `cls`, or None if not possible.

        Args:
            key: the metadata key to look up in the current namespace.
            cls: Pass in a proto ***class***, not a proto object.

        Returns:
            Proto message, if the value associated with the key exists and
            can be parsed into cls; None otherwise.
        """
        value = self._store.get(key, None)
        if value is None:
            return None

        if isinstance(value, cls):
            # Starting from 3.10, pytype supports typeguard, which obsoletes
            # the need for the `pytype:disable` clause.
            return value  # pytype: disable=bad-return-type

        if isinstance(value, any_pb2.Any):
            # `value` is an Any proto potentially packing `cls`.
            message = cls()
            success = value.Unpack(message)
            return message if success else None

        return None

    def get(
        self, key: str, default: Optional[T] = None, *, cls: Type[T] = str
    ) -> Optional[T]:
        """Gets the metadata as type `cls`, or None if not possible.

        Given regular string values, this function behaves exactly like a
        regular string-to-string dict (within its namespace).
          metadata = common.Metadata({'key': 'value'})
          assert metadata.get('key') == 'value'
          assert metadata.get('badkey', 'badvalue') == 'badvalue'

        Example with numeric string values:
          metadata = common.Metadata({'float': '1.2', 'int': '60'})
          assert metadata.get('float', cls=float) == 1.2
          assert metadata.get('badkey', 0.2, cls=float) == 0.2
          assert metadata.get('int', cls=int) == 60
          assert metadata.get('badkey', 1, cls=int) == 1

        Example with `Duration` and `Any` proto values:
          duration = Duration(seconds=60)
          anyproto = Any()
          anyproto.Pack(duration)
          metadata = common.Metadata({'duration': duration, 'any': anyproto})
          assert metadata.get('duration', cls=Duration) == duration
          assert metadata.get('any', cls=Duration) == duration

        Args:
            key: the metadata key to look up in the current namespace.
            default: Default value.
            cls: Desired type of the value.

        Returns:
            Default if the key does not exist. Otherwise, the matching value is
            parsed into type `cls`. For proto messages, it involves unpacking
            Any proto.
        """
        try:
            value = self._store[key]
        except KeyError:
            return default
        if isinstance(value, cls):
            # Starting from 3.10, pytype supports typeguard, which obsoletes
            # the need for the `pytype:disable` clause.
            return value  # pytype: disable=bad-return-type
        if isinstance(value, any_pb2.Any):
            # `value` is an Any proto potentially packing `cls`.
            message = cls()
            success = value.Unpack(message)
            return message if success else None
        # Fall back to constructing `cls` from the raw value, e.g. int('60').
        return cls(value)

    # TODO: Rename to `abs_namespaces`
    def namespaces(self) -> Tuple[Namespace, ...]:
        """Get all namespaces for which there is at least one key.

        Returns:
            For all `ns` in `md.namespaces()`, `md.abs_ns(ns)` is not empty.
        """
        return tuple([ns for ns, store in self._stores.items() if store])

    # TODO: Rename to `namespaces`
    def subnamespaces(self) -> Tuple[Namespace, ...]:
        """Returns relative namespaces that are at or below the current namespace.

        For all `ns` in `md.subnamespaces()`, `md.abs_ns(md.current_ns() + ns)` is
        not empty. E.g. if namespace 'foo:bar' is non-empty, and you're in
        namespace 'foo', then the result will contain namespace 'bar'.

        Returns:
            For namespaces that begin with the current namespace and are
            non-empty, this returns a namespace object that contains the relative
            path from the current namespace.
        """
        return tuple(
            [
                Namespace(ns[len(self._namespace) :])
                for ns, store in self._stores.items()
                if store and ns.startswith(self._namespace)
            ]
        )

    def current_ns(self) -> Namespace:
        """Displays the object's current Namespace."""
        return self._namespace

    # START OF abstract methods inherited from `MutableMapping` base class.
    def __getitem__(self, key: str) -> MetadataValue:
        return self._store.__getitem__(key)

    def __setitem__(self, key: str, value: MetadataValue):
        self._store[key] = value

    def __delitem__(self, key: str):
        del self._store[key]

    def __iter__(self):
        return iter(self._store)

    def __len__(self):
        return len(self._store)

    def __copy__(self) -> "Metadata":
        """Shallow copy -- metadata continues to be shared.

        Returns:
            A copy of the object.
        """
        return self._copy_core(self._namespace)

    # END OF Abstract methods inherited from `MutableMapping` base class.

    def _copy_core(self, ns: Namespace) -> "Metadata":
        """Shallow copy: metadata is shared, default namespace changes.

        Args:
            ns: the namespace to use for the new object.

        Returns:
            A copy of the object.
        """
        # The copy shares the underlying _stores mapping; only the default
        # namespace (and thus which single-namespace store it points at) differs.
        md = Metadata()
        md._namespace = ns  # pylint: disable='protected-access'
        md._stores = self._stores  # pylint: disable='protected-access'
        md._store = md._stores[md._namespace]  # pylint: disable='protected-access'
        return md

    def update(
        self,
        *args: Union[Dict[str, MetadataValue], Iterable[Tuple[str, MetadataValue]]],
        **kwargs: MetadataValue,
    ) -> None:
        # Follows dict.update(); writes into the current namespace only.
        self._store.update(*args, **kwargs)

    def attach(self, other: "Metadata") -> None:
        """Attach the $other metadata as a descendent of this metadata.

        More precisely, it takes the part of `other`'s namespace that is at or
        below `other`'s current namespace, and attaches it to `self`'s current
        namespace.
        * Tree structure is preserved and nothing is flattened.
        * Attached data overwrites existing data, item-by-item, not
          namespace-by-namespace.

        So, if we have
          other = Metadata()
          other.abs_ns(Namespace(('x', 'y', 'z')))['foo'] = 'bar'
          m = Metadata()
          m.ns('w').attach(other.ns('x'))
        then
          m.abs_ns(('w', 'y', 'z'))['foo'] will contain 'bar'.

        Args:
            other: a Metadata object to copy from.
        """
        # For each non-empty namespace under other's current namespace, merge
        # its items under the corresponding namespace below ours.
        for ns in other.subnamespaces():
            self._stores[self._namespace + ns].update(
                other.abs_ns(other.current_ns() + ns)
            )
+ """ + for ns in other.subnamespaces(): + self._stores[self._namespace + ns].update( + other.abs_ns(other.current_ns() + ns) + ) diff --git a/google/cloud/aiplatform/vizier/pyvizier/common_test.py b/google/cloud/aiplatform/vizier/pyvizier/common_test.py index 54162585c4..718696c4ed 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/common_test.py +++ b/google/cloud/aiplatform/vizier/pyvizier/common_test.py @@ -8,327 +8,334 @@ class MetadataGetClsTest(absltest.TestCase): - - def test_get_proto(self): - duration = duration_pb2.Duration(seconds=60) - anyproto = any_pb2.Any() - anyproto.Pack(duration) - metadata = common.Metadata(duration=duration, any=anyproto) - - self.assertEqual( - metadata.get_proto('duration', cls=duration_pb2.Duration), duration) - self.assertEqual( - metadata.get_proto('any', cls=duration_pb2.Duration), duration) - self.assertEqual( - metadata.get('duration', cls=duration_pb2.Duration), duration) - self.assertEqual(metadata.get('any', cls=duration_pb2.Duration), duration) - - def test_get_int(self): - metadata = common.Metadata({'string': '30', 'int': '60'}) - self.assertEqual(metadata.get('string', cls=int), 30) - self.assertEqual(metadata.get('int', cls=int), 60) - self.assertEqual(metadata.get('badkey', 1, cls=int), 1) + def test_get_proto(self): + duration = duration_pb2.Duration(seconds=60) + anyproto = any_pb2.Any() + anyproto.Pack(duration) + metadata = common.Metadata(duration=duration, any=anyproto) + + self.assertEqual( + metadata.get_proto("duration", cls=duration_pb2.Duration), duration + ) + self.assertEqual(metadata.get_proto("any", cls=duration_pb2.Duration), duration) + self.assertEqual(metadata.get("duration", cls=duration_pb2.Duration), duration) + self.assertEqual(metadata.get("any", cls=duration_pb2.Duration), duration) + + def test_get_int(self): + metadata = common.Metadata({"string": "30", "int": "60"}) + self.assertEqual(metadata.get("string", cls=int), 30) + self.assertEqual(metadata.get("int", cls=int), 60) + 
self.assertEqual(metadata.get("badkey", 1, cls=int), 1) class MetadataNamespaceTest(absltest.TestCase): - - def test_basic(self): - ns0 = common.Namespace() - self.assertEmpty(ns0) - self.assertEqual(str(ns0), '') - self.assertEqual(ns0.encode(), '') - self.assertEqual(ns0, common.Namespace.decode('')) - n1t = common.Namespace(('aerer',)) - self.assertLen(n1t, 1) - n1 = common.Namespace.decode('a78') - self.assertLen(n1, 1) - self.assertEqual(str(n1), ':a78') - n2 = common.Namespace(('a78', 'bfe')) - self.assertLen(n2, 2) - n2s1 = common.Namespace.decode('a78:bfe') - self.assertLen(n2s1, 2) - self.assertEqual(n2.encode(), n2s1.encode()) - n2s2 = common.Namespace.decode(':a78:bfe') - self.assertLen(n2s2, 2) - self.assertEqual(n2.encode(), n2s2.encode()) - self.assertEqual(n2, n2s2) - self.assertEqual(n2s1, n2s2) - ns = common.Namespace(('a', 'b')) - self.assertLen(ns, 2) - self.assertEqual(tuple(ns), ('a', 'b')) - self.assertEqual(str(ns), ':a:b') - self.assertEqual(ns.encode(), ':a:b') - - def test_escape(self): - s1 = 'a\\:A' - ns1 = common.Namespace.decode(s1) - self.assertLen(ns1, 1) - self.assertEqual(str(ns1), ':a\\:A') - self.assertEqual(ns1.encode(), ':' + s1) - self.assertEqual(common.Namespace.decode(ns1.encode()), ns1) - # - s2 = 'b:B' - ns2 = common.Namespace.decode(s2) - self.assertLen(ns2, 2) - self.assertEqual(str(ns2), ':' + s2) - self.assertEqual(ns2.encode(), ':' + s2) - self.assertEqual(common.Namespace.decode(ns2.encode()), ns2) - # - s1e1 = ':b\\B' - ns1e1 = common.Namespace.decode(s1e1) - self.assertLen(ns1e1, 1) - self.assertEqual(ns1e1.encode(), s1e1) - self.assertEqual(common.Namespace.decode(ns1e1.encode()), ns1e1) - ns1e2 = common.Namespace((s1e1.lstrip(':'),)) - self.assertLen(ns1e2, 1) - self.assertEqual(ns1e2.encode(), s1e1) - self.assertEqual(ns1e2, ns1e1) - self.assertEqual(common.Namespace.decode(ns1e2.encode()), ns1e2) - # - s1c = r':b\:B' - ns1c = common.Namespace.decode(s1c) - self.assertLen(ns1c, 1) - # Initial colon is 
harmlessly removed. - self.assertEqual(ns1c.encode(), s1c) - self.assertEqual(common.Namespace.decode(ns1c.encode()), ns1c) - self.assertEqual(common.Namespace(('b:B',)), ns1c) + def test_basic(self): + ns0 = common.Namespace() + self.assertEmpty(ns0) + self.assertEqual(str(ns0), "") + self.assertEqual(ns0.encode(), "") + self.assertEqual(ns0, common.Namespace.decode("")) + n1t = common.Namespace(("aerer",)) + self.assertLen(n1t, 1) + n1 = common.Namespace.decode("a78") + self.assertLen(n1, 1) + self.assertEqual(str(n1), ":a78") + n2 = common.Namespace(("a78", "bfe")) + self.assertLen(n2, 2) + n2s1 = common.Namespace.decode("a78:bfe") + self.assertLen(n2s1, 2) + self.assertEqual(n2.encode(), n2s1.encode()) + n2s2 = common.Namespace.decode(":a78:bfe") + self.assertLen(n2s2, 2) + self.assertEqual(n2.encode(), n2s2.encode()) + self.assertEqual(n2, n2s2) + self.assertEqual(n2s1, n2s2) + ns = common.Namespace(("a", "b")) + self.assertLen(ns, 2) + self.assertEqual(tuple(ns), ("a", "b")) + self.assertEqual(str(ns), ":a:b") + self.assertEqual(ns.encode(), ":a:b") + + def test_escape(self): + s1 = "a\\:A" + ns1 = common.Namespace.decode(s1) + self.assertLen(ns1, 1) + self.assertEqual(str(ns1), ":a\\:A") + self.assertEqual(ns1.encode(), ":" + s1) + self.assertEqual(common.Namespace.decode(ns1.encode()), ns1) + # + s2 = "b:B" + ns2 = common.Namespace.decode(s2) + self.assertLen(ns2, 2) + self.assertEqual(str(ns2), ":" + s2) + self.assertEqual(ns2.encode(), ":" + s2) + self.assertEqual(common.Namespace.decode(ns2.encode()), ns2) + # + s1e1 = ":b\\B" + ns1e1 = common.Namespace.decode(s1e1) + self.assertLen(ns1e1, 1) + self.assertEqual(ns1e1.encode(), s1e1) + self.assertEqual(common.Namespace.decode(ns1e1.encode()), ns1e1) + ns1e2 = common.Namespace((s1e1.lstrip(":"),)) + self.assertLen(ns1e2, 1) + self.assertEqual(ns1e2.encode(), s1e1) + self.assertEqual(ns1e2, ns1e1) + self.assertEqual(common.Namespace.decode(ns1e2.encode()), ns1e2) + # + s1c = r":b\:B" + ns1c = 
common.Namespace.decode(s1c) + self.assertLen(ns1c, 1) + # Initial colon is harmlessly removed. + self.assertEqual(ns1c.encode(), s1c) + self.assertEqual(common.Namespace.decode(ns1c.encode()), ns1c) + self.assertEqual(common.Namespace(("b:B",)), ns1c) class MetadataTest(absltest.TestCase): - - def create_test_metadata(self): - md = common.Metadata({'bar': 'bar_v'}, foo='foo_v') - md.ns('Name').update(foo='Name_foo_v', baz='Name_baz_v') - return md - - def test_empty_namespaces(self): - md = common.Metadata() - self.assertEmpty(list(md.namespaces())) - md = common.Metadata().ns('ns') - self.assertEmpty(list(md.namespaces())) - - def test_nonempty_namespaces(self): - mm = self.create_test_metadata() - self.assertLen(mm.namespaces(), 2) - - def test_getters_are_consistent_when_item_is_in_dict(self): - mm = self.create_test_metadata() - self.assertEqual(mm['foo'], 'foo_v') - self.assertEqual(mm.get('foo'), 'foo_v') - - def test_getters_are_consistent_when_item_is_not_in_dict(self): - mm = self.create_test_metadata() - self.assertIsNone(mm.get('baz')) - with self.assertRaises(KeyError): - _ = mm['baz'] - - def test_separator_is_not_allowed_as_keys_after_init(self): - mm = self.create_test_metadata() - with self.assertRaises(KeyError): - _ = mm['Name_foo'] - - def test_namespace_works_as_intended(self): - mm = self.create_test_metadata() - self.assertEqual(mm.ns('Name')['foo'], 'Name_foo_v') - self.assertIsNone(mm.ns('Name').get('bar')) - - mm_name = mm.ns('Name') - self.assertEqual(mm_name['foo'], 'Name_foo_v') - self.assertIsNone(mm_name.get('bar')) - self.assertEqual(mm.ns('Name')['foo'], 'Name_foo_v') - - def test_create_new_namespace(self): - # Calling ns() with an unexisting namespace should work fine. 
- mm = self.create_test_metadata() - mm.ns('NewName')['foo'] = 'NewName_foo_v' - self.assertEqual(mm.ns('NewName')['foo'], 'NewName_foo_v') - self.assertIsNone(mm.ns('NewName').get('bar')) - - def test_changing_namespace_copies_reference(self): - mm = self.create_test_metadata() - # Calling ns() copies by reference so any changes to the returned Metadata - # object is reflected in the original object. - mm_in_namespace = mm.ns('Name') - mm_in_namespace['foofoo'] = 'Name_foofoo_v' - self.assertEqual(mm.ns('Name')['foofoo'], 'Name_foofoo_v') - - def test_iterators(self): - mm = self.create_test_metadata() - self.assertSequenceEqual(list(mm.keys()), ['bar', 'foo']) - self.assertSequenceEqual( - list(mm.ns('Name').values()), ['Name_foo_v', 'Name_baz_v']) - self.assertLen(list(mm.items()), 2) - - def test_repr_str(self): - mm = self.create_test_metadata() - self.assertNotEmpty(str(mm), '') - self.assertNotEmpty(repr(mm), repr('')) - - def test_update(self): - md = common.Metadata(foo='foo_v') - md.ns('Name').update(foo='Name_foo_v', baz='Name_baz_v') - - md2 = common.Metadata() - md2.ns('Name').update(foo='Name_foo_v2', bar='Name_bar_v2') - - md.ns('Name').update(md2.ns('Name')) - - self.assertLen(md.ns('Name'), 3) - self.assertIn('bar', md.ns('Name')) - - def test_copy(self): - # There's no useful distinction to be made between copy.copy() and - # copy.deepcopy(). - mm = common.Metadata().ns('ns1') - mm.update(foo='bar') - mm_copy = copy.copy(mm) - mm_deepcopy = copy.deepcopy(mm) - # Check that copies match. - self.assertEqual(mm['foo'], 'bar') - self.assertEqual(mm_copy['foo'], 'bar') - self.assertEqual(mm_deepcopy['foo'], 'bar') - self.assertEqual(mm_deepcopy.namespaces(), mm.namespaces()) - self.assertEqual(mm_copy.namespaces(), mm.namespaces()) - # Check that the deep copy is disconnected. 
- mm_deepcopy['nerf'] = 'gleep' - with self.assertRaises(KeyError): - mm['nerf'] # pylint: disable=pointless-statement - with self.assertRaises(KeyError): - mm_copy['nerf'] # pylint: disable=pointless-statement - # Check that the shallow copy shares the metadata store with the original. - mm_copy['blip'] = 'tonk' - self.assertEqual(mm['blip'], mm_copy['blip']) - # ... but no sharing with the deep copy. - with self.assertRaises(KeyError): - mm_deepcopy['blip'] # pylint: disable=pointless-statement - # Here's a test for a specific bug, where Metadata._store is improperly - # disconnected from Metadata._stores. - mx = common.Metadata() - copy.copy(mx).ns('A')['a'] = 'Aa' - self.assertEqual(mx.ns('A')['a'], 'Aa') - - def test_construction(self): - # Test with iterables. - m0i = common.Namespace([]) - self.assertEmpty(m0i) - m0d = common.Namespace.decode('') - self.assertEmpty(m0d) - self.assertEqual(m0d, m0i) - m1i = common.Namespace(['abc']) - self.assertLen(m1i, 1) - self.assertEqual(m1i, common.Namespace(tuple(m1i))) - self.assertEqual(m1i, common.Namespace.decode(m1i.encode())) - m2i = common.Namespace(['abc', 'def']) - self.assertLen(m2i, 2) - self.assertEqual(m2i, common.Namespace(tuple(m2i))) - self.assertEqual(m2i, common.Namespace.decode(m2i.encode())) - m3i = common.Namespace(['abc', 'de:f']) - self.assertLen(m3i, 2) - self.assertEqual(m3i, common.Namespace(tuple(m3i))) - self.assertEqual(m3i, common.Namespace.decode(m3i.encode())) - # Test with strings. 
- m1sc = common.Namespace.decode(':abc') - self.assertLen(m1sc, 1) - self.assertEqual(m1sc, common.Namespace(tuple(m1sc))) - self.assertEqual(m1sc, common.Namespace.decode(m1sc.encode())) - m1s = common.Namespace.decode('abc') - self.assertLen(m1s, 1) - self.assertEqual(m1s, common.Namespace(tuple(m1s))) - self.assertEqual(m1s, common.Namespace.decode(m1s.encode())) - m2s = common.Namespace.decode('abc:def') - self.assertLen(m2s, 2) - self.assertEqual(m2s, common.Namespace(tuple(m2s))) - self.assertEqual(m2s, common.Namespace.decode(m2s.encode())) - m3s = common.Namespace.decode('abc:de\\f') - self.assertLen(m3s, 2) - self.assertEqual(m3s, common.Namespace(tuple(m3s))) - self.assertEqual(m3s, common.Namespace.decode(m3s.encode())) - - def test_startswith(self): - m1 = common.Namespace(['aa', 'bb']) - self.assertTrue(m1.startswith(common.Namespace(['aa']))) - self.assertTrue(m1.startswith(common.Namespace(['aa', 'bb']))) - self.assertTrue(m1.startswith(m1)) - self.assertTrue(m1.startswith(common.Namespace(tuple(m1)))) - self.assertFalse(m1.startswith(common.Namespace(['bb']))) - self.assertFalse(m1.startswith(common.Namespace(['aa', 'bb', 'cc']))) - self.assertFalse(m1.startswith(common.Namespace(['bb', 'bb']))) - self.assertFalse(m1.startswith(common.Namespace(['aa', 'aa']))) - - def test_subnamespace(self): - mm = common.Metadata() - mm.ns('ns1')['foo'] = 'bar' - mm.ns('ns2')['foo'] = 'bar' - mm.ns('ns1').ns('ns11')['foo'] = 'bar' - mm.ns('ns1').ns('ns:11')['gleep'] = 'nerf' - - self.assertSequenceEqual(mm.subnamespaces(), [ - common.Namespace(['ns1']), - common.Namespace(['ns2']), - common.Namespace(['ns1', 'ns11']), - common.Namespace(['ns1', 'ns:11']), - ]) - self.assertSequenceEqual( - mm.ns('ns1').subnamespaces(), [ - common.Namespace([]), - common.Namespace(['ns11']), - common.Namespace(['ns:11']) - ]) - self.assertSequenceEqual(mm.ns('ns2').subnamespaces(), [common.Namespace()]) - self.assertSequenceEqual(mm.ns('ns3').subnamespaces(), []) - - def 
test_namespace_add(self): - n0 = common.Namespace() - self.assertEmpty(n0) - self.assertEqual(n0 + (), common.Namespace([])) - self.assertEqual(n0 + ('ab',), common.Namespace([ - 'ab', - ])) - self.assertEqual(n0 + ('a:b',), common.Namespace(['a:b'])) - self.assertEqual(n0 + ('a:b',), common.Namespace(['a:b'])) - self.assertEqual(n0 + ('ab', 'cd'), common.Namespace(['ab', 'cd'])) - n1 = common.Namespace(['xy']) - self.assertLen(n1, 1) - self.assertEqual(n1 + ('ab',), common.Namespace(['xy', 'ab'])) - self.assertEqual(n1 + ('a:b',), common.Namespace(['xy', 'a:b'])) - self.assertEqual(n1 + ('a:b',), common.Namespace(['xy', 'a:b'])) - n2 = common.Namespace(['xy', 'zw']) - self.assertLen(n2, 2) - self.assertLen(n2 + ('ab',), 3) - self.assertEqual(n2 + ('ab',), common.Namespace(['xy', 'zw', 'ab'])) - self.assertLen(n2 + ('ab', 'cd'), 4) - self.assertEqual(n2 + ('ab', 'cd'), common.Namespace.decode('xy:zw:ab:cd')) - - def test_metadata_attach(self): - # Set up a metadata tree. - mm = common.Metadata() - mm.ns('ns1').ns('ns:11').update(foo='bar') - mm.ns('ns1').ns('ns12').update(foo='gleep') - mm.ns('ns1').update(foo='nerf') - mm.ns('ns|').update(foo='pag') - # Attach that metadata tree to a branch of an empty tree. - m1 = common.Metadata() - m1.ns('ns0').ns('ns00').attach(mm) - self.assertEmpty(m1.abs_ns()) - self.assertEqual(m1.ns('ns0').ns('ns00'), mm) - self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns1', 'ns:11'])['foo'], 'bar') - self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns1', 'ns12'])['foo'], 'gleep') - self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns1'])['foo'], 'nerf') - self.assertEqual(m1.abs_ns(['ns0', 'ns00', 'ns|'])['foo'], 'pag') - # Attach just part of $mm to a branch of a new, empty tree. 
- m2 = common.Metadata() - m2.ns('nsX').attach(mm.ns('ns1')) - self.assertEqual(m2.abs_ns(['nsX', 'ns:11'])['foo'], 'bar') - self.assertEqual(m2.abs_ns(['nsX', 'ns12'])['foo'], 'gleep') - self.assertEqual(m2.abs_ns(['nsX'])['foo'], 'nerf') - # Check that attach() overwrites key collisions, but preserves other data. - m3 = common.Metadata() - m3['foo'] = 'Y' # This will be overwritten. - m3['z'] = 'Z' # This will not be overwritten. - m3.attach(mm.ns('ns1').ns('ns:11')) - self.assertEqual(m3['z'], 'Z') - self.assertEqual(m3['foo'], 'bar') - - -if __name__ == '__main__': - absltest.main() + def create_test_metadata(self): + md = common.Metadata({"bar": "bar_v"}, foo="foo_v") + md.ns("Name").update(foo="Name_foo_v", baz="Name_baz_v") + return md + + def test_empty_namespaces(self): + md = common.Metadata() + self.assertEmpty(list(md.namespaces())) + md = common.Metadata().ns("ns") + self.assertEmpty(list(md.namespaces())) + + def test_nonempty_namespaces(self): + mm = self.create_test_metadata() + self.assertLen(mm.namespaces(), 2) + + def test_getters_are_consistent_when_item_is_in_dict(self): + mm = self.create_test_metadata() + self.assertEqual(mm["foo"], "foo_v") + self.assertEqual(mm.get("foo"), "foo_v") + + def test_getters_are_consistent_when_item_is_not_in_dict(self): + mm = self.create_test_metadata() + self.assertIsNone(mm.get("baz")) + with self.assertRaises(KeyError): + _ = mm["baz"] + + def test_separator_is_not_allowed_as_keys_after_init(self): + mm = self.create_test_metadata() + with self.assertRaises(KeyError): + _ = mm["Name_foo"] + + def test_namespace_works_as_intended(self): + mm = self.create_test_metadata() + self.assertEqual(mm.ns("Name")["foo"], "Name_foo_v") + self.assertIsNone(mm.ns("Name").get("bar")) + + mm_name = mm.ns("Name") + self.assertEqual(mm_name["foo"], "Name_foo_v") + self.assertIsNone(mm_name.get("bar")) + self.assertEqual(mm.ns("Name")["foo"], "Name_foo_v") + + def test_create_new_namespace(self): + # Calling ns() with an 
unexisting namespace should work fine. + mm = self.create_test_metadata() + mm.ns("NewName")["foo"] = "NewName_foo_v" + self.assertEqual(mm.ns("NewName")["foo"], "NewName_foo_v") + self.assertIsNone(mm.ns("NewName").get("bar")) + + def test_changing_namespace_copies_reference(self): + mm = self.create_test_metadata() + # Calling ns() copies by reference so any changes to the returned Metadata + # object is reflected in the original object. + mm_in_namespace = mm.ns("Name") + mm_in_namespace["foofoo"] = "Name_foofoo_v" + self.assertEqual(mm.ns("Name")["foofoo"], "Name_foofoo_v") + + def test_iterators(self): + mm = self.create_test_metadata() + self.assertSequenceEqual(list(mm.keys()), ["bar", "foo"]) + self.assertSequenceEqual( + list(mm.ns("Name").values()), ["Name_foo_v", "Name_baz_v"] + ) + self.assertLen(list(mm.items()), 2) + + def test_repr_str(self): + mm = self.create_test_metadata() + self.assertNotEmpty(str(mm), "") + self.assertNotEmpty(repr(mm), repr("")) + + def test_update(self): + md = common.Metadata(foo="foo_v") + md.ns("Name").update(foo="Name_foo_v", baz="Name_baz_v") + + md2 = common.Metadata() + md2.ns("Name").update(foo="Name_foo_v2", bar="Name_bar_v2") + + md.ns("Name").update(md2.ns("Name")) + + self.assertLen(md.ns("Name"), 3) + self.assertIn("bar", md.ns("Name")) + + def test_copy(self): + # There's no useful distinction to be made between copy.copy() and + # copy.deepcopy(). + mm = common.Metadata().ns("ns1") + mm.update(foo="bar") + mm_copy = copy.copy(mm) + mm_deepcopy = copy.deepcopy(mm) + # Check that copies match. + self.assertEqual(mm["foo"], "bar") + self.assertEqual(mm_copy["foo"], "bar") + self.assertEqual(mm_deepcopy["foo"], "bar") + self.assertEqual(mm_deepcopy.namespaces(), mm.namespaces()) + self.assertEqual(mm_copy.namespaces(), mm.namespaces()) + # Check that the deep copy is disconnected. 
+ mm_deepcopy["nerf"] = "gleep" + with self.assertRaises(KeyError): + mm["nerf"] # pylint: disable=pointless-statement + with self.assertRaises(KeyError): + mm_copy["nerf"] # pylint: disable=pointless-statement + # Check that the shallow copy shares the metadata store with the original. + mm_copy["blip"] = "tonk" + self.assertEqual(mm["blip"], mm_copy["blip"]) + # ... but no sharing with the deep copy. + with self.assertRaises(KeyError): + mm_deepcopy["blip"] # pylint: disable=pointless-statement + # Here's a test for a specific bug, where Metadata._store is improperly + # disconnected from Metadata._stores. + mx = common.Metadata() + copy.copy(mx).ns("A")["a"] = "Aa" + self.assertEqual(mx.ns("A")["a"], "Aa") + + def test_construction(self): + # Test with iterables. + m0i = common.Namespace([]) + self.assertEmpty(m0i) + m0d = common.Namespace.decode("") + self.assertEmpty(m0d) + self.assertEqual(m0d, m0i) + m1i = common.Namespace(["abc"]) + self.assertLen(m1i, 1) + self.assertEqual(m1i, common.Namespace(tuple(m1i))) + self.assertEqual(m1i, common.Namespace.decode(m1i.encode())) + m2i = common.Namespace(["abc", "def"]) + self.assertLen(m2i, 2) + self.assertEqual(m2i, common.Namespace(tuple(m2i))) + self.assertEqual(m2i, common.Namespace.decode(m2i.encode())) + m3i = common.Namespace(["abc", "de:f"]) + self.assertLen(m3i, 2) + self.assertEqual(m3i, common.Namespace(tuple(m3i))) + self.assertEqual(m3i, common.Namespace.decode(m3i.encode())) + # Test with strings. 
+ m1sc = common.Namespace.decode(":abc") + self.assertLen(m1sc, 1) + self.assertEqual(m1sc, common.Namespace(tuple(m1sc))) + self.assertEqual(m1sc, common.Namespace.decode(m1sc.encode())) + m1s = common.Namespace.decode("abc") + self.assertLen(m1s, 1) + self.assertEqual(m1s, common.Namespace(tuple(m1s))) + self.assertEqual(m1s, common.Namespace.decode(m1s.encode())) + m2s = common.Namespace.decode("abc:def") + self.assertLen(m2s, 2) + self.assertEqual(m2s, common.Namespace(tuple(m2s))) + self.assertEqual(m2s, common.Namespace.decode(m2s.encode())) + m3s = common.Namespace.decode("abc:de\\f") + self.assertLen(m3s, 2) + self.assertEqual(m3s, common.Namespace(tuple(m3s))) + self.assertEqual(m3s, common.Namespace.decode(m3s.encode())) + + def test_startswith(self): + m1 = common.Namespace(["aa", "bb"]) + self.assertTrue(m1.startswith(common.Namespace(["aa"]))) + self.assertTrue(m1.startswith(common.Namespace(["aa", "bb"]))) + self.assertTrue(m1.startswith(m1)) + self.assertTrue(m1.startswith(common.Namespace(tuple(m1)))) + self.assertFalse(m1.startswith(common.Namespace(["bb"]))) + self.assertFalse(m1.startswith(common.Namespace(["aa", "bb", "cc"]))) + self.assertFalse(m1.startswith(common.Namespace(["bb", "bb"]))) + self.assertFalse(m1.startswith(common.Namespace(["aa", "aa"]))) + + def test_subnamespace(self): + mm = common.Metadata() + mm.ns("ns1")["foo"] = "bar" + mm.ns("ns2")["foo"] = "bar" + mm.ns("ns1").ns("ns11")["foo"] = "bar" + mm.ns("ns1").ns("ns:11")["gleep"] = "nerf" + + self.assertSequenceEqual( + mm.subnamespaces(), + [ + common.Namespace(["ns1"]), + common.Namespace(["ns2"]), + common.Namespace(["ns1", "ns11"]), + common.Namespace(["ns1", "ns:11"]), + ], + ) + self.assertSequenceEqual( + mm.ns("ns1").subnamespaces(), + [ + common.Namespace([]), + common.Namespace(["ns11"]), + common.Namespace(["ns:11"]), + ], + ) + self.assertSequenceEqual(mm.ns("ns2").subnamespaces(), [common.Namespace()]) + self.assertSequenceEqual(mm.ns("ns3").subnamespaces(), []) + 
+ def test_namespace_add(self): + n0 = common.Namespace() + self.assertEmpty(n0) + self.assertEqual(n0 + (), common.Namespace([])) + self.assertEqual( + n0 + ("ab",), + common.Namespace( + [ + "ab", + ] + ), + ) + self.assertEqual(n0 + ("a:b",), common.Namespace(["a:b"])) + self.assertEqual(n0 + ("a:b",), common.Namespace(["a:b"])) + self.assertEqual(n0 + ("ab", "cd"), common.Namespace(["ab", "cd"])) + n1 = common.Namespace(["xy"]) + self.assertLen(n1, 1) + self.assertEqual(n1 + ("ab",), common.Namespace(["xy", "ab"])) + self.assertEqual(n1 + ("a:b",), common.Namespace(["xy", "a:b"])) + self.assertEqual(n1 + ("a:b",), common.Namespace(["xy", "a:b"])) + n2 = common.Namespace(["xy", "zw"]) + self.assertLen(n2, 2) + self.assertLen(n2 + ("ab",), 3) + self.assertEqual(n2 + ("ab",), common.Namespace(["xy", "zw", "ab"])) + self.assertLen(n2 + ("ab", "cd"), 4) + self.assertEqual(n2 + ("ab", "cd"), common.Namespace.decode("xy:zw:ab:cd")) + + def test_metadata_attach(self): + # Set up a metadata tree. + mm = common.Metadata() + mm.ns("ns1").ns("ns:11").update(foo="bar") + mm.ns("ns1").ns("ns12").update(foo="gleep") + mm.ns("ns1").update(foo="nerf") + mm.ns("ns|").update(foo="pag") + # Attach that metadata tree to a branch of an empty tree. + m1 = common.Metadata() + m1.ns("ns0").ns("ns00").attach(mm) + self.assertEmpty(m1.abs_ns()) + self.assertEqual(m1.ns("ns0").ns("ns00"), mm) + self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns1", "ns:11"])["foo"], "bar") + self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns1", "ns12"])["foo"], "gleep") + self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns1"])["foo"], "nerf") + self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns|"])["foo"], "pag") + # Attach just part of $mm to a branch of a new, empty tree. 
+ m2 = common.Metadata() + m2.ns("nsX").attach(mm.ns("ns1")) + self.assertEqual(m2.abs_ns(["nsX", "ns:11"])["foo"], "bar") + self.assertEqual(m2.abs_ns(["nsX", "ns12"])["foo"], "gleep") + self.assertEqual(m2.abs_ns(["nsX"])["foo"], "nerf") + # Check that attach() overwrites key collisions, but preserves other data. + m3 = common.Metadata() + m3["foo"] = "Y" # This will be overwritten. + m3["z"] = "Z" # This will not be overwritten. + m3.attach(mm.ns("ns1").ns("ns:11")) + self.assertEqual(m3["z"], "Z") + self.assertEqual(m3["foo"], "bar") + + +if __name__ == "__main__": + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/context.py b/google/cloud/aiplatform/vizier/pyvizier/context.py index 4694dac4e0..230982671f 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/context.py +++ b/google/cloud/aiplatform/vizier/pyvizier/context.py @@ -11,37 +11,44 @@ @attr.s(auto_attribs=True, frozen=False, init=True, slots=True) class Context: - """Wrapper for Context proto.""" - description: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - on_setattr=attr.setters.validate) - - parameters: Dict[str, ParameterValue] = attr.ib( - init=True, - kw_only=True, - factory=dict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(ParameterValue), - mapping_validator=attr.validators.instance_of(dict)), - on_setattr=attr.setters.validate) # pytype: disable=wrong-arg-types - - metadata: Metadata = attr.ib( - init=True, - kw_only=True, - default=Metadata(), - validator=attr.validators.instance_of(Metadata), - on_setattr=attr.setters.validate) - - related_links: Dict[str, str] = attr.ib( - init=True, - kw_only=True, - factory=dict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(str), - 
mapping_validator=attr.validators.instance_of(dict)), - on_setattr=attr.setters.validate) # pytype: disable=wrong-arg-types + """Wrapper for Context proto.""" + + description: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + on_setattr=attr.setters.validate, + ) + + parameters: Dict[str, ParameterValue] = attr.ib( + init=True, + kw_only=True, + factory=dict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(ParameterValue), + mapping_validator=attr.validators.instance_of(dict), + ), + on_setattr=attr.setters.validate, + ) # pytype: disable=wrong-arg-types + + metadata: Metadata = attr.ib( + init=True, + kw_only=True, + default=Metadata(), + validator=attr.validators.instance_of(Metadata), + on_setattr=attr.setters.validate, + ) + + related_links: Dict[str, str] = attr.ib( + init=True, + kw_only=True, + factory=dict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(str), + mapping_validator=attr.validators.instance_of(dict), + ), + on_setattr=attr.setters.validate, + ) # pytype: disable=wrong-arg-types diff --git a/google/cloud/aiplatform/vizier/pyvizier/context_test.py b/google/cloud/aiplatform/vizier/pyvizier/context_test.py index ed9665e972..65a1648238 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/context_test.py +++ b/google/cloud/aiplatform/vizier/pyvizier/context_test.py @@ -5,14 +5,13 @@ class ContextTest(absltest.TestCase): + def testDefaultsNotShared(self): + """Make sure default parameters are not shared between instances.""" + context1 = context.Context() + context2 = context.Context() + context1.parameters["x1"] = context.ParameterValue(5) + self.assertEmpty(context2.parameters) - def testDefaultsNotShared(self): - """Make sure default parameters are not shared between 
instances.""" - context1 = context.Context() - context2 = context.Context() - context1.parameters['x1'] = context.ParameterValue(5) - self.assertEmpty(context2.parameters) - -if __name__ == '__main__': - absltest.main() +if __name__ == "__main__": + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py index 3a586dff9f..1a8069da59 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py +++ b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py @@ -1,93 +1,103 @@ """Utility functions for handling vizier metadata.""" from typing import Tuple, Union, Optional, TypeVar, Type -#from vizier.service import key_value_pb2 + +# from vizier.service import key_value_pb2 from google.cloud.aiplatform.compat.types import study as study_pb2 from google.protobuf import any_pb2 from google.protobuf.message import Message -T = TypeVar('T') +T = TypeVar("T") def assign( - container: Union[study_pb2.StudySpec, study_pb2.Trial], *, key: str, - ns: str, value: Union[str, any_pb2.Any, - Message]) : #-> Tuple[key_value_pb2.KeyValue, bool]: - """Insert or assign (key, value) to container.metadata. - - Args: - container: container.metadata must be repeated KeyValue (protobuf) field. - key: - ns: A namespace for the key (defaults to '', which is the user's namespace). - value: Behavior depends on the type. `str` is copied to KeyValue.value - `any_pb2.Any` is copied to KeyValue.proto Other types are packed to - any_pb2.Any proto, which is then copied to KeyValue.proto. - - Returns: - (proto, inserted) where - proto is the protobuf that was just inserted into the $container, and - inserted is True if the proto was newly inserted, False if it was replaced. 
- """ - - for kv in container.metadata: - if kv.key == key and kv.ns == ns: - if isinstance(value, str): - kv.ClearField('proto') - kv.value = value - elif isinstance(value, any_pb2.Any): - kv.ClearField('value') - kv.proto.CopyFrom(value) - else: - kv.ClearField('value') - kv.proto.Pack(value) - return kv, False - - # The key does not exist in the metadata. - if isinstance(value, str): - metadata = container.metadata.add(key=key, ns=ns, value=value) - elif isinstance(value, any_pb2.Any): - metadata = container.metadata.add(key=key, ns=ns, proto=value) - else: - metadata = container.metadata.add(key=key, ns=ns) - metadata.proto.Pack(value) - return metadata, True - - -def get(container: Union[study_pb2.StudySpec, study_pb2.Trial], *, key: str, - ns: str) -> Optional[str]: - """Returns the metadata value associated with key, or None. - - Args: - container: A Trial of a StudySpec in protobuf form. - key: The key of a KeyValue protobuf. - ns: A namespace for the key (defaults to '', which is the user's namespace). - """ - - for kv in container.metadata: - if kv.key == key and kv.ns == ns: - if not kv.HasField('proto'): - return kv.value - return None - - -def get_proto(container: Union[study_pb2.StudySpec, study_pb2.Trial], *, - key: str, ns: str, cls: Type[T]) -> Optional[T]: - """Unpacks the proto metadata into message. - - Args: - container: (const) StudySpec or Trial to search the metadata from. - key: (const) Lookup key of the metadata. - ns: A namespace for the key (defaults to '', which is the user's namespace). - cls: Pass in a proto ***class***, not a proto object. - - Returns: - Proto message, if the value associated with the key exists and - can be parsed into proto; None otherwise. 
- """ - for kv in container.metadata: - if kv.key == key and kv.ns == ns: - if kv.HasField('proto'): - message = cls() - success = kv.proto.Unpack(message) - return message if success else None - return None + container: Union[study_pb2.StudySpec, study_pb2.Trial], + *, + key: str, + ns: str, + value: Union[str, any_pb2.Any, Message] +): # -> Tuple[key_value_pb2.KeyValue, bool]: + """Insert or assign (key, value) to container.metadata. + + Args: + container: container.metadata must be repeated KeyValue (protobuf) field. + key: + ns: A namespace for the key (defaults to '', which is the user's namespace). + value: Behavior depends on the type. `str` is copied to KeyValue.value + `any_pb2.Any` is copied to KeyValue.proto Other types are packed to + any_pb2.Any proto, which is then copied to KeyValue.proto. + + Returns: + (proto, inserted) where + proto is the protobuf that was just inserted into the $container, and + inserted is True if the proto was newly inserted, False if it was replaced. + """ + + for kv in container.metadata: + if kv.key == key and kv.ns == ns: + if isinstance(value, str): + kv.ClearField("proto") + kv.value = value + elif isinstance(value, any_pb2.Any): + kv.ClearField("value") + kv.proto.CopyFrom(value) + else: + kv.ClearField("value") + kv.proto.Pack(value) + return kv, False + + # The key does not exist in the metadata. + if isinstance(value, str): + metadata = container.metadata.add(key=key, ns=ns, value=value) + elif isinstance(value, any_pb2.Any): + metadata = container.metadata.add(key=key, ns=ns, proto=value) + else: + metadata = container.metadata.add(key=key, ns=ns) + metadata.proto.Pack(value) + return metadata, True + + +def get( + container: Union[study_pb2.StudySpec, study_pb2.Trial], *, key: str, ns: str +) -> Optional[str]: + """Returns the metadata value associated with key, or None. + + Args: + container: A Trial of a StudySpec in protobuf form. + key: The key of a KeyValue protobuf. 
+ ns: A namespace for the key (defaults to '', which is the user's namespace). + """ + + for kv in container.metadata: + if kv.key == key and kv.ns == ns: + if not kv.HasField("proto"): + return kv.value + return None + + +def get_proto( + container: Union[study_pb2.StudySpec, study_pb2.Trial], + *, + key: str, + ns: str, + cls: Type[T] +) -> Optional[T]: + """Unpacks the proto metadata into message. + + Args: + container: (const) StudySpec or Trial to search the metadata from. + key: (const) Lookup key of the metadata. + ns: A namespace for the key (defaults to '', which is the user's namespace). + cls: Pass in a proto ***class***, not a proto object. + + Returns: + Proto message, if the value associated with the key exists and + can be parsed into proto; None otherwise. + """ + for kv in container.metadata: + if kv.key == key and kv.ns == ns: + if kv.HasField("proto"): + message = cls() + success = kv.proto.Unpack(message) + return message if success else None + return None diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py index 528f6ec0f5..13390d49dd 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py @@ -15,22 +15,24 @@ class ParameterType(enum.Enum): - """Valid Values for ParameterConfig.type.""" - DOUBLE = 'DOUBLE' - INTEGER = 'INTEGER' - CATEGORICAL = 'CATEGORICAL' - DISCRETE = 'DISCRETE' + """Valid Values for ParameterConfig.type.""" - def is_numeric(self) -> bool: - return self in [self.DOUBLE, self.INTEGER, self.DISCRETE] + DOUBLE = "DOUBLE" + INTEGER = "INTEGER" + CATEGORICAL = "CATEGORICAL" + DISCRETE = "DISCRETE" + + def is_numeric(self) -> bool: + return self in [self.DOUBLE, self.INTEGER, self.DISCRETE] class ScaleType(enum.Enum): - """Valid Values for ParameterConfig.scale_type.""" - LINEAR = 'LINEAR' - LOG = 'LOG' - REVERSE_LOG = 'REVERSE_LOG' - UNIFORM_DISCRETE = 
'UNIFORM_DISCRETE' + """Valid Values for ParameterConfig.scale_type.""" + + LINEAR = "LINEAR" + LOG = "LOG" + REVERSE_LOG = "REVERSE_LOG" + UNIFORM_DISCRETE = "UNIFORM_DISCRETE" # A sequence of possible internal parameter values. @@ -39,491 +41,548 @@ class ScaleType(enum.Enum): def _validate_bounds(bounds: Union[Tuple[int, int], Tuple[float, float]]): - """Validates the bounds.""" - if len(bounds) != 2: - raise ValueError('Bounds must have length 2. Given: {}'.format(bounds)) - lower = bounds[0] - upper = bounds[1] - if not all([math.isfinite(v) for v in (lower, upper)]): - raise ValueError( - 'Both "lower" and "upper" must be finite. Given: (%f, %f)' % - (lower, upper)) - if lower > upper: - raise ValueError( - 'Lower cannot be greater than upper: given lower={} upper={}'.format( - lower, upper)) + """Validates the bounds.""" + if len(bounds) != 2: + raise ValueError("Bounds must have length 2. Given: {}".format(bounds)) + lower = bounds[0] + upper = bounds[1] + if not all([math.isfinite(v) for v in (lower, upper)]): + raise ValueError( + 'Both "lower" and "upper" must be finite. Given: (%f, %f)' % (lower, upper) + ) + if lower > upper: + raise ValueError( + "Lower cannot be greater than upper: given lower={} upper={}".format( + lower, upper + ) + ) def _get_feasible_points_and_bounds( - feasible_values: Sequence[float] + feasible_values: Sequence[float], ) -> Tuple[List[float], Union[Tuple[int, int], Tuple[float, float]]]: - """Validates and converts feasible values to floats.""" - if not all([math.isfinite(p) for p in feasible_values]): - raise ValueError('Feasible values must all be finite. Given: {}' % - feasible_values) + """Validates and converts feasible values to floats.""" + if not all([math.isfinite(p) for p in feasible_values]): + raise ValueError( + "Feasible values must all be finite. 
Given: {}" % feasible_values + ) - feasible_points = list(sorted(feasible_values)) - bounds = (feasible_points[0], feasible_points[-1]) - return feasible_points, bounds + feasible_points = list(sorted(feasible_values)) + bounds = (feasible_points[0], feasible_points[-1]) + return feasible_points, bounds def _get_categories(categories: Sequence[str]) -> List[str]: - """Returns the categories.""" - return sorted(list(categories)) + """Returns the categories.""" + return sorted(list(categories)) def _get_default_value( - param_type: ParameterType, - default_value: Union[float, int, str]) -> Union[float, int, str]: - """Validates and converts the default_value to the right type.""" - if (param_type in (ParameterType.DOUBLE, ParameterType.DISCRETE) and - (isinstance(default_value, float) or isinstance(default_value, int))): - return float(default_value) - elif (param_type == ParameterType.INTEGER and - (isinstance(default_value, float) or isinstance(default_value, int))): - if isinstance(default_value, int): - return default_value - else: - # Check if the float rounds nicely. - default_int_value = round(default_value) - if not math.isclose(default_value, default_int_value): - raise ValueError('default_value for an INTEGER parameter should be an ' - 'integer, got float: [{}]'.format(default_value)) - return default_int_value - elif (param_type == ParameterType.CATEGORICAL and - isinstance(default_value, str)): - return default_value - raise ValueError( - 'default_value has an incorrect type. 
ParameterType has type {}, ' - 'but default_value has type {}'.format(param_type.name, - type(default_value))) + param_type: ParameterType, default_value: Union[float, int, str] +) -> Union[float, int, str]: + """Validates and converts the default_value to the right type.""" + if param_type in (ParameterType.DOUBLE, ParameterType.DISCRETE) and ( + isinstance(default_value, float) or isinstance(default_value, int) + ): + return float(default_value) + elif param_type == ParameterType.INTEGER and ( + isinstance(default_value, float) or isinstance(default_value, int) + ): + if isinstance(default_value, int): + return default_value + else: + # Check if the float rounds nicely. + default_int_value = round(default_value) + if not math.isclose(default_value, default_int_value): + raise ValueError( + "default_value for an INTEGER parameter should be an " + "integer, got float: [{}]".format(default_value) + ) + return default_int_value + elif param_type == ParameterType.CATEGORICAL and isinstance(default_value, str): + return default_value + raise ValueError( + "default_value has an incorrect type. ParameterType has type {}, " + "but default_value has type {}".format(param_type.name, type(default_value)) + ) @attr.s(auto_attribs=True, frozen=True, init=True, slots=True) class ParameterConfig: - """A Vizier ParameterConfig. - - Use ParameterConfig.factory to create a valid instance. - """ - _name: str = attr.ib( - init=True, validator=attr.validators.instance_of(str), kw_only=True) - _type: ParameterType = attr.ib( - init=True, - validator=attr.validators.instance_of(ParameterType), - repr=lambda v: v.name if v is not None else 'None', - kw_only=True) - # Only one of _feasible_values, _bounds will be set at any given time. 
- _bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float)), - iterable_validator=attr.validators.instance_of(tuple))), - kw_only=True) - _feasible_values: Optional[MonotypeParameterList] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float, str)), - iterable_validator=attr.validators.instance_of((list, tuple)))), - kw_only=True) - _scale_type: Optional[ScaleType] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.instance_of(ScaleType)), - repr=lambda v: v.name if v is not None else 'None', - kw_only=True) - _default_value: Optional[Union[float, int, str]] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.instance_of((float, int, str))), - kw_only=True) - _external_type: Optional[ExternalType] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.instance_of(ExternalType)), - repr=lambda v: v.name if v is not None else 'None', - kw_only=True) - # Parent values for this ParameterConfig. If set, then this is a child - # ParameterConfig. - _matching_parent_values: Optional[MonotypeParameterList] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float, str)), - iterable_validator=attr.validators.instance_of((list, tuple)))), - kw_only=True) - # Children ParameterConfig. If set, then this is a parent ParameterConfig. - _child_parameter_configs: Optional[List['ParameterConfig']] = attr.ib( - init=True, kw_only=True) - - # Pytype treats instances of EnumTypeWrapper as types, but they can't be - # evaluated at runtime, so a Union[] of proto enums has to be a forward - # reference below. 
- @classmethod - def factory( - cls, - name: str, - *, - bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = None, - feasible_values: Optional[MonotypeParameterSequence] = None, - children: Optional[Sequence[Tuple[MonotypeParameterSequence, - 'ParameterConfig']]] = None, - scale_type: Optional[ScaleType] = None, - default_value: Optional[Union[float, int, str]] = None, - external_type: Optional[ExternalType] = ExternalType.INTERNAL - ) -> 'ParameterConfig': - """Factory method. - - Args: - name: The parameter's name. Cannot be empty. - bounds: REQUIRED for INTEGER or DOUBLE type. Specifies (min, max). The - type of (min, max) determines the created ParameterConfig's type. - feasible_values: REQUIRED for DISCRETE or CATEGORICAL type. The elements' - type determines the created ParameterConfig's type. - children: sequence of tuples formatted as: (matching_parent_values, - ParameterConfig). See - cs/learning_vizier.service.ParameterConfig.child_parameter_configs for - details. ONLY THE TYPES ARE VALIDATED. If the child ParameterConfig - protos already have parent values set, they will be overridden by the - provided matching_parent_values. - scale_type: Scaling to be applied. NOT VALIDATED. - default_value: A default value for the Parameter. - external_type: An annotation indicating the type this parameter should be - cast to. - - Returns: - A ParameterConfig object which wraps a partially validated proto. - - Raises: - ValueError: Exactly one of feasible_values and bounds must be convertible - to Boolean true. Bounds and numeric feasible_values must be finite. - Bounds and feasible_values, if provided, must consist of - elements of the same type. - TypeError: If children's matching_parent_values are not compatible with - the ParameterConfig being created. 
- """ - if not name: - raise ValueError('Parameter name cannot be empty.') - - if bool(feasible_values) == bool(bounds): - raise ValueError( - 'While creating Parameter with name={}: exactly one of ' - '"feasible_values" or "bounds" must be provided, but given ' - 'feasible_values={} and bounds={}.'.format(name, feasible_values, - bounds)) - if feasible_values: - if len(set(feasible_values)) != len(feasible_values): - counter = collections.Counter(feasible_values) - duplicate_dict = {k: v for k, v in counter.items() if v > 1} - raise ValueError( - 'Feasible values cannot have duplicates: {}'.format(duplicate_dict)) - if all(isinstance(v, (float, int)) for v in feasible_values): - inferred_type = ParameterType.DISCRETE - feasible_values, bounds = _get_feasible_points_and_bounds( - feasible_values) - elif all(isinstance(v, str) for v in feasible_values): - inferred_type = ParameterType.CATEGORICAL - feasible_values = _get_categories(feasible_values) - else: - raise ValueError( - 'Feasible values must all be numeric or strings. Given {}'.format( - feasible_values)) - else: # bounds were specified. - if isinstance(bounds[0], int) and isinstance(bounds[1], int): - inferred_type = ParameterType.INTEGER - _validate_bounds(bounds) - elif isinstance(bounds[0], float) and isinstance(bounds[1], float): - inferred_type = ParameterType.DOUBLE - _validate_bounds(bounds) - else: - raise ValueError( - 'Bounds must both be integers or doubles. 
Given: {}'.format(bounds)) - - if default_value is not None: - default_value = _get_default_value(inferred_type, default_value) - - pc = cls( - name=name, - type=inferred_type, - bounds=bounds, - feasible_values=feasible_values, - scale_type=scale_type, - default_value=default_value, - external_type=external_type, - matching_parent_values=None, - child_parameter_configs=None) - if children: - pc = pc.add_children(children) - return pc - - @property - def name(self) -> str: - return self._name - - @property - def type(self) -> ParameterType: - return self._type - - @property - def external_type(self) -> ExternalType: - return self._external_type - - @property - def scale_type(self) -> Optional[ScaleType]: - return self._scale_type - - @property - def bounds(self) -> Union[Tuple[float, float], Tuple[int, int]]: - """Returns the bounds, if set, or raises a ValueError.""" - if self.type == ParameterType.CATEGORICAL: - raise ValueError('Accessing bounds of a categorical parameter: %s' % - self.name) - return self._bounds - - @property - def matching_parent_values(self) -> MonotypeParameterList: - """Returns the matching parent values, if this is a child parameter.""" - if not self._matching_parent_values: - return [] - return copy.copy(self._matching_parent_values) - - @property - def child_parameter_configs(self) -> List['ParameterConfig']: - if not self._child_parameter_configs: - return [] - return copy.deepcopy(self._child_parameter_configs) - - def _del_child_parameter_configs(self): - """Deletes the current child ParameterConfigs.""" - object.__setattr__(self, '_child_parameter_configs', None) - - @property - def clone_without_children(self) -> 'ParameterConfig': - """Returns the clone of self, without child_parameter_configs.""" - clone = copy.deepcopy(self) - clone._del_child_parameter_configs() # pylint: disable='protected-access' - return clone - - @property - def feasible_values(self) -> Union[List[int], List[float], List[str]]: - if self.type in 
(ParameterType.DISCRETE, ParameterType.CATEGORICAL): - if not self._feasible_values: - return [] - return copy.copy(self._feasible_values) - elif self.type == ParameterType.INTEGER: - return list(range(self.bounds[0], self.bounds[1] + 1)) - raise ValueError('feasible_values is invalid for type: %s' % self.type) - - @property - def default_value(self) -> Optional[Union[int, float, str]]: - """Returns the default value, or None if not set.""" - return self._default_value - - def _set_matching_parent_values(self, - parent_values: MonotypeParameterSequence): - """Sets the given matching parent values in this object, without validation. - - Args: - parent_values: Parent values for which this child ParameterConfig is - active. Existing values will be replaced. - """ - object.__setattr__(self, '_matching_parent_values', list(parent_values)) - - def _set_child_parameter_configs(self, children: List['ParameterConfig']): - """Sets the given child ParameterConfigs in this object, without validation. + """A Vizier ParameterConfig. - Args: - children: The children to set in this object. Existing children will be - replaced. + Use ParameterConfig.factory to create a valid instance. """ - object.__setattr__(self, '_child_parameter_configs', children) - - def add_children( - self, new_children: Sequence[Tuple[MonotypeParameterSequence, - 'ParameterConfig']] - ) -> 'ParameterConfig': - """Clones the ParameterConfig and adds new children to it. - - Args: - new_children: A sequence of tuples formatted as: (matching_parent_values, - ParameterConfig). If the child ParameterConfig have pre-existing parent - values, they will be overridden. - Returns: - A parent parameter config, with children set. 
- - Raises: - ValueError: If the child configs are invalid - TypeError: If matching parent values are invalid - """ - parent = copy.deepcopy(self) - if not new_children: - return parent - - for child_pair in new_children: - if len(child_pair) != 2: - raise ValueError('Each element in new_children must be a tuple of ' - '(Sequence of valid parent values, ParameterConfig),' - ' given: {}'.format(child_pair)) - - logging.debug('add_children: new_children=%s', new_children) - child_parameter_configs = parent.child_parameter_configs - for unsorted_parent_values, child in new_children: - parent_values = sorted(unsorted_parent_values) - child_copy = copy.deepcopy(child) - if parent.type == ParameterType.DISCRETE: - if not all(isinstance(v, (float, int)) for v in parent_values): - raise TypeError('Parent is DISCRETE-typed, but a child is specifying ' - 'one or more non float/int parent values: child={} ' - ', parent_values={}'.format(child, parent_values)) - child_copy._set_matching_parent_values(parent_values) # pylint: disable='protected-access' - elif parent.type == ParameterType.CATEGORICAL: - if not all(isinstance(v, str) for v in parent_values): - raise TypeError('Parent is CATEGORICAL-typed, but a child is ' - 'specifying one or more non float/int parent values: ' - 'child={}, parent_values={}'.format( - child, parent_values)) - child_copy._set_matching_parent_values(parent_values) # pylint: disable='protected-access' - elif parent.type == ParameterType.INTEGER: - # Allow {int, float}->float conversion but block str->float conversion. 
- int_values = [int(v) for v in parent_values] - if int_values != parent_values: - raise TypeError( - 'Parent is INTEGER-typed, but a child is specifying one or more ' - 'non-integral parent values: {}'.format(parent_values)) - child_copy._set_matching_parent_values(int_values) # pylint: disable='protected-access' - else: - raise ValueError('DOUBLE type cannot have child parameters') - child_parameter_configs.extend([child_copy]) - parent._set_child_parameter_configs(child_parameter_configs) # pylint: disable='protected-access' - return parent - - def continuify(self) -> 'ParameterConfig': - """Returns a newly created DOUBLE parameter with the same range.""" - if self.type == ParameterType.DOUBLE: - return copy.deepcopy(self) - elif not ParameterType.is_numeric(self.type): - raise ValueError( - 'Cannot convert a non-numeric parameter to DOUBLE: {}'.format(self)) - elif self._child_parameter_configs: - raise ValueError( - 'Cannot convert a parent parameter to DOUBLE: {}'.format(self)) - - scale_type = self.scale_type - if scale_type == ScaleType.UNIFORM_DISCRETE: - logging.log_every_n( - logging.WARNING, - 'Converting a UNIFORM_DISCRETE scaled discrete parameter ' - 'to DOUBLE: %s', 10, self) - scale_type = None - - default_value = self.default_value - if default_value is not None: - default_value = float(default_value) - return ParameterConfig.factory( - self.name, - bounds=(float(self.bounds[0]), float(self.bounds[1])), - scale_type=scale_type, - default_value=default_value) - - @classmethod - def merge(cls, one: 'ParameterConfig', - other: 'ParameterConfig') -> 'ParameterConfig': - """Merge two ParameterConfigs. - - Args: - one: ParameterConfig with no child parameters. - other: Must have the same type as one, and may not have child parameters. - - Returns: - For Categorical, Discrete or Integer ParameterConfigs, the resulting - config will be the union of all feasible values. 
- For Double ParameterConfigs, the resulting config will have [min_value, - max_value] set to the smallest and largest bounds. - - Raises: - ValueError: If any of the input configs has child parameters, or if - the two parameters have different types. - """ - if one.child_parameter_configs or other.child_parameter_configs: - raise ValueError( - 'Cannot merge parameters with child_parameter_configs: %s and %s' % - one, other) - if one.type != other.type: - raise ValueError('Type conflicts between {} and {}'.format( - one.type.name, other.type.name)) - if one.scale_type != other.scale_type: - logging.warning('Scale type conflicts while merging %s and %s', one, - other) - - if one.type in (ParameterType.CATEGORICAL, ParameterType.DISCRETE): - new_feasible_values = list( - set(one.feasible_values + other.feasible_values)) - return ParameterConfig.factory( - name=one.name, - feasible_values=new_feasible_values, - scale_type=one.scale_type) - elif one.type in (ParameterType.INTEGER, ParameterType.DOUBLE): - original_min, original_max = one.bounds - other_min, other_max = other.bounds - new_bounds = (min(original_min, other_min), max(original_max, other_max)) - return ParameterConfig.factory( - name=one.name, bounds=new_bounds, scale_type=one.scale_type) - raise ValueError('Unknown type {}. This is currently' - 'an unreachable code.'.format(one.type)) - - def traverse( - self, - show_children: bool = False) -> Generator['ParameterConfig', None, None]: - """DFS Generator for parameter configs. - - Args: - show_children: If True, every generated ParameterConfig has - child_parameter_configs. For example, if 'foo' has two child configs - 'bar1' and 'bar2', then traversing 'foo' with show_children=True - generates (foo, with bar1,bar2 as children), (bar1), and (bar2). If - show_children=False, it generates (foo, without children), (bar1), and - (bar2). - - Yields: - DFS on all parameter configs. 
- """ - if show_children: - yield self - else: - yield self.clone_without_children - for child in self.child_parameter_configs: - yield from child.traverse(show_children) - - def contains( - self, value: Union[trial.ParameterValueTypes, - trial.ParameterValue]) -> bool: - """Check if the `value` is a valid value for this parameter config.""" - if not isinstance(value, trial.ParameterValue): - value = trial.ParameterValue(value) - - if self.type == ParameterType.DOUBLE: - return self.bounds[0] <= value.as_float and value.as_float <= self.bounds[ - 1] - elif self.type == ParameterType.INTEGER: - if value.as_int != value.as_float: - return False - return self.bounds[0] <= value.as_int and value.as_int <= self.bounds[1] - elif self.type == ParameterType.DISCRETE: - return value.as_float in self.feasible_values - elif self.type == ParameterType.CATEGORICAL: - return value.as_str in self.feasible_values - else: - raise NotImplementedError(f'Cannot determine whether {value} is feasible' - f'for Unknown parameter type {self.type}.\n' - f'Full config: {repr(self)}') - - @property - def num_feasible_values(self) -> Union[float, int]: - if self.type == ParameterType.DOUBLE: - return float('inf') - elif self.type == ParameterType.INTEGER: - return self.bounds[1] - self.bounds[0] + 1 - else: - return len(self.feasible_values) + _name: str = attr.ib( + init=True, validator=attr.validators.instance_of(str), kw_only=True + ) + _type: ParameterType = attr.ib( + init=True, + validator=attr.validators.instance_of(ParameterType), + repr=lambda v: v.name if v is not None else "None", + kw_only=True, + ) + # Only one of _feasible_values, _bounds will be set at any given time. 
+ _bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float)), + iterable_validator=attr.validators.instance_of(tuple), + ) + ), + kw_only=True, + ) + _feasible_values: Optional[MonotypeParameterList] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float, str)), + iterable_validator=attr.validators.instance_of((list, tuple)), + ) + ), + kw_only=True, + ) + _scale_type: Optional[ScaleType] = attr.ib( + init=True, + validator=attr.validators.optional(attr.validators.instance_of(ScaleType)), + repr=lambda v: v.name if v is not None else "None", + kw_only=True, + ) + _default_value: Optional[Union[float, int, str]] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.instance_of((float, int, str)) + ), + kw_only=True, + ) + _external_type: Optional[ExternalType] = attr.ib( + init=True, + validator=attr.validators.optional(attr.validators.instance_of(ExternalType)), + repr=lambda v: v.name if v is not None else "None", + kw_only=True, + ) + # Parent values for this ParameterConfig. If set, then this is a child + # ParameterConfig. + _matching_parent_values: Optional[MonotypeParameterList] = attr.ib( + init=True, + validator=attr.validators.optional( + attr.validators.deep_iterable( + member_validator=attr.validators.instance_of((int, float, str)), + iterable_validator=attr.validators.instance_of((list, tuple)), + ) + ), + kw_only=True, + ) + # Children ParameterConfig. If set, then this is a parent ParameterConfig. + _child_parameter_configs: Optional[List["ParameterConfig"]] = attr.ib( + init=True, kw_only=True + ) + + # Pytype treats instances of EnumTypeWrapper as types, but they can't be + # evaluated at runtime, so a Union[] of proto enums has to be a forward + # reference below. 
+ @classmethod + def factory( + cls, + name: str, + *, + bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = None, + feasible_values: Optional[MonotypeParameterSequence] = None, + children: Optional[ + Sequence[Tuple[MonotypeParameterSequence, "ParameterConfig"]] + ] = None, + scale_type: Optional[ScaleType] = None, + default_value: Optional[Union[float, int, str]] = None, + external_type: Optional[ExternalType] = ExternalType.INTERNAL, + ) -> "ParameterConfig": + """Factory method. + + Args: + name: The parameter's name. Cannot be empty. + bounds: REQUIRED for INTEGER or DOUBLE type. Specifies (min, max). The + type of (min, max) determines the created ParameterConfig's type. + feasible_values: REQUIRED for DISCRETE or CATEGORICAL type. The elements' + type determines the created ParameterConfig's type. + children: sequence of tuples formatted as: (matching_parent_values, + ParameterConfig). See + cs/learning_vizier.service.ParameterConfig.child_parameter_configs for + details. ONLY THE TYPES ARE VALIDATED. If the child ParameterConfig + protos already have parent values set, they will be overridden by the + provided matching_parent_values. + scale_type: Scaling to be applied. NOT VALIDATED. + default_value: A default value for the Parameter. + external_type: An annotation indicating the type this parameter should be + cast to. + + Returns: + A ParameterConfig object which wraps a partially validated proto. + + Raises: + ValueError: Exactly one of feasible_values and bounds must be convertible + to Boolean true. Bounds and numeric feasible_values must be finite. + Bounds and feasible_values, if provided, must consist of + elements of the same type. + TypeError: If children's matching_parent_values are not compatible with + the ParameterConfig being created. 
+ """ + if not name: + raise ValueError("Parameter name cannot be empty.") + + if bool(feasible_values) == bool(bounds): + raise ValueError( + "While creating Parameter with name={}: exactly one of " + '"feasible_values" or "bounds" must be provided, but given ' + "feasible_values={} and bounds={}.".format( + name, feasible_values, bounds + ) + ) + if feasible_values: + if len(set(feasible_values)) != len(feasible_values): + counter = collections.Counter(feasible_values) + duplicate_dict = {k: v for k, v in counter.items() if v > 1} + raise ValueError( + "Feasible values cannot have duplicates: {}".format(duplicate_dict) + ) + if all(isinstance(v, (float, int)) for v in feasible_values): + inferred_type = ParameterType.DISCRETE + feasible_values, bounds = _get_feasible_points_and_bounds( + feasible_values + ) + elif all(isinstance(v, str) for v in feasible_values): + inferred_type = ParameterType.CATEGORICAL + feasible_values = _get_categories(feasible_values) + else: + raise ValueError( + "Feasible values must all be numeric or strings. Given {}".format( + feasible_values + ) + ) + else: # bounds were specified. + if isinstance(bounds[0], int) and isinstance(bounds[1], int): + inferred_type = ParameterType.INTEGER + _validate_bounds(bounds) + elif isinstance(bounds[0], float) and isinstance(bounds[1], float): + inferred_type = ParameterType.DOUBLE + _validate_bounds(bounds) + else: + raise ValueError( + "Bounds must both be integers or doubles. 
Given: {}".format(bounds) + ) + + if default_value is not None: + default_value = _get_default_value(inferred_type, default_value) + + pc = cls( + name=name, + type=inferred_type, + bounds=bounds, + feasible_values=feasible_values, + scale_type=scale_type, + default_value=default_value, + external_type=external_type, + matching_parent_values=None, + child_parameter_configs=None, + ) + if children: + pc = pc.add_children(children) + return pc + + @property + def name(self) -> str: + return self._name + + @property + def type(self) -> ParameterType: + return self._type + + @property + def external_type(self) -> ExternalType: + return self._external_type + + @property + def scale_type(self) -> Optional[ScaleType]: + return self._scale_type + + @property + def bounds(self) -> Union[Tuple[float, float], Tuple[int, int]]: + """Returns the bounds, if set, or raises a ValueError.""" + if self.type == ParameterType.CATEGORICAL: + raise ValueError( + "Accessing bounds of a categorical parameter: %s" % self.name + ) + return self._bounds + + @property + def matching_parent_values(self) -> MonotypeParameterList: + """Returns the matching parent values, if this is a child parameter.""" + if not self._matching_parent_values: + return [] + return copy.copy(self._matching_parent_values) + + @property + def child_parameter_configs(self) -> List["ParameterConfig"]: + if not self._child_parameter_configs: + return [] + return copy.deepcopy(self._child_parameter_configs) + + def _del_child_parameter_configs(self): + """Deletes the current child ParameterConfigs.""" + object.__setattr__(self, "_child_parameter_configs", None) + + @property + def clone_without_children(self) -> "ParameterConfig": + """Returns the clone of self, without child_parameter_configs.""" + clone = copy.deepcopy(self) + clone._del_child_parameter_configs() # pylint: disable='protected-access' + return clone + + @property + def feasible_values(self) -> Union[List[int], List[float], List[str]]: + if self.type in 
(ParameterType.DISCRETE, ParameterType.CATEGORICAL): + if not self._feasible_values: + return [] + return copy.copy(self._feasible_values) + elif self.type == ParameterType.INTEGER: + return list(range(self.bounds[0], self.bounds[1] + 1)) + raise ValueError("feasible_values is invalid for type: %s" % self.type) + + @property + def default_value(self) -> Optional[Union[int, float, str]]: + """Returns the default value, or None if not set.""" + return self._default_value + + def _set_matching_parent_values(self, parent_values: MonotypeParameterSequence): + """Sets the given matching parent values in this object, without validation. + + Args: + parent_values: Parent values for which this child ParameterConfig is + active. Existing values will be replaced. + """ + object.__setattr__(self, "_matching_parent_values", list(parent_values)) + + def _set_child_parameter_configs(self, children: List["ParameterConfig"]): + """Sets the given child ParameterConfigs in this object, without validation. + + Args: + children: The children to set in this object. Existing children will be + replaced. + """ + object.__setattr__(self, "_child_parameter_configs", children) + + def add_children( + self, + new_children: Sequence[Tuple[MonotypeParameterSequence, "ParameterConfig"]], + ) -> "ParameterConfig": + """Clones the ParameterConfig and adds new children to it. + + Args: + new_children: A sequence of tuples formatted as: (matching_parent_values, + ParameterConfig). If the child ParameterConfig have pre-existing parent + values, they will be overridden. + + Returns: + A parent parameter config, with children set. 
+ + Raises: + ValueError: If the child configs are invalid + TypeError: If matching parent values are invalid + """ + parent = copy.deepcopy(self) + if not new_children: + return parent + + for child_pair in new_children: + if len(child_pair) != 2: + raise ValueError( + "Each element in new_children must be a tuple of " + "(Sequence of valid parent values, ParameterConfig)," + " given: {}".format(child_pair) + ) + + logging.debug("add_children: new_children=%s", new_children) + child_parameter_configs = parent.child_parameter_configs + for unsorted_parent_values, child in new_children: + parent_values = sorted(unsorted_parent_values) + child_copy = copy.deepcopy(child) + if parent.type == ParameterType.DISCRETE: + if not all(isinstance(v, (float, int)) for v in parent_values): + raise TypeError( + "Parent is DISCRETE-typed, but a child is specifying " + "one or more non float/int parent values: child={} " + ", parent_values={}".format(child, parent_values) + ) + child_copy._set_matching_parent_values( + parent_values + ) # pylint: disable='protected-access' + elif parent.type == ParameterType.CATEGORICAL: + if not all(isinstance(v, str) for v in parent_values): + raise TypeError( + "Parent is CATEGORICAL-typed, but a child is " + "specifying one or more non float/int parent values: " + "child={}, parent_values={}".format(child, parent_values) + ) + child_copy._set_matching_parent_values( + parent_values + ) # pylint: disable='protected-access' + elif parent.type == ParameterType.INTEGER: + # Allow {int, float}->float conversion but block str->float conversion. 
+ int_values = [int(v) for v in parent_values] + if int_values != parent_values: + raise TypeError( + "Parent is INTEGER-typed, but a child is specifying one or more " + "non-integral parent values: {}".format(parent_values) + ) + child_copy._set_matching_parent_values( + int_values + ) # pylint: disable='protected-access' + else: + raise ValueError("DOUBLE type cannot have child parameters") + child_parameter_configs.extend([child_copy]) + parent._set_child_parameter_configs( + child_parameter_configs + ) # pylint: disable='protected-access' + return parent + + def continuify(self) -> "ParameterConfig": + """Returns a newly created DOUBLE parameter with the same range.""" + if self.type == ParameterType.DOUBLE: + return copy.deepcopy(self) + elif not ParameterType.is_numeric(self.type): + raise ValueError( + "Cannot convert a non-numeric parameter to DOUBLE: {}".format(self) + ) + elif self._child_parameter_configs: + raise ValueError( + "Cannot convert a parent parameter to DOUBLE: {}".format(self) + ) + + scale_type = self.scale_type + if scale_type == ScaleType.UNIFORM_DISCRETE: + logging.log_every_n( + logging.WARNING, + "Converting a UNIFORM_DISCRETE scaled discrete parameter " + "to DOUBLE: %s", + 10, + self, + ) + scale_type = None + + default_value = self.default_value + if default_value is not None: + default_value = float(default_value) + return ParameterConfig.factory( + self.name, + bounds=(float(self.bounds[0]), float(self.bounds[1])), + scale_type=scale_type, + default_value=default_value, + ) + + @classmethod + def merge( + cls, one: "ParameterConfig", other: "ParameterConfig" + ) -> "ParameterConfig": + """Merge two ParameterConfigs. + + Args: + one: ParameterConfig with no child parameters. + other: Must have the same type as one, and may not have child parameters. + + Returns: + For Categorical, Discrete or Integer ParameterConfigs, the resulting + config will be the union of all feasible values. 
+ For Double ParameterConfigs, the resulting config will have [min_value, + max_value] set to the smallest and largest bounds. + + Raises: + ValueError: If any of the input configs has child parameters, or if + the two parameters have different types. + """ + if one.child_parameter_configs or other.child_parameter_configs: + raise ValueError( + "Cannot merge parameters with child_parameter_configs: %s and %s" % one, + other, + ) + if one.type != other.type: + raise ValueError( + "Type conflicts between {} and {}".format( + one.type.name, other.type.name + ) + ) + if one.scale_type != other.scale_type: + logging.warning("Scale type conflicts while merging %s and %s", one, other) + + if one.type in (ParameterType.CATEGORICAL, ParameterType.DISCRETE): + new_feasible_values = list(set(one.feasible_values + other.feasible_values)) + return ParameterConfig.factory( + name=one.name, + feasible_values=new_feasible_values, + scale_type=one.scale_type, + ) + elif one.type in (ParameterType.INTEGER, ParameterType.DOUBLE): + original_min, original_max = one.bounds + other_min, other_max = other.bounds + new_bounds = (min(original_min, other_min), max(original_max, other_max)) + return ParameterConfig.factory( + name=one.name, bounds=new_bounds, scale_type=one.scale_type + ) + raise ValueError( + "Unknown type {}. This is currently" "an unreachable code.".format(one.type) + ) + + def traverse( + self, show_children: bool = False + ) -> Generator["ParameterConfig", None, None]: + """DFS Generator for parameter configs. + + Args: + show_children: If True, every generated ParameterConfig has + child_parameter_configs. For example, if 'foo' has two child configs + 'bar1' and 'bar2', then traversing 'foo' with show_children=True + generates (foo, with bar1,bar2 as children), (bar1), and (bar2). If + show_children=False, it generates (foo, without children), (bar1), and + (bar2). + + Yields: + DFS on all parameter configs. 
+ """ + if show_children: + yield self + else: + yield self.clone_without_children + for child in self.child_parameter_configs: + yield from child.traverse(show_children) + + def contains( + self, value: Union[trial.ParameterValueTypes, trial.ParameterValue] + ) -> bool: + """Check if the `value` is a valid value for this parameter config.""" + if not isinstance(value, trial.ParameterValue): + value = trial.ParameterValue(value) + + if self.type == ParameterType.DOUBLE: + return self.bounds[0] <= value.as_float and value.as_float <= self.bounds[1] + elif self.type == ParameterType.INTEGER: + if value.as_int != value.as_float: + return False + return self.bounds[0] <= value.as_int and value.as_int <= self.bounds[1] + elif self.type == ParameterType.DISCRETE: + return value.as_float in self.feasible_values + elif self.type == ParameterType.CATEGORICAL: + return value.as_str in self.feasible_values + else: + raise NotImplementedError( + f"Cannot determine whether {value} is feasible" + f"for Unknown parameter type {self.type}.\n" + f"Full config: {repr(self)}" + ) + + @property + def num_feasible_values(self) -> Union[float, int]: + if self.type == ParameterType.DOUBLE: + return float("inf") + elif self.type == ParameterType.INTEGER: + return self.bounds[1] - self.bounds[0] + 1 + else: + return len(self.feasible_values) diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py index 15f50da9a1..81c3304b32 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py +++ b/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py @@ -8,314 +8,353 @@ class ParameterConfigFactoryTest(parameterized.TestCase): - - def testCreatesDoubleConfig(self): - parameter_config = pc.ParameterConfig.factory( - 'name', - bounds=(-1.0, 1.0), - scale_type=pc.ScaleType.LINEAR, - default_value=0.1) - self.assertEqual(parameter_config.name, 'name') - 
self.assertEqual(parameter_config.type, pc.ParameterType.DOUBLE) - self.assertEqual(parameter_config.bounds, (-1, 1)) - self.assertEqual(parameter_config.scale_type, pc.ScaleType.LINEAR) - self.assertEqual(parameter_config.default_value, 0.1) - self.assertIsInstance(parameter_config.default_value, float) - with self.assertRaises(ValueError): - _ = parameter_config.feasible_values - - self.assertEqual(parameter_config.continuify(), parameter_config) - - def testCreatesIntegerConfig(self): - parameter_config = pc.ParameterConfig.factory( - 'name', bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=1) - self.assertEqual(parameter_config.name, 'name') - self.assertEqual(parameter_config.type, pc.ParameterType.INTEGER) - self.assertEqual(parameter_config.feasible_values, [1, 2, 3]) - self.assertEqual(parameter_config.bounds, (1, 3)) - self.assertEqual(parameter_config.scale_type, pc.ScaleType.LOG) - self.assertEqual(parameter_config.default_value, 1) - self.assertIsInstance(parameter_config.default_value, int) - - self.assertEqual( - parameter_config.continuify(), - pc.ParameterConfig.factory( - 'name', - bounds=(1.0, 3.0), - scale_type=pc.ScaleType.LOG, - default_value=1.0)) - - def testCreatesDiscreteConfig(self): - feasible_values = (-1, 3, 2) - parameter_config = pc.ParameterConfig.factory( - 'name', - feasible_values=feasible_values, - scale_type=pc.ScaleType.UNIFORM_DISCRETE, - default_value=2, - external_type=pc.ExternalType.INTEGER) - self.assertEqual(parameter_config.name, 'name') - self.assertEqual(parameter_config.type, pc.ParameterType.DISCRETE) - self.assertEqual(parameter_config.feasible_values, [-1, 2, 3]) - self.assertEqual(parameter_config.bounds, (-1, 3)) - self.assertEqual(parameter_config.scale_type, pc.ScaleType.UNIFORM_DISCRETE) - self.assertEqual(parameter_config.default_value, 2) - self.assertIsInstance(parameter_config.default_value, float) - self.assertEqual(parameter_config.external_type, pc.ExternalType.INTEGER) - - self.assertEqual( - 
parameter_config.continuify(), - pc.ParameterConfig.factory( - 'name', bounds=(-1.0, 3.0), default_value=2.0)) - - def testCreatesCategoricalConfig(self): - feasible_values = ('b', 'a', 'c') - parameter_config = pc.ParameterConfig.factory( - 'name', feasible_values=feasible_values, default_value='c') - self.assertEqual(parameter_config.name, 'name') - self.assertEqual(parameter_config.feasible_values, ['a', 'b', 'c']) - self.assertEqual(parameter_config.default_value, 'c') - with self.assertRaises(ValueError): - _ = parameter_config.bounds - - def testCreatesDoubleConfigIntDefault(self): - parameter_config = pc.ParameterConfig.factory( - 'name', - bounds=(-1.0, 1.0), - scale_type=pc.ScaleType.LINEAR, - default_value=1) - self.assertEqual(parameter_config.default_value, 1.0) - self.assertIsInstance(parameter_config.default_value, float) - - def testCreatesDiscreteConfigDoubleDefault(self): - feasible_values = (-1, 3, 2) - parameter_config = pc.ParameterConfig.factory( - 'name', - feasible_values=feasible_values, - scale_type=pc.ScaleType.UNIFORM_DISCRETE, - default_value=2.0) - self.assertEqual(parameter_config.default_value, 2.0) - self.assertIsInstance(parameter_config.default_value, float) - - def testCreatesIntegerConfigDoubleDefault(self): - parameter_config = pc.ParameterConfig.factory( - 'name', bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=2.0) - self.assertEqual(parameter_config.default_value, 2.0) - self.assertIsInstance(parameter_config.default_value, int) - - def testCreatesIntegerConfigInvalidDoubleDefault(self): - with self.assertRaisesRegex(ValueError, 'default_value for an.*'): - pc.ParameterConfig.factory( - 'name', - bounds=(1, 3), - scale_type=pc.ScaleType.LOG, - default_value=2.0001) - - def testCreatesCategoricalConfigNoDefault(self): - feasible_values = ('b', 'a', 'c') - parameter_config = pc.ParameterConfig.factory( - 'name', feasible_values=feasible_values) - self.assertIsNone(parameter_config.default_value) - - def 
testCreatesCategoricalConfigBadDefault(self): - feasible_values = ('b', 'a', 'c') - with self.assertRaisesRegex(ValueError, - 'default_value has an incorrect type.*'): - pc.ParameterConfig.factory( - 'name', feasible_values=feasible_values, default_value=0.1) - - def testRaisesErrorWhenNameIsEmpty(self): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory('', bounds=(-1.0, 1.0)) - - def testRaisesErrorWhenOverSpecified(self): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory( - 'name', bounds=(-1.0, 1.0), feasible_values=['a', 'b', 'c']) - - @parameterized.named_parameters( - ('HaveInfinity', (-float('inf'), 1)), ('HaveNan', (1, float('nan'))), - ('HaveMixedTypes', (1, float(1))), ('AreWronglyOrdered', (1, -1))) - def testRaisesErrorWhenBounds(self, bounds): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory('name', bounds=bounds) - - @parameterized.named_parameters(('HaveDuplicateCategories', ['a', 'a', 'b']), - ('HaveDuplicateNumbers', [1.0, 2.0, 2.0]), - ('HaveMixedTypes', ['a', 1, 2])) - def testRaisesErrorWhenFeasibleValues(self, feasible_values): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory('name', feasible_values=feasible_values) - - -_child1 = pc.ParameterConfig.factory('double_child', bounds=(0.0, 1.0)) -_child2 = pc.ParameterConfig.factory('integer_child', bounds=(0, 1)) + def testCreatesDoubleConfig(self): + parameter_config = pc.ParameterConfig.factory( + "name", + bounds=(-1.0, 1.0), + scale_type=pc.ScaleType.LINEAR, + default_value=0.1, + ) + self.assertEqual(parameter_config.name, "name") + self.assertEqual(parameter_config.type, pc.ParameterType.DOUBLE) + self.assertEqual(parameter_config.bounds, (-1, 1)) + self.assertEqual(parameter_config.scale_type, pc.ScaleType.LINEAR) + self.assertEqual(parameter_config.default_value, 0.1) + self.assertIsInstance(parameter_config.default_value, float) + with self.assertRaises(ValueError): + _ = parameter_config.feasible_values 
+ + self.assertEqual(parameter_config.continuify(), parameter_config) + + def testCreatesIntegerConfig(self): + parameter_config = pc.ParameterConfig.factory( + "name", bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=1 + ) + self.assertEqual(parameter_config.name, "name") + self.assertEqual(parameter_config.type, pc.ParameterType.INTEGER) + self.assertEqual(parameter_config.feasible_values, [1, 2, 3]) + self.assertEqual(parameter_config.bounds, (1, 3)) + self.assertEqual(parameter_config.scale_type, pc.ScaleType.LOG) + self.assertEqual(parameter_config.default_value, 1) + self.assertIsInstance(parameter_config.default_value, int) + + self.assertEqual( + parameter_config.continuify(), + pc.ParameterConfig.factory( + "name", + bounds=(1.0, 3.0), + scale_type=pc.ScaleType.LOG, + default_value=1.0, + ), + ) + + def testCreatesDiscreteConfig(self): + feasible_values = (-1, 3, 2) + parameter_config = pc.ParameterConfig.factory( + "name", + feasible_values=feasible_values, + scale_type=pc.ScaleType.UNIFORM_DISCRETE, + default_value=2, + external_type=pc.ExternalType.INTEGER, + ) + self.assertEqual(parameter_config.name, "name") + self.assertEqual(parameter_config.type, pc.ParameterType.DISCRETE) + self.assertEqual(parameter_config.feasible_values, [-1, 2, 3]) + self.assertEqual(parameter_config.bounds, (-1, 3)) + self.assertEqual(parameter_config.scale_type, pc.ScaleType.UNIFORM_DISCRETE) + self.assertEqual(parameter_config.default_value, 2) + self.assertIsInstance(parameter_config.default_value, float) + self.assertEqual(parameter_config.external_type, pc.ExternalType.INTEGER) + + self.assertEqual( + parameter_config.continuify(), + pc.ParameterConfig.factory("name", bounds=(-1.0, 3.0), default_value=2.0), + ) + + def testCreatesCategoricalConfig(self): + feasible_values = ("b", "a", "c") + parameter_config = pc.ParameterConfig.factory( + "name", feasible_values=feasible_values, default_value="c" + ) + self.assertEqual(parameter_config.name, "name") + 
self.assertEqual(parameter_config.feasible_values, ["a", "b", "c"]) + self.assertEqual(parameter_config.default_value, "c") + with self.assertRaises(ValueError): + _ = parameter_config.bounds + + def testCreatesDoubleConfigIntDefault(self): + parameter_config = pc.ParameterConfig.factory( + "name", bounds=(-1.0, 1.0), scale_type=pc.ScaleType.LINEAR, default_value=1 + ) + self.assertEqual(parameter_config.default_value, 1.0) + self.assertIsInstance(parameter_config.default_value, float) + + def testCreatesDiscreteConfigDoubleDefault(self): + feasible_values = (-1, 3, 2) + parameter_config = pc.ParameterConfig.factory( + "name", + feasible_values=feasible_values, + scale_type=pc.ScaleType.UNIFORM_DISCRETE, + default_value=2.0, + ) + self.assertEqual(parameter_config.default_value, 2.0) + self.assertIsInstance(parameter_config.default_value, float) + + def testCreatesIntegerConfigDoubleDefault(self): + parameter_config = pc.ParameterConfig.factory( + "name", bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=2.0 + ) + self.assertEqual(parameter_config.default_value, 2.0) + self.assertIsInstance(parameter_config.default_value, int) + + def testCreatesIntegerConfigInvalidDoubleDefault(self): + with self.assertRaisesRegex(ValueError, "default_value for an.*"): + pc.ParameterConfig.factory( + "name", bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=2.0001 + ) + + def testCreatesCategoricalConfigNoDefault(self): + feasible_values = ("b", "a", "c") + parameter_config = pc.ParameterConfig.factory( + "name", feasible_values=feasible_values + ) + self.assertIsNone(parameter_config.default_value) + + def testCreatesCategoricalConfigBadDefault(self): + feasible_values = ("b", "a", "c") + with self.assertRaisesRegex( + ValueError, "default_value has an incorrect type.*" + ): + pc.ParameterConfig.factory( + "name", feasible_values=feasible_values, default_value=0.1 + ) + + def testRaisesErrorWhenNameIsEmpty(self): + with self.assertRaises(ValueError): + _ = 
pc.ParameterConfig.factory("", bounds=(-1.0, 1.0)) + + def testRaisesErrorWhenOverSpecified(self): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory( + "name", bounds=(-1.0, 1.0), feasible_values=["a", "b", "c"] + ) + + @parameterized.named_parameters( + ("HaveInfinity", (-float("inf"), 1)), + ("HaveNan", (1, float("nan"))), + ("HaveMixedTypes", (1, float(1))), + ("AreWronglyOrdered", (1, -1)), + ) + def testRaisesErrorWhenBounds(self, bounds): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory("name", bounds=bounds) + + @parameterized.named_parameters( + ("HaveDuplicateCategories", ["a", "a", "b"]), + ("HaveDuplicateNumbers", [1.0, 2.0, 2.0]), + ("HaveMixedTypes", ["a", 1, 2]), + ) + def testRaisesErrorWhenFeasibleValues(self, feasible_values): + with self.assertRaises(ValueError): + _ = pc.ParameterConfig.factory("name", feasible_values=feasible_values) + + +_child1 = pc.ParameterConfig.factory("double_child", bounds=(0.0, 1.0)) +_child2 = pc.ParameterConfig.factory("integer_child", bounds=(0, 1)) class ParameterConfigFactoryTestWithChildren(parameterized.TestCase): - - @parameterized.named_parameters( - ('IntParentValues', [([0], _child1), ([0, 1], _child2)]), - ('FloatParentValues', [([0.0], _child1), ([0.0, 1.0], _child2)])) - def testIntegerWithValid(self, children): - p = pc.ParameterConfig.factory('parent', bounds=(0, 1), children=children) - self.assertLen(p.child_parameter_configs, 2) - self.assertEmpty(p.matching_parent_values) - self.assertSameElements(p.child_parameter_configs[0].matching_parent_values, - children[0][0]) - self.assertSameElements(p.child_parameter_configs[1].matching_parent_values, - children[1][0]) - - @parameterized.named_parameters( - ('FloatParentValues', [([0.5], _child1)]), - ('StringParentValues', [(['0'], _child1), (['0.0', '1.0'], _child2)])) - def testIntegerWithInvalid(self, children): - with self.assertRaises(TypeError): - _ = pc.ParameterConfig.factory('parent', bounds=(0, 1), 
children=children) - - @parameterized.named_parameters( - ('IntParentValues', [([0], _child1), ([1], _child2)]), - ('FloatParentValues', [([0.0], _child1), ([0.0, 1.0], _child2)])) - def testDiscreteWithValid(self, children): - p = pc.ParameterConfig.factory( - 'parent', feasible_values=[0.0, 1.0], children=children) - self.assertLen(p.child_parameter_configs, 2) - self.assertEmpty(p.matching_parent_values) - self.assertSameElements(p.child_parameter_configs[0].matching_parent_values, - children[0][0]) - self.assertSameElements(p.child_parameter_configs[1].matching_parent_values, - children[1][0]) - - @parameterized.named_parameters(('StringParentValues', [(['0.0'], _child1), - (['0.0', - '1.0'], _child2)])) - def testDiscreteWithInvalid(self, children): - with self.assertRaises(TypeError): - _ = pc.ParameterConfig.factory( - 'parent', feasible_values=[0.0, 1.0], children=children) - - @parameterized.named_parameters( # pyformat: disable - ('StringParentValues', [(['a'], _child1), (['a', 'b'], _child2)])) - def testCategoricalWithValid(self, children): - p = pc.ParameterConfig.factory( - 'parent', feasible_values=['a', 'b'], children=children) - self.assertLen(p.child_parameter_configs, 2) - self.assertEmpty(p.matching_parent_values) - self.assertSameElements(p.child_parameter_configs[0].matching_parent_values, - children[0][0]) - self.assertSameElements(p.child_parameter_configs[1].matching_parent_values, - children[1][0]) - - @parameterized.named_parameters(('StringParentValues', [(['0.0'], _child1), - (['1.0'], _child2)])) - def testCategoricalWithInvalid(self, children): - with self.assertRaises(TypeError): - _ = pc.ParameterConfig.factory( - 'parent', feasible_values=[0.0, 1.0], children=children) - - def testAddChildren(self): - children = [(['a'], _child1), (['a', 'b'], _child2)] - p = pc.ParameterConfig.factory( - 'parent', feasible_values=['a', 'b'], children=children) - new_children = [ - (['a'], pc.ParameterConfig.factory('double_child2', bounds=(1.0, 
2.0))), - (['b'], - pc.ParameterConfig.factory( - 'categorical_child', feasible_values=['c', 'd'])), - ] - p2 = p.add_children(new_children) - self.assertLen(p.child_parameter_configs, 2) - self.assertSameElements([c.name for c in p.child_parameter_configs], - [c[1].name for c in children]) - - self.assertLen(p2.child_parameter_configs, 4) - expected_names = [c[1].name for c in children] - expected_names += [c[1].name for c in new_children] - got_names = [c.name for c in p2.child_parameter_configs] - self.assertSameElements(got_names, expected_names) + @parameterized.named_parameters( + ("IntParentValues", [([0], _child1), ([0, 1], _child2)]), + ("FloatParentValues", [([0.0], _child1), ([0.0, 1.0], _child2)]), + ) + def testIntegerWithValid(self, children): + p = pc.ParameterConfig.factory("parent", bounds=(0, 1), children=children) + self.assertLen(p.child_parameter_configs, 2) + self.assertEmpty(p.matching_parent_values) + self.assertSameElements( + p.child_parameter_configs[0].matching_parent_values, children[0][0] + ) + self.assertSameElements( + p.child_parameter_configs[1].matching_parent_values, children[1][0] + ) + + @parameterized.named_parameters( + ("FloatParentValues", [([0.5], _child1)]), + ("StringParentValues", [(["0"], _child1), (["0.0", "1.0"], _child2)]), + ) + def testIntegerWithInvalid(self, children): + with self.assertRaises(TypeError): + _ = pc.ParameterConfig.factory("parent", bounds=(0, 1), children=children) + + @parameterized.named_parameters( + ("IntParentValues", [([0], _child1), ([1], _child2)]), + ("FloatParentValues", [([0.0], _child1), ([0.0, 1.0], _child2)]), + ) + def testDiscreteWithValid(self, children): + p = pc.ParameterConfig.factory( + "parent", feasible_values=[0.0, 1.0], children=children + ) + self.assertLen(p.child_parameter_configs, 2) + self.assertEmpty(p.matching_parent_values) + self.assertSameElements( + p.child_parameter_configs[0].matching_parent_values, children[0][0] + ) + self.assertSameElements( + 
p.child_parameter_configs[1].matching_parent_values, children[1][0] + ) + + @parameterized.named_parameters( + ("StringParentValues", [(["0.0"], _child1), (["0.0", "1.0"], _child2)]) + ) + def testDiscreteWithInvalid(self, children): + with self.assertRaises(TypeError): + _ = pc.ParameterConfig.factory( + "parent", feasible_values=[0.0, 1.0], children=children + ) + + @parameterized.named_parameters( # pyformat: disable + ("StringParentValues", [(["a"], _child1), (["a", "b"], _child2)]) + ) + def testCategoricalWithValid(self, children): + p = pc.ParameterConfig.factory( + "parent", feasible_values=["a", "b"], children=children + ) + self.assertLen(p.child_parameter_configs, 2) + self.assertEmpty(p.matching_parent_values) + self.assertSameElements( + p.child_parameter_configs[0].matching_parent_values, children[0][0] + ) + self.assertSameElements( + p.child_parameter_configs[1].matching_parent_values, children[1][0] + ) + + @parameterized.named_parameters( + ("StringParentValues", [(["0.0"], _child1), (["1.0"], _child2)]) + ) + def testCategoricalWithInvalid(self, children): + with self.assertRaises(TypeError): + _ = pc.ParameterConfig.factory( + "parent", feasible_values=[0.0, 1.0], children=children + ) + + def testAddChildren(self): + children = [(["a"], _child1), (["a", "b"], _child2)] + p = pc.ParameterConfig.factory( + "parent", feasible_values=["a", "b"], children=children + ) + new_children = [ + (["a"], pc.ParameterConfig.factory("double_child2", bounds=(1.0, 2.0))), + ( + ["b"], + pc.ParameterConfig.factory( + "categorical_child", feasible_values=["c", "d"] + ), + ), + ] + p2 = p.add_children(new_children) + self.assertLen(p.child_parameter_configs, 2) + self.assertSameElements( + [c.name for c in p.child_parameter_configs], [c[1].name for c in children] + ) + + self.assertLen(p2.child_parameter_configs, 4) + expected_names = [c[1].name for c in children] + expected_names += [c[1].name for c in new_children] + got_names = [c.name for c in 
p2.child_parameter_configs] + self.assertSameElements(got_names, expected_names) class MergeTest(parameterized.TestCase): - - def test_merge_bounds(self): - pc1 = pc.ParameterConfig.factory('pc1', bounds=(0.0, 2.0)) - pc2 = pc.ParameterConfig.factory('pc2', bounds=(-1.0, 1.0)) - self.assertEqual( - pc.ParameterConfig.merge(pc1, pc2), - pc.ParameterConfig.factory('pc1', bounds=(-1.0, 2.0))) - - def test_merge_discrete(self): - pc1 = pc.ParameterConfig.factory( - 'pc1', feasible_values=[0.0, 2.0], scale_type=pc.ScaleType.LINEAR) - pc2 = pc.ParameterConfig.factory('pc2', feasible_values=[-1.0, 0.0]) - self.assertEqual( - pc.ParameterConfig.merge(pc1, pc2), - pc.ParameterConfig.factory( - 'pc1', - feasible_values=[-1.0, 0.0, 2.0], - scale_type=pc.ScaleType.LINEAR)) - - def test_merge_categorical(self): - pc1 = pc.ParameterConfig.factory('pc1', feasible_values=['a', 'b']) - pc2 = pc.ParameterConfig.factory('pc2', feasible_values=['a', 'c']) - self.assertEqual( - pc.ParameterConfig.merge(pc1, pc2), - pc.ParameterConfig.factory('pc1', feasible_values=['a', 'b', 'c'])) + def test_merge_bounds(self): + pc1 = pc.ParameterConfig.factory("pc1", bounds=(0.0, 2.0)) + pc2 = pc.ParameterConfig.factory("pc2", bounds=(-1.0, 1.0)) + self.assertEqual( + pc.ParameterConfig.merge(pc1, pc2), + pc.ParameterConfig.factory("pc1", bounds=(-1.0, 2.0)), + ) + + def test_merge_discrete(self): + pc1 = pc.ParameterConfig.factory( + "pc1", feasible_values=[0.0, 2.0], scale_type=pc.ScaleType.LINEAR + ) + pc2 = pc.ParameterConfig.factory("pc2", feasible_values=[-1.0, 0.0]) + self.assertEqual( + pc.ParameterConfig.merge(pc1, pc2), + pc.ParameterConfig.factory( + "pc1", feasible_values=[-1.0, 0.0, 2.0], scale_type=pc.ScaleType.LINEAR + ), + ) + + def test_merge_categorical(self): + pc1 = pc.ParameterConfig.factory("pc1", feasible_values=["a", "b"]) + pc2 = pc.ParameterConfig.factory("pc2", feasible_values=["a", "c"]) + self.assertEqual( + pc.ParameterConfig.merge(pc1, pc2), + 
pc.ParameterConfig.factory("pc1", feasible_values=["a", "b", "c"]), + ) class ParameterConfigContainsTest(parameterized.TestCase): + @parameterized.parameters((1.0, True), (-2.0, False), (3.0, False)) + def testFloat(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory("pc1", bounds=(-1.0, 2.0)) + self.assertEqual(config.contains(value), expected) - @parameterized.parameters((1.0, True), (-2.0, False), (3.0, False)) - def testFloat(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory('pc1', bounds=(-1., 2.)) - self.assertEqual(config.contains(value), expected) + @parameterized.parameters((1, True), (-2, False), (3, False), (1.5, False)) + def testInt(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory("pc1", bounds=(-1, 2)) + self.assertEqual(config.contains(value), expected) - @parameterized.parameters((1, True), (-2, False), (3, False), (1.5, False)) - def testInt(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory('pc1', bounds=(-1, 2)) - self.assertEqual(config.contains(value), expected) + @parameterized.parameters((1.0, False), (2, True), (-1, True)) + def testDiscrete(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory("pc1", feasible_values=[-1.0, 0.0, 2.0]) + self.assertEqual(config.contains(value), expected) - @parameterized.parameters((1.0, False), (2, True), (-1, True)) - def testDiscrete(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory('pc1', feasible_values=[-1., 0., 2.]) - self.assertEqual(config.contains(value), expected) + @parameterized.parameters(("a", True), ("b", False), ("c", False)) + def testCategorical(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory("pc1", feasible_values=["a", "aa", "aaa"]) + self.assertEqual(config.contains(value), expected) - @parameterized.parameters(('a', True), ('b', False), ('c', False)) - def testCategorical(self, value: Any, expected: bool): - config = 
pc.ParameterConfig.factory( - 'pc1', feasible_values=['a', 'aa', 'aaa']) - self.assertEqual(config.contains(value), expected) - - @parameterized.parameters((True, True), ('a', False), (0, False)) - def testBoolean(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory( - 'pc1', feasible_values=['true', 'false']) - self.assertEqual(config.contains(value), expected) + @parameterized.parameters((True, True), ("a", False), (0, False)) + def testBoolean(self, value: Any, expected: bool): + config = pc.ParameterConfig.factory("pc1", feasible_values=["true", "false"]) + self.assertEqual(config.contains(value), expected) class TraverseTest(parameterized.TestCase): - - @parameterized.named_parameters(('ShowChildrenTrue', True), - ('ShowChildrenFalse', False)) - def testTraverse(self, show_children): - grandchild1 = pc.ParameterConfig.factory('grandchild1', bounds=(-1.0, 1.0)) - grandchildren = [(['a'], grandchild1), (['b'], grandchild1)] - child1 = pc.ParameterConfig.factory( - 'child1', feasible_values=['a', 'b'], children=grandchildren) - - child2 = pc.ParameterConfig.factory('child2', bounds=(0.0, 1.0)) - children = [([0], child1), ([1], child1), ([0, 1], child2)] - parent = pc.ParameterConfig.factory( - 'parent', bounds=(0, 1), children=children) - traversed_names = [ - pc.name for pc in parent.traverse(show_children=show_children) - ] - # Some parameter names are reused for separate child nodes, so they - # will appear multiple times, but they are indeed separate parameters. 
- self.assertEqual(traversed_names, [ - 'parent', 'child1', 'grandchild1', 'grandchild1', 'child1', - 'grandchild1', 'grandchild1', 'child2' - ]) - - -if __name__ == '__main__': - absltest.main() + @parameterized.named_parameters( + ("ShowChildrenTrue", True), ("ShowChildrenFalse", False) + ) + def testTraverse(self, show_children): + grandchild1 = pc.ParameterConfig.factory("grandchild1", bounds=(-1.0, 1.0)) + grandchildren = [(["a"], grandchild1), (["b"], grandchild1)] + child1 = pc.ParameterConfig.factory( + "child1", feasible_values=["a", "b"], children=grandchildren + ) + + child2 = pc.ParameterConfig.factory("child2", bounds=(0.0, 1.0)) + children = [([0], child1), ([1], child1), ([0, 1], child2)] + parent = pc.ParameterConfig.factory("parent", bounds=(0, 1), children=children) + traversed_names = [ + pc.name for pc in parent.traverse(show_children=show_children) + ] + # Some parameter names are reused for separate child nodes, so they + # will appear multiple times, but they are indeed separate parameters. 
+ self.assertEqual( + traversed_names, + [ + "parent", + "child1", + "grandchild1", + "grandchild1", + "child1", + "grandchild1", + "grandchild1", + "child2", + ], + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py index b4d05d2d01..c2c3cd30c8 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py +++ b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py @@ -7,7 +7,7 @@ from google.protobuf import duration_pb2 from google.cloud.aiplatform.compat.types import study as study_pb2 from google.cloud.aiplatform.vizier.pyvizier import parameter_config -from google.cloud.aiplatform.vizier.pyvizier import trial +from google.cloud.aiplatform.vizier.pyvizier import trial ScaleType = parameter_config.ScaleType _ScaleTypePb2 = study_pb2.StudySpec.ParameterSpec.ScaleType @@ -16,429 +16,490 @@ class _ScaleTypeMap: - """Proto converter for scale type.""" - _pyvizier_to_proto = { - parameter_config.ScaleType.LINEAR: - _ScaleTypePb2.UNIT_LINEAR_SCALE, - parameter_config.ScaleType.LOG: - _ScaleTypePb2.UNIT_LOG_SCALE, - parameter_config.ScaleType.REVERSE_LOG: - _ScaleTypePb2.UNIT_REVERSE_LOG_SCALE, - } - _proto_to_pyvizier = {v: k for k, v in _pyvizier_to_proto.items()} - - @classmethod - def to_proto(cls, pyvizier: parameter_config.ScaleType) -> _ScaleTypePb2: - return cls._pyvizier_to_proto[pyvizier] - - @classmethod - def from_proto(cls, proto: _ScaleTypePb2) -> parameter_config.ScaleType: - return cls._proto_to_pyvizier[proto] + """Proto converter for scale type.""" + _pyvizier_to_proto = { + parameter_config.ScaleType.LINEAR: _ScaleTypePb2.UNIT_LINEAR_SCALE, + parameter_config.ScaleType.LOG: _ScaleTypePb2.UNIT_LOG_SCALE, + parameter_config.ScaleType.REVERSE_LOG: _ScaleTypePb2.UNIT_REVERSE_LOG_SCALE, + } + _proto_to_pyvizier = {v: k for k, v in _pyvizier_to_proto.items()} -class ParameterConfigConverter: - """Converter 
for ParameterConfig.""" - - @classmethod - def _set_bounds(cls, proto: study_pb2.StudySpec.ParameterSpec, lower: float, - upper: float, parameter_type: ParameterType): - """Sets the proto's min_value and max_value fields.""" - if parameter_type == ParameterType.INTEGER: - proto.integer_value_spec.min_value = lower - proto.integer_value_spec.max_value = upper - elif parameter_type == ParameterType.DOUBLE: - proto.double_value_spec.min_value = lower - proto.double_value_spec.max_value = upper - - @classmethod - def _set_feasible_points(cls, proto: study_pb2.StudySpec.ParameterSpec, - feasible_points: Sequence[float]): - """Sets the proto's feasible_points field.""" - feasible_points = sorted(feasible_points) - proto.discrete_value_spec.values.clear() - proto.discrete_value_spec.values.extend(feasible_points) - - @classmethod - def _set_categories(cls, proto: study_pb2.StudySpec.ParameterSpec, - categories: Sequence[str]): - """Sets the protos' categories field.""" - proto.categorical_value_spec.values.clear() - proto.categorical_value_spec.values.extend(categories) - - @classmethod - def _set_default_value(cls, proto: study_pb2.StudySpec.ParameterSpec, - default_value: Union[float, int, str]): - """Sets the protos' default_value field.""" - which_pv_spec = proto.WhichOneof('parameter_value_spec') - getattr(proto, which_pv_spec).default_value.value = default_value - - @classmethod - def _matching_parent_values( - cls, proto: study_pb2.StudySpec.ParameterSpec.ConditionalParameterSpec - ) -> MonotypeParameterSequence: - """Returns the matching parent values, if set.""" - oneof_name = proto.WhichOneof('parent_value_condition') - if not oneof_name: - return [] - if oneof_name in ('parent_discrete_values', 'parent_int_values', - 'parent_categorical_values'): - return list(getattr(getattr(proto, oneof_name), 'values')) - raise ValueError('Unknown matching_parent_vals: {}'.format(oneof_name)) - - @classmethod - def from_proto( - cls, - proto: 
study_pb2.StudySpec.ParameterSpec, - *, - strict_validation: bool = False) -> parameter_config.ParameterConfig: - """Creates a ParameterConfig. - - Args: - proto: - strict_validation: If True, raise ValueError to enforce that - from_proto(proto).to_proto == proto. - - Returns: - ParameterConfig object - - Raises: - ValueError: See the "strict_validtion" arg documentation. - """ - feasible_values = [] - oneof_name = proto._pb.WhichOneof('parameter_value_spec') - if oneof_name == 'integer_value_spec': - bounds = (int(proto.integer_value_spec.min_value), - int(proto.integer_value_spec.max_value)) - elif oneof_name == 'double_value_spec': - bounds = (proto.double_value_spec.min_value, - proto.double_value_spec.max_value) - elif oneof_name == 'discrete_value_spec': - bounds = None - feasible_values = proto.discrete_value_spec.values - elif oneof_name == 'categorical_value_spec': - bounds = None - feasible_values = proto.categorical_value_spec.values - - default_value = None - if getattr(proto, oneof_name).default_value: - default_value = getattr(proto, oneof_name).default_value - - if proto.conditional_parameter_specs: - children = [] - for conditional_ps in proto.conditional_parameter_specs: - parent_values = cls._matching_parent_values(conditional_ps) - children.append( - (parent_values, cls.from_proto(conditional_ps.parameter_spec))) - else: - children = None - - scale_type = None - if proto.scale_type: - scale_type = _ScaleTypeMap.from_proto(proto.scale_type) - - try: - config = parameter_config.ParameterConfig.factory( - name=proto.parameter_id, - feasible_values=feasible_values, - bounds=bounds, - children=children, - scale_type=scale_type, - default_value=default_value) - except ValueError as e: - raise ValueError( - 'The provided proto was misconfigured. {}'.format(proto)) from e - - if strict_validation and cls.to_proto(config) != proto: - raise ValueError( - 'The provided proto was misconfigured. 
Expected: {} Given: {}'.format( - cls.to_proto(config), proto)) - return config - - @classmethod - def _set_child_parameter_configs( - cls, parent_proto: study_pb2.StudySpec.ParameterSpec, - pc: parameter_config.ParameterConfig): - """Sets the parent_proto's conditional_parameter_specs field. - - Args: - parent_proto: Modified in place. - pc: Parent ParameterConfig to copy children from. - - Raises: - ValueError: If the child configs are invalid - """ - children: List[Tuple[MonotypeParameterSequence, - parameter_config.ParameterConfig]] = [] - for child in pc.child_parameter_configs: - children.append((child.matching_parent_values, child)) - if not children: - return - parent_proto.conditional_parameter_specs.clear() - for child_pair in children: - if len(child_pair) != 2: - raise ValueError("""Each element in children must be a tuple of - (Sequence of valid parent values, ParameterConfig)""") - - logging.debug('_set_child_parameter_configs: parent_proto=%s, children=%s', - parent_proto, children) - for unsorted_parent_values, child in children: - parent_values = sorted(unsorted_parent_values) - child_proto = cls.to_proto(child.clone_without_children) - conditional_parameter_spec = study_pb2.StudySpec.ParameterSpec.ConditionalParameterSpec( - parameter_spec=child_proto) - - if parent_proto.HasField('discrete_value_spec'): - conditional_parameter_spec.parent_discrete_values.values[:] = parent_values - elif parent_proto.HasField('categorical_value_spec'): - conditional_parameter_spec.parent_categorical_values.values[:] = parent_values - elif parent_proto.HasField('integer_value_spec'): - conditional_parameter_spec.parent_int_values.values[:] = parent_values - else: - raise ValueError('DOUBLE type cannot have child parameters') - if child.child_parameter_configs: - cls._set_child_parameter_configs(child_proto, child) - parent_proto.conditional_parameter_specs.extend( - [conditional_parameter_spec]) - - @classmethod - def to_proto( - cls, pc: 
parameter_config.ParameterConfig - ) -> study_pb2.StudySpec.ParameterSpec: - """Returns a ParameterConfig Proto.""" - proto = study_pb2.StudySpec.ParameterSpec(parameter_id=pc.name) - if pc.type == ParameterType.DISCRETE: - cls._set_feasible_points(proto, [float(v) for v in pc.feasible_values]) - elif pc.type == ParameterType.CATEGORICAL: - cls._set_categories(proto, pc.feasible_values) - elif pc.type in (ParameterType.INTEGER, ParameterType.DOUBLE): - cls._set_bounds(proto, pc.bounds[0], pc.bounds[1], pc.type) - else: - raise ValueError('Invalid ParameterConfig: {}'.format(pc)) - if pc.scale_type is not None and pc.scale_type != ScaleType.UNIFORM_DISCRETE: - proto.scale_type = _ScaleTypeMap.to_proto(pc.scale_type) - if pc.default_value is not None: - cls._set_default_value(proto, pc.default_value) + @classmethod + def to_proto(cls, pyvizier: parameter_config.ScaleType) -> _ScaleTypePb2: + return cls._pyvizier_to_proto[pyvizier] - cls._set_child_parameter_configs(proto, pc) - return proto + @classmethod + def from_proto(cls, proto: _ScaleTypePb2) -> parameter_config.ScaleType: + return cls._proto_to_pyvizier[proto] -class ParameterValueConverter: - """Converter for trial.ParameterValue.""" - - @classmethod - def from_proto( - cls, proto: study_pb2.Trial.Parameter) -> Optional[trial.ParameterValue]: - """Returns whichever value that is populated, or None.""" - potential_value = proto.value - if isinstance(potential_value, float) or isinstance( - potential_value, str) or isinstance(potential_value, bool): - return trial.ParameterValue(potential_value) - else: - return None - - @classmethod - def to_proto(cls, parameter_value: trial.ParameterValue, - name: str) -> study_pb2.Trial.Parameter: - """Returns Parameter Proto.""" - proto = study_pb2.Trial.Parameter(parameter_id=name) +class ParameterConfigConverter: + """Converter for ParameterConfig.""" + + @classmethod + def _set_bounds( + cls, + proto: study_pb2.StudySpec.ParameterSpec, + lower: float, + upper: float, + 
parameter_type: ParameterType, + ): + """Sets the proto's min_value and max_value fields.""" + if parameter_type == ParameterType.INTEGER: + proto.integer_value_spec.min_value = lower + proto.integer_value_spec.max_value = upper + elif parameter_type == ParameterType.DOUBLE: + proto.double_value_spec.min_value = lower + proto.double_value_spec.max_value = upper + + @classmethod + def _set_feasible_points( + cls, proto: study_pb2.StudySpec.ParameterSpec, feasible_points: Sequence[float] + ): + """Sets the proto's feasible_points field.""" + feasible_points = sorted(feasible_points) + proto.discrete_value_spec.values.clear() + proto.discrete_value_spec.values.extend(feasible_points) + + @classmethod + def _set_categories( + cls, proto: study_pb2.StudySpec.ParameterSpec, categories: Sequence[str] + ): + """Sets the protos' categories field.""" + proto.categorical_value_spec.values.clear() + proto.categorical_value_spec.values.extend(categories) + + @classmethod + def _set_default_value( + cls, + proto: study_pb2.StudySpec.ParameterSpec, + default_value: Union[float, int, str], + ): + """Sets the protos' default_value field.""" + which_pv_spec = proto.WhichOneof("parameter_value_spec") + getattr(proto, which_pv_spec).default_value.value = default_value + + @classmethod + def _matching_parent_values( + cls, proto: study_pb2.StudySpec.ParameterSpec.ConditionalParameterSpec + ) -> MonotypeParameterSequence: + """Returns the matching parent values, if set.""" + oneof_name = proto.WhichOneof("parent_value_condition") + if not oneof_name: + return [] + if oneof_name in ( + "parent_discrete_values", + "parent_int_values", + "parent_categorical_values", + ): + return list(getattr(getattr(proto, oneof_name), "values")) + raise ValueError("Unknown matching_parent_vals: {}".format(oneof_name)) + + @classmethod + def from_proto( + cls, + proto: study_pb2.StudySpec.ParameterSpec, + *, + strict_validation: bool = False + ) -> parameter_config.ParameterConfig: + """Creates a 
ParameterConfig. + + Args: + proto: + strict_validation: If True, raise ValueError to enforce that + from_proto(proto).to_proto == proto. + + Returns: + ParameterConfig object + + Raises: + ValueError: See the "strict_validtion" arg documentation. + """ + feasible_values = [] + oneof_name = proto._pb.WhichOneof("parameter_value_spec") + if oneof_name == "integer_value_spec": + bounds = ( + int(proto.integer_value_spec.min_value), + int(proto.integer_value_spec.max_value), + ) + elif oneof_name == "double_value_spec": + bounds = ( + proto.double_value_spec.min_value, + proto.double_value_spec.max_value, + ) + elif oneof_name == "discrete_value_spec": + bounds = None + feasible_values = proto.discrete_value_spec.values + elif oneof_name == "categorical_value_spec": + bounds = None + feasible_values = proto.categorical_value_spec.values + + default_value = None + if getattr(proto, oneof_name).default_value: + default_value = getattr(proto, oneof_name).default_value + + if proto.conditional_parameter_specs: + children = [] + for conditional_ps in proto.conditional_parameter_specs: + parent_values = cls._matching_parent_values(conditional_ps) + children.append( + (parent_values, cls.from_proto(conditional_ps.parameter_spec)) + ) + else: + children = None + + scale_type = None + if proto.scale_type: + scale_type = _ScaleTypeMap.from_proto(proto.scale_type) + + try: + config = parameter_config.ParameterConfig.factory( + name=proto.parameter_id, + feasible_values=feasible_values, + bounds=bounds, + children=children, + scale_type=scale_type, + default_value=default_value, + ) + except ValueError as e: + raise ValueError( + "The provided proto was misconfigured. {}".format(proto) + ) from e + + if strict_validation and cls.to_proto(config) != proto: + raise ValueError( + "The provided proto was misconfigured. 
Expected: {} Given: {}".format( + cls.to_proto(config), proto + ) + ) + return config + + @classmethod + def _set_child_parameter_configs( + cls, + parent_proto: study_pb2.StudySpec.ParameterSpec, + pc: parameter_config.ParameterConfig, + ): + """Sets the parent_proto's conditional_parameter_specs field. + + Args: + parent_proto: Modified in place. + pc: Parent ParameterConfig to copy children from. + + Raises: + ValueError: If the child configs are invalid + """ + children: List[ + Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] + ] = [] + for child in pc.child_parameter_configs: + children.append((child.matching_parent_values, child)) + if not children: + return + parent_proto.conditional_parameter_specs.clear() + for child_pair in children: + if len(child_pair) != 2: + raise ValueError( + """Each element in children must be a tuple of + (Sequence of valid parent values, ParameterConfig)""" + ) + + logging.debug( + "_set_child_parameter_configs: parent_proto=%s, children=%s", + parent_proto, + children, + ) + for unsorted_parent_values, child in children: + parent_values = sorted(unsorted_parent_values) + child_proto = cls.to_proto(child.clone_without_children) + conditional_parameter_spec = ( + study_pb2.StudySpec.ParameterSpec.ConditionalParameterSpec( + parameter_spec=child_proto + ) + ) + + if parent_proto.HasField("discrete_value_spec"): + conditional_parameter_spec.parent_discrete_values.values[ + : + ] = parent_values + elif parent_proto.HasField("categorical_value_spec"): + conditional_parameter_spec.parent_categorical_values.values[ + : + ] = parent_values + elif parent_proto.HasField("integer_value_spec"): + conditional_parameter_spec.parent_int_values.values[:] = parent_values + else: + raise ValueError("DOUBLE type cannot have child parameters") + if child.child_parameter_configs: + cls._set_child_parameter_configs(child_proto, child) + parent_proto.conditional_parameter_specs.extend( + [conditional_parameter_spec] + ) + + 
@classmethod + def to_proto( + cls, pc: parameter_config.ParameterConfig + ) -> study_pb2.StudySpec.ParameterSpec: + """Returns a ParameterConfig Proto.""" + proto = study_pb2.StudySpec.ParameterSpec(parameter_id=pc.name) + if pc.type == ParameterType.DISCRETE: + cls._set_feasible_points(proto, [float(v) for v in pc.feasible_values]) + elif pc.type == ParameterType.CATEGORICAL: + cls._set_categories(proto, pc.feasible_values) + elif pc.type in (ParameterType.INTEGER, ParameterType.DOUBLE): + cls._set_bounds(proto, pc.bounds[0], pc.bounds[1], pc.type) + else: + raise ValueError("Invalid ParameterConfig: {}".format(pc)) + if pc.scale_type is not None and pc.scale_type != ScaleType.UNIFORM_DISCRETE: + proto.scale_type = _ScaleTypeMap.to_proto(pc.scale_type) + if pc.default_value is not None: + cls._set_default_value(proto, pc.default_value) + + cls._set_child_parameter_configs(proto, pc) + return proto - if isinstance(parameter_value.value, int): - proto.value.number_value = parameter_value.value - elif isinstance(parameter_value.value, bool): - proto.value.bool_value = parameter_value.value - elif isinstance(parameter_value.value, float): - proto.value.number_value = parameter_value.value - elif isinstance(parameter_value.value, str): - proto.value.string_value = parameter_value.value - return proto +class ParameterValueConverter: + """Converter for trial.ParameterValue.""" + + @classmethod + def from_proto( + cls, proto: study_pb2.Trial.Parameter + ) -> Optional[trial.ParameterValue]: + """Returns whichever value that is populated, or None.""" + potential_value = proto.value + if ( + isinstance(potential_value, float) + or isinstance(potential_value, str) + or isinstance(potential_value, bool) + ): + return trial.ParameterValue(potential_value) + else: + return None + + @classmethod + def to_proto( + cls, parameter_value: trial.ParameterValue, name: str + ) -> study_pb2.Trial.Parameter: + """Returns Parameter Proto.""" + proto = 
study_pb2.Trial.Parameter(parameter_id=name) + + if isinstance(parameter_value.value, int): + proto.value.number_value = parameter_value.value + elif isinstance(parameter_value.value, bool): + proto.value.bool_value = parameter_value.value + elif isinstance(parameter_value.value, float): + proto.value.number_value = parameter_value.value + elif isinstance(parameter_value.value, str): + proto.value.string_value = parameter_value.value + + return proto class MeasurementConverter: - """Converter for trial.MeasurementConverter.""" - - @classmethod - def from_proto(cls, proto: study_pb2.Measurement) -> trial.Measurement: - """Creates a valid instance from proto. - - Args: - proto: Measurement proto. - - Returns: - A valid instance of Measurement object. Metrics with invalid values - are automatically filtered out. - """ - - metrics = dict() - - for metric in proto.metrics: - if metric.metric_id in metrics and metrics[ - metric.metric_id].value != metric.value: - logging.log_first_n( - logging.ERROR, 'Duplicate metric of name "%s".' - 'The newly found value %s will be used and ' - 'the previously found value %s will be discarded.' 
- 'This always happens if the proto has an empty-named metric.', 5, - metric.metric_id, metric.value, metrics[metric.metric_id].value) - try: - metrics[metric.metric_id] = trial.Metric(value=metric.value) - except ValueError: - pass - return trial.Measurement( - metrics=metrics, - elapsed_secs=proto.elapsed_duration.seconds, - steps=proto.step_count) - - @classmethod - def to_proto(cls, measurement: trial.Measurement) -> study_pb2.Measurement: - """Converts to Measurement proto.""" - proto = study_pb2.Measurement() - for name, metric in measurement.metrics.items(): - proto.metrics.append(study_pb2.Measurement.Metric(metric_id=name, value=metric.value)) - - proto.step_count = measurement.steps - int_seconds = int(measurement.elapsed_secs) - proto.elapsed_duration = duration_pb2.Duration(seconds=int_seconds, nanos=int(1e9 * (measurement.elapsed_secs - int_seconds))) - return proto - - -def _to_pyvizier_trial_status( - proto_state: study_pb2.Trial.State) -> trial.TrialStatus: - """from_proto conversion for Trial statuses.""" - if proto_state == study_pb2.Trial.State.REQUESTED: - return trial.TrialStatus.REQUESTED - elif proto_state == study_pb2.Trial.State.ACTIVE: - return trial.TrialStatus.ACTIVE - if proto_state == study_pb2.Trial.State.STOPPING: - return trial.TrialStatus.STOPPING - if proto_state == study_pb2.Trial.State.SUCCEEDED: - return trial.TrialStatus.COMPLETED - elif proto_state == study_pb2.Trial.State.INFEASIBLE: - return trial.TrialStatus.COMPLETED - else: - return trial.TrialStatus.UNKNOWN - - -def _from_pyvizier_trial_status(status: trial.TrialStatus, - infeasible: bool) -> study_pb2.Trial.State: - """to_proto conversion for Trial states.""" - if status == trial.TrialStatus.REQUESTED: - return study_pb2.Trial.State.REQUESTED - elif status == trial.TrialStatus.ACTIVE: - return study_pb2.Trial.State.ACTIVE - elif status == trial.TrialStatus.STOPPING: - return study_pb2.Trial.State.STOPPING - elif status == trial.TrialStatus.COMPLETED: - if infeasible: - 
return study_pb2.Trial.State.INFEASIBLE + """Converter for trial.MeasurementConverter.""" + + @classmethod + def from_proto(cls, proto: study_pb2.Measurement) -> trial.Measurement: + """Creates a valid instance from proto. + + Args: + proto: Measurement proto. + + Returns: + A valid instance of Measurement object. Metrics with invalid values + are automatically filtered out. + """ + + metrics = dict() + + for metric in proto.metrics: + if ( + metric.metric_id in metrics + and metrics[metric.metric_id].value != metric.value + ): + logging.log_first_n( + logging.ERROR, + 'Duplicate metric of name "%s".' + "The newly found value %s will be used and " + "the previously found value %s will be discarded." + "This always happens if the proto has an empty-named metric.", + 5, + metric.metric_id, + metric.value, + metrics[metric.metric_id].value, + ) + try: + metrics[metric.metric_id] = trial.Metric(value=metric.value) + except ValueError: + pass + return trial.Measurement( + metrics=metrics, + elapsed_secs=proto.elapsed_duration.seconds, + steps=proto.step_count, + ) + + @classmethod + def to_proto(cls, measurement: trial.Measurement) -> study_pb2.Measurement: + """Converts to Measurement proto.""" + proto = study_pb2.Measurement() + for name, metric in measurement.metrics.items(): + proto.metrics.append( + study_pb2.Measurement.Metric(metric_id=name, value=metric.value) + ) + + proto.step_count = measurement.steps + int_seconds = int(measurement.elapsed_secs) + proto.elapsed_duration = duration_pb2.Duration( + seconds=int_seconds, + nanos=int(1e9 * (measurement.elapsed_secs - int_seconds)), + ) + return proto + + +def _to_pyvizier_trial_status(proto_state: study_pb2.Trial.State) -> trial.TrialStatus: + """from_proto conversion for Trial statuses.""" + if proto_state == study_pb2.Trial.State.REQUESTED: + return trial.TrialStatus.REQUESTED + elif proto_state == study_pb2.Trial.State.ACTIVE: + return trial.TrialStatus.ACTIVE + if proto_state == 
study_pb2.Trial.State.STOPPING: + return trial.TrialStatus.STOPPING + if proto_state == study_pb2.Trial.State.SUCCEEDED: + return trial.TrialStatus.COMPLETED + elif proto_state == study_pb2.Trial.State.INFEASIBLE: + return trial.TrialStatus.COMPLETED + else: + return trial.TrialStatus.UNKNOWN + + +def _from_pyvizier_trial_status( + status: trial.TrialStatus, infeasible: bool +) -> study_pb2.Trial.State: + """to_proto conversion for Trial states.""" + if status == trial.TrialStatus.REQUESTED: + return study_pb2.Trial.State.REQUESTED + elif status == trial.TrialStatus.ACTIVE: + return study_pb2.Trial.State.ACTIVE + elif status == trial.TrialStatus.STOPPING: + return study_pb2.Trial.State.STOPPING + elif status == trial.TrialStatus.COMPLETED: + if infeasible: + return study_pb2.Trial.State.INFEASIBLE + else: + return study_pb2.Trial.State.SUCCEEDED else: - return study_pb2.Trial.State.SUCCEEDED - else: - return study_pb2.Trial.State.STATE_UNSPECIFIED + return study_pb2.Trial.State.STATE_UNSPECIFIED class TrialConverter: - """Converter for trial.TrialConverter.""" - - @classmethod - def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: - """Converts from Trial proto to object. - - Args: - proto: Trial proto. - - Returns: - A Trial object. 
- """ - parameters = {} - for parameter in proto.parameters: - value = ParameterValueConverter.from_proto(parameter) - if value is not None: - if parameter.parameter_id in parameters: - raise ValueError('Invalid trial proto contains duplicate parameter {}' - ': {}'.format(parameter.parameter_id, proto)) - parameters[parameter.parameter_id] = value - else: - logging.warning('A parameter without a value will be dropped: %s', - parameter) - - final_measurement = None - if proto.final_measurement: - final_measurement = MeasurementConverter.from_proto( - proto.final_measurement) - - completion_time = None - infeasibility_reason = None - if proto.state == study_pb2.Trial.State.SUCCEEDED: - if proto.end_time: - completion_ts = proto.end_time.nanosecond / 1e9 - completion_time = datetime.datetime.fromtimestamp(completion_ts) - elif proto.state == study_pb2.Trial.State.INFEASIBLE: - infeasibility_reason = proto.infeasible_reason - - measurements = [] - for measure in proto.measurements: - measurements.append(MeasurementConverter.from_proto(measure)) - - creation_time = None - if proto.start_time: - creation_ts = proto.start_time.nanosecond / 1e9 - creation_time = datetime.datetime.fromtimestamp(creation_ts) - return trial.Trial( - id=int(proto.name.split('/')[-1]), - description=proto.name, - assigned_worker=proto.client_id or None, - is_requested=proto.state == study_pb2.Trial.State.REQUESTED, - stopping_reason=('stopping reason not supported yet' - if proto.state == study_pb2.Trial.State.STOPPING else None), - parameters=parameters, - creation_time=creation_time, - completion_time=completion_time, - infeasibility_reason=infeasibility_reason, - final_measurement=final_measurement, - measurements=measurements) # pytype: disable=wrong-arg-types - - @classmethod - def from_protos(cls, protos: Sequence[study_pb2.Trial]) -> List[trial.Trial]: - """Convenience wrapper for from_proto.""" - return [TrialConverter.from_proto(proto) for proto in protos] - - @classmethod - def 
to_protos(cls, pytrials: Sequence[trial.Trial]) -> List[study_pb2.Trial]: - return [TrialConverter.to_proto(pytrial) for pytrial in pytrials] - - @classmethod - def to_proto(cls, pytrial: trial.Trial) -> study_pb2.Trial: - """Converts a pyvizier Trial to a Trial proto.""" - proto = study_pb2.Trial() - if pytrial.description is not None: - proto.name = pytrial.description - proto.id = str(pytrial.id) - proto.state = _from_pyvizier_trial_status(pytrial.status, - pytrial.infeasible) - proto.client_id = pytrial.assigned_worker or '' - - for name, value in pytrial.parameters.items(): - proto.parameters.append(ParameterValueConverter.to_proto(value, name)) - - # pytrial always adds an empty metric. Ideally, we should remove it if the - # metric does not exist in the study config. - if pytrial.final_measurement is not None: - proto.final_measurement.CopyFrom( - MeasurementConverter.to_proto(pytrial.final_measurement)) - - for measurement in pytrial.measurements: - proto.measurements.append(MeasurementConverter.to_proto(measurement)) - - if pytrial.creation_time is not None: - creation_secs = datetime.datetime.timestamp(pytrial.creation_time) - proto.start_time.seconds = int(creation_secs) - proto.start_time.nanos = int(1e9 * (creation_secs - int(creation_secs))) - if pytrial.completion_time is not None: - completion_secs = datetime.datetime.timestamp(pytrial.completion_time) - proto.end_time.seconds = int(completion_secs) - proto.end_time.nanos = int(1e9 * (completion_secs - int(completion_secs))) - if pytrial.infeasibility_reason is not None: - proto.infeasible_reason = pytrial.infeasibility_reason - return proto + """Converter for trial.TrialConverter.""" + + @classmethod + def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: + """Converts from Trial proto to object. + + Args: + proto: Trial proto. + + Returns: + A Trial object. 
+ """ + parameters = {} + for parameter in proto.parameters: + value = ParameterValueConverter.from_proto(parameter) + if value is not None: + if parameter.parameter_id in parameters: + raise ValueError( + "Invalid trial proto contains duplicate parameter {}" + ": {}".format(parameter.parameter_id, proto) + ) + parameters[parameter.parameter_id] = value + else: + logging.warning( + "A parameter without a value will be dropped: %s", parameter + ) + + final_measurement = None + if proto.final_measurement: + final_measurement = MeasurementConverter.from_proto(proto.final_measurement) + + completion_time = None + infeasibility_reason = None + if proto.state == study_pb2.Trial.State.SUCCEEDED: + if proto.end_time: + completion_ts = proto.end_time.nanosecond / 1e9 + completion_time = datetime.datetime.fromtimestamp(completion_ts) + elif proto.state == study_pb2.Trial.State.INFEASIBLE: + infeasibility_reason = proto.infeasible_reason + + measurements = [] + for measure in proto.measurements: + measurements.append(MeasurementConverter.from_proto(measure)) + + creation_time = None + if proto.start_time: + creation_ts = proto.start_time.nanosecond / 1e9 + creation_time = datetime.datetime.fromtimestamp(creation_ts) + return trial.Trial( + id=int(proto.name.split("/")[-1]), + description=proto.name, + assigned_worker=proto.client_id or None, + is_requested=proto.state == study_pb2.Trial.State.REQUESTED, + stopping_reason=( + "stopping reason not supported yet" + if proto.state == study_pb2.Trial.State.STOPPING + else None + ), + parameters=parameters, + creation_time=creation_time, + completion_time=completion_time, + infeasibility_reason=infeasibility_reason, + final_measurement=final_measurement, + measurements=measurements, + ) # pytype: disable=wrong-arg-types + + @classmethod + def from_protos(cls, protos: Sequence[study_pb2.Trial]) -> List[trial.Trial]: + """Convenience wrapper for from_proto.""" + return [TrialConverter.from_proto(proto) for proto in protos] + + 
@classmethod + def to_protos(cls, pytrials: Sequence[trial.Trial]) -> List[study_pb2.Trial]: + return [TrialConverter.to_proto(pytrial) for pytrial in pytrials] + + @classmethod + def to_proto(cls, pytrial: trial.Trial) -> study_pb2.Trial: + """Converts a pyvizier Trial to a Trial proto.""" + proto = study_pb2.Trial() + if pytrial.description is not None: + proto.name = pytrial.description + proto.id = str(pytrial.id) + proto.state = _from_pyvizier_trial_status(pytrial.status, pytrial.infeasible) + proto.client_id = pytrial.assigned_worker or "" + + for name, value in pytrial.parameters.items(): + proto.parameters.append(ParameterValueConverter.to_proto(value, name)) + + # pytrial always adds an empty metric. Ideally, we should remove it if the + # metric does not exist in the study config. + if pytrial.final_measurement is not None: + proto.final_measurement.CopyFrom( + MeasurementConverter.to_proto(pytrial.final_measurement) + ) + + for measurement in pytrial.measurements: + proto.measurements.append(MeasurementConverter.to_proto(measurement)) + + if pytrial.creation_time is not None: + creation_secs = datetime.datetime.timestamp(pytrial.creation_time) + proto.start_time.seconds = int(creation_secs) + proto.start_time.nanos = int(1e9 * (creation_secs - int(creation_secs))) + if pytrial.completion_time is not None: + completion_secs = datetime.datetime.timestamp(pytrial.completion_time) + proto.end_time.seconds = int(completion_secs) + proto.end_time.nanos = int(1e9 * (completion_secs - int(completion_secs))) + if pytrial.infeasibility_reason is not None: + proto.infeasible_reason = pytrial.infeasibility_reason + return proto diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index 3ce50e910c..ee5378e81b 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -19,11 +19,11 @@ import attr from 
google.cloud.aiplatform.vizier.pyvizier import automated_stopping -from google.cloud.aiplatform.vizier.pyvizier import proto_converters +from google.cloud.aiplatform.vizier.pyvizier import proto_converters from google.cloud.aiplatform.vizier.pyvizier import base_study_config -from google.cloud.aiplatform.vizier.pyvizier import common +from google.cloud.aiplatform.vizier.pyvizier import common from google.cloud.aiplatform.vizier.pyvizier import parameter_config -from google.cloud.aiplatform.vizier.pyvizier import trial +from google.cloud.aiplatform.vizier.pyvizier import trial from google.cloud.aiplatform.compat.types import study as study_pb2 ################### PyTypes ################### @@ -32,8 +32,13 @@ # A sequence of possible internal parameter values. MonotypeParameterSequence = parameter_config.MonotypeParameterSequence # Possible types for trial parameter values after cast to external types. -ParameterValueSequence = Union[trial.ParameterValueTypes, Sequence[int], - Sequence[float], Sequence[str], Sequence[bool]] +ParameterValueSequence = Union[ + trial.ParameterValueTypes, + Sequence[int], + Sequence[float], + Sequence[str], + Sequence[bool], +] ################### Enums ################### @@ -42,62 +47,67 @@ class Algorithm(enum.Enum): - """Valid Values for StudyConfig.Algorithm.""" - ALGORITHM_UNSPECIFIED = study_pb2.StudySpec.Algorithm.ALGORITHM_UNSPECIFIED - #GAUSSIAN_PROCESS_BANDIT = study_pb2.StudySpec.Algorithm.GAUSSIAN_PROCESS_BANDIT - GRID_SEARCH = study_pb2.StudySpec.Algorithm.GRID_SEARCH - RANDOM_SEARCH = study_pb2.StudySpec.Algorithm.RANDOM_SEARCH - #NSGA2 = study_pb2.StudySpec.Algorithm.NSGA2 - #EMUKIT_GP_EI = study_pb2.StudySpec.Algorithm.EMUKIT_GP_EI + """Valid Values for StudyConfig.Algorithm.""" + + ALGORITHM_UNSPECIFIED = study_pb2.StudySpec.Algorithm.ALGORITHM_UNSPECIFIED + # GAUSSIAN_PROCESS_BANDIT = study_pb2.StudySpec.Algorithm.GAUSSIAN_PROCESS_BANDIT + GRID_SEARCH = study_pb2.StudySpec.Algorithm.GRID_SEARCH + RANDOM_SEARCH = 
study_pb2.StudySpec.Algorithm.RANDOM_SEARCH + # NSGA2 = study_pb2.StudySpec.Algorithm.NSGA2 + # EMUKIT_GP_EI = study_pb2.StudySpec.Algorithm.EMUKIT_GP_EI class ObservationNoise(enum.Enum): - """Valid Values for StudyConfig.ObservationNoise.""" - OBSERVATION_NOISE_UNSPECIFIED = study_pb2.StudySpec.ObservationNoise.OBSERVATION_NOISE_UNSPECIFIED - LOW = study_pb2.StudySpec.ObservationNoise.LOW - HIGH = study_pb2.StudySpec.ObservationNoise.HIGH + """Valid Values for StudyConfig.ObservationNoise.""" + + OBSERVATION_NOISE_UNSPECIFIED = ( + study_pb2.StudySpec.ObservationNoise.OBSERVATION_NOISE_UNSPECIFIED + ) + LOW = study_pb2.StudySpec.ObservationNoise.LOW + HIGH = study_pb2.StudySpec.ObservationNoise.HIGH ################### Classes For Various Config Protos ################### @attr.define(frozen=False, init=True, slots=True, kw_only=True) class MetricInformationConverter: - """A wrapper for vizier_pb2.MetricInformation.""" - - @classmethod - def from_proto( - cls, proto: study_pb2.StudySpec.MetricSpec - ) -> base_study_config.MetricInformation: - """Converts a MetricInformation proto to a MetricInformation object.""" - if proto.goal not in list(ObjectiveMetricGoal): - raise ValueError('Unknown MetricInformation.goal: {}'.format(proto.goal)) - - return base_study_config.MetricInformation( - name=proto.metric_id, - goal=proto.goal, - safety_threshold=None, - safety_std_threshold=None, - min_value=None, - max_value=None) - - @classmethod - def to_proto( - cls, obj: base_study_config.MetricInformation - ) -> study_pb2.StudySpec.MetricSpec: - """Returns this object as a proto.""" - return study_pb2.StudySpec.MetricSpec( - metric_id=obj.name, goal=obj.goal.value) + """A wrapper for vizier_pb2.MetricInformation.""" + + @classmethod + def from_proto( + cls, proto: study_pb2.StudySpec.MetricSpec + ) -> base_study_config.MetricInformation: + """Converts a MetricInformation proto to a MetricInformation object.""" + if proto.goal not in list(ObjectiveMetricGoal): + raise 
ValueError("Unknown MetricInformation.goal: {}".format(proto.goal)) + + return base_study_config.MetricInformation( + name=proto.metric_id, + goal=proto.goal, + safety_threshold=None, + safety_std_threshold=None, + min_value=None, + max_value=None, + ) + + @classmethod + def to_proto( + cls, obj: base_study_config.MetricInformation + ) -> study_pb2.StudySpec.MetricSpec: + """Returns this object as a proto.""" + return study_pb2.StudySpec.MetricSpec(metric_id=obj.name, goal=obj.goal.value) class MetricsConfig(base_study_config.MetricsConfig): - """Metrics config.""" + """Metrics config.""" - @classmethod - def from_proto( - cls, protos: Iterable[study_pb2.StudySpec.MetricSpec]) -> 'MetricsConfig': - return cls(MetricInformationConverter.from_proto(m) for m in protos) + @classmethod + def from_proto( + cls, protos: Iterable[study_pb2.StudySpec.MetricSpec] + ) -> "MetricsConfig": + return cls(MetricInformationConverter.from_proto(m) for m in protos) - def to_proto(self) -> List[study_pb2.StudySpec.MetricSpec]: - return [MetricInformationConverter.to_proto(metric) for metric in self] + def to_proto(self) -> List[study_pb2.StudySpec.MetricSpec]: + return [MetricInformationConverter.to_proto(metric) for metric in self] SearchSpaceSelector = base_study_config.SearchSpaceSelector @@ -105,24 +115,25 @@ def to_proto(self) -> List[study_pb2.StudySpec.MetricSpec]: @attr.define(frozen=True, init=True, slots=True, kw_only=True) class SearchSpace(base_study_config.SearchSpace): - """A Selector for all, or part of a SearchSpace.""" - - @classmethod - def from_proto(cls, proto: study_pb2.StudySpec) -> 'SearchSpace': - """Extracts a SearchSpace object from a StudyConfig proto.""" - parameter_configs = [] - for pc in proto.parameters: - parameter_configs.append( - proto_converters.ParameterConfigConverter.from_proto(pc)) - return cls._factory(parameter_configs=parameter_configs) - - @property - def parameter_protos(self) -> List[study_pb2.StudySpec.ParameterSpec]: - """Returns the 
search space as a List of ParameterConfig protos.""" - return [ - proto_converters.ParameterConfigConverter.to_proto(pc) - for pc in self._parameter_configs - ] + """A Selector for all, or part of a SearchSpace.""" + + @classmethod + def from_proto(cls, proto: study_pb2.StudySpec) -> "SearchSpace": + """Extracts a SearchSpace object from a StudyConfig proto.""" + parameter_configs = [] + for pc in proto.parameters: + parameter_configs.append( + proto_converters.ParameterConfigConverter.from_proto(pc) + ) + return cls._factory(parameter_configs=parameter_configs) + + @property + def parameter_protos(self) -> List[study_pb2.StudySpec.ParameterSpec]: + """Returns the search space as a List of ParameterConfig protos.""" + return [ + proto_converters.ParameterConfigConverter.to_proto(pc) + for pc in self._parameter_configs + ] ################### Main Class ################### @@ -154,280 +165,301 @@ def parameter_protos(self) -> List[study_pb2.StudySpec.ParameterSpec]: # @attr.define(frozen=False, init=True, slots=True, kw_only=True) class StudyConfig(base_study_config.ProblemStatement): - """A builder and wrapper for study_pb2.StudySpec proto.""" - - search_space: SearchSpace = attr.field( - init=True, - factory=SearchSpace, - validator=attr.validators.instance_of(SearchSpace), - on_setattr=attr.setters.validate) - - algorithm: Algorithm = attr.field( - init=True, - validator=attr.validators.instance_of(Algorithm), - on_setattr=[attr.setters.convert, attr.setters.validate], - default=Algorithm.ALGORITHM_UNSPECIFIED, - kw_only=True) - - metric_information: MetricsConfig = attr.field( - init=True, - factory=MetricsConfig, - converter=MetricsConfig, - validator=attr.validators.instance_of(MetricsConfig), - kw_only=True) - - observation_noise: ObservationNoise = attr.field( - init=True, - validator=attr.validators.instance_of(ObservationNoise), - on_setattr=attr.setters.validate, - default=ObservationNoise.OBSERVATION_NOISE_UNSPECIFIED, - kw_only=True) - - 
automated_stopping_config: Optional[ - automated_stopping.AutomatedStoppingConfig] = attr.field( - init=True, - default=None, - validator=attr.validators.optional( - attr.validators.instance_of( - automated_stopping.AutomatedStoppingConfig)), - on_setattr=attr.setters.validate, - kw_only=True) - - # An internal representation as a StudyConfig proto. - # If this object was created from a StudyConfig proto, a copy of the original - # proto is kept, to make sure that unknown proto fields are preserved in - # round trip serialization. - # TODO: Fix the broken proto validation. - _study_config: study_pb2.StudySpec = attr.field( - init=True, - factory=study_pb2.StudySpec, - kw_only=True) - - # Public attributes, methods and properties. - @classmethod - def from_proto(cls, proto: study_pb2.StudySpec) -> 'StudyConfig': - """Converts a StudyConfig proto to a StudyConfig object. - - Args: - proto: StudyConfig proto. - - Returns: - A StudyConfig object. - """ - metric_information = MetricsConfig( - sorted( - [MetricInformationConverter.from_proto(m) for m in proto.metrics], - key=lambda x: x.name)) - - oneof_name = proto._pb.WhichOneof('automated_stopping_spec') - if not oneof_name: - automated_stopping_config = None - else: - automated_stopping_config = automated_stopping.AutomatedStoppingConfig.from_proto( - getattr(proto, oneof_name)) - - return cls( - search_space=SearchSpace.from_proto(proto), - algorithm=Algorithm(proto.algorithm), - metric_information=metric_information, - observation_noise=ObservationNoise(proto.observation_noise), - automated_stopping_config=automated_stopping_config, - study_config=copy.deepcopy(proto)) - - def to_proto(self) -> study_pb2.StudySpec: - """Serializes this object to a StudyConfig proto.""" - proto = copy.deepcopy(self._study_config) - proto.algorithm = self.algorithm.value - proto.observation_noise = self.observation_noise.value - - del proto.metrics[:] - proto.metrics.extend(self.metric_information.to_proto()) - - del 
proto.parameters[:] - proto.parameters.extend(self.search_space.parameter_protos) - - if self.automated_stopping_config is not None: - auto_stop_proto = self.automated_stopping_config.to_proto() - if isinstance(auto_stop_proto, - study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec): - for method_name in dir(proto.decay_curve_stopping_spec): - if callable(getattr(proto.decay_curve_stopping_spec, method_name)): - print(method_name) - proto.decay_curve_stopping_spec = copy.deepcopy(auto_stop_proto) - elif isinstance(auto_stop_proto, - study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec): - for method_name in dir(proto.decay_curve_stopping_spec): - if callable(getattr(proto.median_automated_stopping_spec, method_name)): - print(method_name) - proto.median_automated_stopping_spec = copy.deepcopy(auto_stop_proto) - - return proto - - @property - def is_single_objective(self) -> bool: - """Returns True if only one objective metric is configured.""" - return len(self.metric_information) == 1 - - @property - def single_objective_metric_name(self) -> Optional[str]: - """Returns the name of the single-objective metric, if set. - - Returns: - String: name of the single-objective metric. - None: if this is not a single-objective study. - """ - if len(self.metric_information) == 1: - return list(self.metric_information)[0].name - return None - - def _trial_to_external_values( - self, pytrial: trial.Trial) -> Dict[str, Union[float, int, str, bool]]: - """Returns the trial paremeter values cast to external types.""" - parameter_values: Dict[str, Union[float, int, str]] = {} - external_values: Dict[str, Union[float, int, str, bool]] = {} - # parameter_configs is a list of Tuple[parent_name, ParameterConfig]. - parameter_configs: List[Tuple[Optional[str], - parameter_config.ParameterConfig]] = [ - (None, p) - for p in self.search_space.parameters - ] - remaining_parameters = copy.deepcopy(pytrial.parameters) - # Traverse the conditional tree using a BFS. 
- while parameter_configs and remaining_parameters: - parent_name, pc = parameter_configs.pop(0) - parameter_configs.extend( - (pc.name, child) for child in pc.child_parameter_configs) - if pc.name not in remaining_parameters: - continue - if parent_name is not None: - # This is a child parameter. If the parent was not seen, - # skip this parameter config. - if parent_name not in parameter_values: - continue - parent_value = parameter_values[parent_name] - if parent_value not in pc.matching_parent_values: - continue - parameter_values[pc.name] = remaining_parameters[pc.name].value - if pc.external_type is None: - external_value = remaining_parameters[pc.name].value - else: - external_value = remaining_parameters[pc.name].cast(pc.external_type) # pytype: disable=wrong-arg-types - external_values[pc.name] = external_value - remaining_parameters.pop(pc.name) - return external_values - - def trial_parameters( - self, proto: study_pb2.Trial) -> Dict[str, ParameterValueSequence]: - """Returns the trial values, cast to external types, if they exist. - - Args: - proto: - - Returns: - Parameter values dict: cast to each parameter's external_type, if exists. - NOTE that the values in the dict may be a Sequence as opposed to a single - element. - - Raises: - ValueError: If the trial parameters do not exist in this search space. - ValueError: If the trial contains duplicate parameters. - """ - pytrial = proto_converters.TrialConverter.from_proto(proto) - return self._pytrial_parameters(pytrial) - - def _pytrial_parameters( - self, pytrial: trial.Trial) -> Dict[str, ParameterValueSequence]: - """Returns the trial values, cast to external types, if they exist. - - Args: - pytrial: - - Returns: - Parameter values dict: cast to each parameter's external_type, if exists. - NOTE that the values in the dict may be a Sequence as opposed to a single - element. - - Raises: - ValueError: If the trial parameters do not exist in this search space. 
- ValueError: If the trial contains duplicate parameters. - """ - trial_external_values: Dict[str, Union[float, int, str, bool]] = ( - self._trial_to_external_values(pytrial)) - if len(trial_external_values) != len(pytrial.parameters): - raise ValueError('Invalid trial for this search space: failed to convert ' - 'all trial parameters: {}'.format(pytrial)) - - # Combine multi-dimensional parameter values to a list of values. - trial_final_values: Dict[str, ParameterValueSequence] = {} - # multi_dim_params: Dict[str, List[Tuple[int, ParameterValueSequence]]] - multi_dim_params = collections.defaultdict(list) - for name in trial_external_values: - base_index = SearchSpaceSelector.parse_multi_dimensional_parameter_name( - name) - if base_index is None: - trial_final_values[name] = trial_external_values[name] - else: - base_name, index = base_index - multi_dim_params[base_name].append((index, trial_external_values[name])) - for name in multi_dim_params: - multi_dim_params[name].sort(key=lambda x: x[0]) - trial_final_values[name] = [x[1] for x in multi_dim_params[name]] - - return trial_final_values - - def trial_metrics(self, - proto: study_pb2.Trial, - *, - include_all_metrics=False) -> Dict[str, float]: - """Returns the trial's final measurement metric values. - - If the trial is not completed, or infeasible, no metrics are returned. - By default, only metrics configured in the StudyConfig are returned - (e.g. only objective and safety metrics). - - Args: - proto: - include_all_metrics: If True, all metrics in the final measurements are - returned. If False, only metrics configured in the StudyConfig are - returned. 
- - Returns: - Dict[metric name, metric value] - """ - pytrial = proto_converters.TrialConverter.from_proto(proto) - return self._pytrial_metrics( - pytrial, include_all_metrics=include_all_metrics) - - def _pytrial_metrics(self, - pytrial: trial.Trial, - *, - include_all_metrics=False) -> Dict[str, float]: - """Returns the trial's final measurement metric values. - - If the trial is not completed, or infeasible, no metrics are returned. - By default, only metrics configured in the StudyConfig are returned - (e.g. only objective and safety metrics). - - Args: - pytrial: - include_all_metrics: If True, all metrics in the final measurements are - returned. If False, only metrics configured in the StudyConfig are - returned. - - Returns: - Dict[metric name, metric value] - """ - configured_metrics = [m.name for m in self.metric_information] - - metrics: Dict[str, float] = {} - if pytrial.is_completed and not pytrial.infeasible: - for name in pytrial.final_measurement.metrics: - if (include_all_metrics or - (not include_all_metrics and name in configured_metrics)): - # Special case: Measurement always adds an empty metric by default. - # If there is a named single objective in study_config, drop the empty - # metric. 
- if not name and self.single_objective_metric_name != name: - continue - metrics[name] = pytrial.final_measurement.metrics[name].value - return metrics + """A builder and wrapper for study_pb2.StudySpec proto.""" + + search_space: SearchSpace = attr.field( + init=True, + factory=SearchSpace, + validator=attr.validators.instance_of(SearchSpace), + on_setattr=attr.setters.validate, + ) + + algorithm: Algorithm = attr.field( + init=True, + validator=attr.validators.instance_of(Algorithm), + on_setattr=[attr.setters.convert, attr.setters.validate], + default=Algorithm.ALGORITHM_UNSPECIFIED, + kw_only=True, + ) + + metric_information: MetricsConfig = attr.field( + init=True, + factory=MetricsConfig, + converter=MetricsConfig, + validator=attr.validators.instance_of(MetricsConfig), + kw_only=True, + ) + + observation_noise: ObservationNoise = attr.field( + init=True, + validator=attr.validators.instance_of(ObservationNoise), + on_setattr=attr.setters.validate, + default=ObservationNoise.OBSERVATION_NOISE_UNSPECIFIED, + kw_only=True, + ) + + automated_stopping_config: Optional[ + automated_stopping.AutomatedStoppingConfig + ] = attr.field( + init=True, + default=None, + validator=attr.validators.optional( + attr.validators.instance_of(automated_stopping.AutomatedStoppingConfig) + ), + on_setattr=attr.setters.validate, + kw_only=True, + ) + + # An internal representation as a StudyConfig proto. + # If this object was created from a StudyConfig proto, a copy of the original + # proto is kept, to make sure that unknown proto fields are preserved in + # round trip serialization. + # TODO: Fix the broken proto validation. + _study_config: study_pb2.StudySpec = attr.field( + init=True, factory=study_pb2.StudySpec, kw_only=True + ) + + # Public attributes, methods and properties. + @classmethod + def from_proto(cls, proto: study_pb2.StudySpec) -> "StudyConfig": + """Converts a StudyConfig proto to a StudyConfig object. + + Args: + proto: StudyConfig proto. 
+ + Returns: + A StudyConfig object. + """ + metric_information = MetricsConfig( + sorted( + [MetricInformationConverter.from_proto(m) for m in proto.metrics], + key=lambda x: x.name, + ) + ) + + oneof_name = proto._pb.WhichOneof("automated_stopping_spec") + if not oneof_name: + automated_stopping_config = None + else: + automated_stopping_config = ( + automated_stopping.AutomatedStoppingConfig.from_proto( + getattr(proto, oneof_name) + ) + ) + + return cls( + search_space=SearchSpace.from_proto(proto), + algorithm=Algorithm(proto.algorithm), + metric_information=metric_information, + observation_noise=ObservationNoise(proto.observation_noise), + automated_stopping_config=automated_stopping_config, + study_config=copy.deepcopy(proto), + ) + + def to_proto(self) -> study_pb2.StudySpec: + """Serializes this object to a StudyConfig proto.""" + proto = copy.deepcopy(self._study_config) + proto.algorithm = self.algorithm.value + proto.observation_noise = self.observation_noise.value + + del proto.metrics[:] + proto.metrics.extend(self.metric_information.to_proto()) + + del proto.parameters[:] + proto.parameters.extend(self.search_space.parameter_protos) + + if self.automated_stopping_config is not None: + auto_stop_proto = self.automated_stopping_config.to_proto() + if isinstance( + auto_stop_proto, study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec + ): + for method_name in dir(proto.decay_curve_stopping_spec): + if callable(getattr(proto.decay_curve_stopping_spec, method_name)): + print(method_name) + proto.decay_curve_stopping_spec = copy.deepcopy(auto_stop_proto) + elif isinstance( + auto_stop_proto, study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec + ): + for method_name in dir(proto.decay_curve_stopping_spec): + if callable( + getattr(proto.median_automated_stopping_spec, method_name) + ): + print(method_name) + proto.median_automated_stopping_spec = copy.deepcopy(auto_stop_proto) + + return proto + + @property + def is_single_objective(self) -> bool: + 
"""Returns True if only one objective metric is configured.""" + return len(self.metric_information) == 1 + + @property + def single_objective_metric_name(self) -> Optional[str]: + """Returns the name of the single-objective metric, if set. + + Returns: + String: name of the single-objective metric. + None: if this is not a single-objective study. + """ + if len(self.metric_information) == 1: + return list(self.metric_information)[0].name + return None + + def _trial_to_external_values( + self, pytrial: trial.Trial + ) -> Dict[str, Union[float, int, str, bool]]: + """Returns the trial paremeter values cast to external types.""" + parameter_values: Dict[str, Union[float, int, str]] = {} + external_values: Dict[str, Union[float, int, str, bool]] = {} + # parameter_configs is a list of Tuple[parent_name, ParameterConfig]. + parameter_configs: List[ + Tuple[Optional[str], parameter_config.ParameterConfig] + ] = [(None, p) for p in self.search_space.parameters] + remaining_parameters = copy.deepcopy(pytrial.parameters) + # Traverse the conditional tree using a BFS. + while parameter_configs and remaining_parameters: + parent_name, pc = parameter_configs.pop(0) + parameter_configs.extend( + (pc.name, child) for child in pc.child_parameter_configs + ) + if pc.name not in remaining_parameters: + continue + if parent_name is not None: + # This is a child parameter. If the parent was not seen, + # skip this parameter config. 
+ if parent_name not in parameter_values: + continue + parent_value = parameter_values[parent_name] + if parent_value not in pc.matching_parent_values: + continue + parameter_values[pc.name] = remaining_parameters[pc.name].value + if pc.external_type is None: + external_value = remaining_parameters[pc.name].value + else: + external_value = remaining_parameters[pc.name].cast( + pc.external_type + ) # pytype: disable=wrong-arg-types + external_values[pc.name] = external_value + remaining_parameters.pop(pc.name) + return external_values + + def trial_parameters( + self, proto: study_pb2.Trial + ) -> Dict[str, ParameterValueSequence]: + """Returns the trial values, cast to external types, if they exist. + + Args: + proto: + + Returns: + Parameter values dict: cast to each parameter's external_type, if exists. + NOTE that the values in the dict may be a Sequence as opposed to a single + element. + + Raises: + ValueError: If the trial parameters do not exist in this search space. + ValueError: If the trial contains duplicate parameters. + """ + pytrial = proto_converters.TrialConverter.from_proto(proto) + return self._pytrial_parameters(pytrial) + + def _pytrial_parameters( + self, pytrial: trial.Trial + ) -> Dict[str, ParameterValueSequence]: + """Returns the trial values, cast to external types, if they exist. + + Args: + pytrial: + + Returns: + Parameter values dict: cast to each parameter's external_type, if exists. + NOTE that the values in the dict may be a Sequence as opposed to a single + element. + + Raises: + ValueError: If the trial parameters do not exist in this search space. + ValueError: If the trial contains duplicate parameters. 
+ """ + trial_external_values: Dict[ + str, Union[float, int, str, bool] + ] = self._trial_to_external_values(pytrial) + if len(trial_external_values) != len(pytrial.parameters): + raise ValueError( + "Invalid trial for this search space: failed to convert " + "all trial parameters: {}".format(pytrial) + ) + + # Combine multi-dimensional parameter values to a list of values. + trial_final_values: Dict[str, ParameterValueSequence] = {} + # multi_dim_params: Dict[str, List[Tuple[int, ParameterValueSequence]]] + multi_dim_params = collections.defaultdict(list) + for name in trial_external_values: + base_index = SearchSpaceSelector.parse_multi_dimensional_parameter_name( + name + ) + if base_index is None: + trial_final_values[name] = trial_external_values[name] + else: + base_name, index = base_index + multi_dim_params[base_name].append((index, trial_external_values[name])) + for name in multi_dim_params: + multi_dim_params[name].sort(key=lambda x: x[0]) + trial_final_values[name] = [x[1] for x in multi_dim_params[name]] + + return trial_final_values + + def trial_metrics( + self, proto: study_pb2.Trial, *, include_all_metrics=False + ) -> Dict[str, float]: + """Returns the trial's final measurement metric values. + + If the trial is not completed, or infeasible, no metrics are returned. + By default, only metrics configured in the StudyConfig are returned + (e.g. only objective and safety metrics). + + Args: + proto: + include_all_metrics: If True, all metrics in the final measurements are + returned. If False, only metrics configured in the StudyConfig are + returned. + + Returns: + Dict[metric name, metric value] + """ + pytrial = proto_converters.TrialConverter.from_proto(proto) + return self._pytrial_metrics(pytrial, include_all_metrics=include_all_metrics) + + def _pytrial_metrics( + self, pytrial: trial.Trial, *, include_all_metrics=False + ) -> Dict[str, float]: + """Returns the trial's final measurement metric values. 
+ + If the trial is not completed, or infeasible, no metrics are returned. + By default, only metrics configured in the StudyConfig are returned + (e.g. only objective and safety metrics). + + Args: + pytrial: + include_all_metrics: If True, all metrics in the final measurements are + returned. If False, only metrics configured in the StudyConfig are + returned. + + Returns: + Dict[metric name, metric value] + """ + configured_metrics = [m.name for m in self.metric_information] + + metrics: Dict[str, float] = {} + if pytrial.is_completed and not pytrial.infeasible: + for name in pytrial.final_measurement.metrics: + if include_all_metrics or ( + not include_all_metrics and name in configured_metrics + ): + # Special case: Measurement always adds an empty metric by default. + # If there is a named single objective in study_config, drop the empty + # metric. + if not name and self.single_objective_metric_name != name: + continue + metrics[name] = pytrial.final_measurement.metrics[name].value + return metrics diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py index b86364ba21..2b6f3c5c57 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/trial.py +++ b/google/cloud/aiplatform/vizier/pyvizier/trial.py @@ -25,47 +25,50 @@ class ExternalType(enum.Enum): - """Valid Values for ParameterConfig.external_type.""" - INTERNAL = 'INTERNAL' - BOOLEAN = 'BOOLEAN' - INTEGER = 'INTEGER' - FLOAT = 'FLOAT' + """Valid Values for ParameterConfig.external_type.""" + + INTERNAL = "INTERNAL" + BOOLEAN = "BOOLEAN" + INTEGER = "INTEGER" + FLOAT = "FLOAT" # Values should NEVER be removed from the enums below, only added. 
class TrialStatus(enum.Enum): - """Values for Trial.Status.""" - UNKNOWN = 'UNKNOWN' - REQUESTED = 'REQUESTED' - ACTIVE = 'ACTIVE' - COMPLETED = 'COMPLETED' - STOPPING = 'STOPPING' + """Values for Trial.Status.""" + + UNKNOWN = "UNKNOWN" + REQUESTED = "REQUESTED" + ACTIVE = "ACTIVE" + COMPLETED = "COMPLETED" + STOPPING = "STOPPING" @attr.s(frozen=True, init=True, slots=True, kw_only=False) class Metric: - """Enhanced immutable wrapper for vizier_pb2.Metric proto. - - It has an additional field "std" for internal usage. This field gets lost - when the object is converted to proto. - """ - - def _std_not_negative(self, _, stddev): - if stddev < 0: - raise ValueError( - 'Standard deviation must be a non-negative finite number.') - - value: float = attr.ib( - converter=float, - init=True, - validator=[attr.validators.instance_of(float)], - kw_only=False) - std: float = attr.ib( - converter=float, - validator=[attr.validators.instance_of(float), _std_not_negative], - init=True, - default=0.0, - kw_only=True) + """Enhanced immutable wrapper for vizier_pb2.Metric proto. + + It has an additional field "std" for internal usage. This field gets lost + when the object is converted to proto. + """ + + def _std_not_negative(self, _, stddev): + if stddev < 0: + raise ValueError("Standard deviation must be a non-negative finite number.") + + value: float = attr.ib( + converter=float, + init=True, + validator=[attr.validators.instance_of(float)], + kw_only=False, + ) + std: float = attr.ib( + converter=float, + validator=[attr.validators.instance_of(float), _std_not_negative], + init=True, + default=0.0, + kw_only=True, + ) # Use when you want to preserve the shapes or reduce if-else statements. @@ -76,385 +79,391 @@ def _std_not_negative(self, _, stddev): @attr.s(auto_attribs=True, frozen=True, init=True, slots=True, repr=False) class ParameterValue: - """Immutable wrapper for vizier_pb2.Parameter.value, which is a oneof field. 
- - Has accessors (properties) that cast the value into the type according - to StudyConfiguration class behavior. In particular, 'true' and 'false' are - treated as special strings that are cast to a numeric value of 1 and 0, - respectively, and boolean value of True and False, repectively. - """ - - value: ParameterValueTypes = attr.ib( - init=True, - validator=[ - attr.validators.instance_of((str, int, float, bool)), - ]) - - def cast( - self, - external_type: ExternalType, - ) -> ParameterValueTypes: - """Returns ParameterValue cast to external_type. - - Args: - external_type: - - Returns: - self.value if external_type is INTERNAL. - self.as_bool if external_type is BOOLEAN. - self.as_int if external_type is INTEGER. - self.as_float if external_type is FLOAT. - - Raises: - ValueError: If external_type is not valid. - """ - if external_type == ExternalType.INTERNAL: - return self.value - elif external_type == ExternalType.BOOLEAN: - return self.as_bool - elif external_type == ExternalType.INTEGER: - return self.as_int - elif external_type == ExternalType.FLOAT: - return self.as_float - else: - raise ValueError( - 'Unknown external type enum value: {}.'.format(external_type)) - - @property - def as_float(self) -> Optional[float]: - """Returns the value cast to float.""" - if self.value == 'true': - return 1.0 - elif self.value == 'false': - return 0.0 - elif isinstance(self.value, str): - return None - return float(self.value) - - @property - def as_int(self) -> Optional[int]: - """Returns the value cast to int.""" - if self.value == 'true': - return 1 - elif self.value == 'false': - return 0 - elif isinstance(self.value, str): - return None - return int(self.value) - - @property - def as_str(self) -> Optional[str]: - """Returns str-typed value or lowercase 'true'/'false' if value is bool.""" - if isinstance(self.value, bool): - return str(self.value).lower() - elif isinstance(self.value, str): - return self.value - return None - - @property - def as_bool(self) 
-> Optional[bool]: - """Returns the value as bool following StudyConfiguration's behavior. - - Returns: True if value is 'true' or 1. False if value is - 'false' or 0. For all other cases, returns None. - For string type, this behavior is consistent with how - StudyConfiguration.AddBooleanParameter's. For other types, this - guarantees that self.value == self.as_bool - """ - if isinstance(self.value, str): - if self.value.lower() == 'true': - return True - elif self.value.lower() == 'false': - return False - else: - if self.value == 1.0: - return True - elif self.value == 0.0: - return False - return None + """Immutable wrapper for vizier_pb2.Parameter.value, which is a oneof field. - def __str__(self) -> str: - return str(self.value) + Has accessors (properties) that cast the value into the type according + to StudyConfiguration class behavior. In particular, 'true' and 'false' are + treated as special strings that are cast to a numeric value of 1 and 0, + respectively, and boolean value of True and False, repectively. + """ - def __repr__(self) -> str: - return str(self.value) + value: ParameterValueTypes = attr.ib( + init=True, + validator=[ + attr.validators.instance_of((str, int, float, bool)), + ], + ) + + def cast( + self, + external_type: ExternalType, + ) -> ParameterValueTypes: + """Returns ParameterValue cast to external_type. + + Args: + external_type: + + Returns: + self.value if external_type is INTERNAL. + self.as_bool if external_type is BOOLEAN. + self.as_int if external_type is INTEGER. + self.as_float if external_type is FLOAT. + + Raises: + ValueError: If external_type is not valid. 
+ """ + if external_type == ExternalType.INTERNAL: + return self.value + elif external_type == ExternalType.BOOLEAN: + return self.as_bool + elif external_type == ExternalType.INTEGER: + return self.as_int + elif external_type == ExternalType.FLOAT: + return self.as_float + else: + raise ValueError( + "Unknown external type enum value: {}.".format(external_type) + ) + + @property + def as_float(self) -> Optional[float]: + """Returns the value cast to float.""" + if self.value == "true": + return 1.0 + elif self.value == "false": + return 0.0 + elif isinstance(self.value, str): + return None + return float(self.value) + + @property + def as_int(self) -> Optional[int]: + """Returns the value cast to int.""" + if self.value == "true": + return 1 + elif self.value == "false": + return 0 + elif isinstance(self.value, str): + return None + return int(self.value) + + @property + def as_str(self) -> Optional[str]: + """Returns str-typed value or lowercase 'true'/'false' if value is bool.""" + if isinstance(self.value, bool): + return str(self.value).lower() + elif isinstance(self.value, str): + return self.value + return None + + @property + def as_bool(self) -> Optional[bool]: + """Returns the value as bool following StudyConfiguration's behavior. + + Returns: True if value is 'true' or 1. False if value is + 'false' or 0. For all other cases, returns None. + For string type, this behavior is consistent with how + StudyConfiguration.AddBooleanParameter's. 
For other types, this + guarantees that self.value == self.as_bool + """ + if isinstance(self.value, str): + if self.value.lower() == "true": + return True + elif self.value.lower() == "false": + return False + else: + if self.value == 1.0: + return True + elif self.value == 0.0: + return False + return None + + def __str__(self) -> str: + return str(self.value) + + def __repr__(self) -> str: + return str(self.value) class _MetricDict(collections.UserDict): - - def __setitem__(self, key: str, value: Union[float, Metric]): - if isinstance(value, Metric): - self.data.__setitem__(key, value) - else: - self.data.__setitem__(key, Metric(value=value)) + def __setitem__(self, key: str, value: Union[float, Metric]): + if isinstance(value, Metric): + self.data.__setitem__(key, value) + else: + self.data.__setitem__(key, Metric(value=value)) @attr.s(auto_attribs=True, frozen=False, init=True, slots=True) class Measurement: - """Collection of metrics with a timestamp.""" - - def _value_is_finite(self, _, value): - if not (np.isfinite(value) and value >= 0): - raise ValueError('Must be finite and non-negative.') - - # Should be used as a regular Dict. 
- metrics: MutableMapping[str, Metric] = attr.ib( - init=True, - converter=lambda d: _MetricDict(**d), - default=_MetricDict(), - validator=attr.validators.instance_of(_MetricDict), - on_setattr=[attr.setters.convert, attr.setters.validate]) - - elapsed_secs: float = attr.ib( - converter=float, - init=True, - default=0, - validator=[attr.validators.instance_of(float), _value_is_finite], - on_setattr=[attr.setters.convert, attr.setters.validate], - kw_only=True) - - steps: float = attr.ib( - converter=int, - init=True, - default=0, - validator=[attr.validators.instance_of(int), _value_is_finite], - on_setattr=[attr.setters.convert, attr.setters.validate], - kw_only=True) - - -def _to_local_time( - dt: Optional[datetime.datetime]) -> Optional[datetime.datetime]: - """Converter for initializing timestamps in Trial class.""" - return dt.astimezone() if dt else None + """Collection of metrics with a timestamp.""" + + def _value_is_finite(self, _, value): + if not (np.isfinite(value) and value >= 0): + raise ValueError("Must be finite and non-negative.") + + # Should be used as a regular Dict. 
+ metrics: MutableMapping[str, Metric] = attr.ib( + init=True, + converter=lambda d: _MetricDict(**d), + default=_MetricDict(), + validator=attr.validators.instance_of(_MetricDict), + on_setattr=[attr.setters.convert, attr.setters.validate], + ) + + elapsed_secs: float = attr.ib( + converter=float, + init=True, + default=0, + validator=[attr.validators.instance_of(float), _value_is_finite], + on_setattr=[attr.setters.convert, attr.setters.validate], + kw_only=True, + ) + + steps: float = attr.ib( + converter=int, + init=True, + default=0, + validator=[attr.validators.instance_of(int), _value_is_finite], + on_setattr=[attr.setters.convert, attr.setters.validate], + kw_only=True, + ) + + +def _to_local_time(dt: Optional[datetime.datetime]) -> Optional[datetime.datetime]: + """Converter for initializing timestamps in Trial class.""" + return dt.astimezone() if dt else None @attr.define(init=False, frozen=True, eq=True) class ParameterDict(cabc.MutableMapping): - """Parameter dictionary. + """Parameter dictionary. - Maps the parameter names to their values. Works like a regular - dict[str, ParameterValue] for the most part, except one can directly assign - values of type `ParameterValueType`. So, - ParameterDict(a=3) and - ParameterDict(a=ParameterValue(3)) are equivalent. + Maps the parameter names to their values. Works like a regular + dict[str, ParameterValue] for the most part, except one can directly assign + values of type `ParameterValueType`. So, + ParameterDict(a=3) and + ParameterDict(a=ParameterValue(3)) are equivalent. - To access the raw value directly, use get_value() method. - d.get('a').value == d.get_value('a') - """ + To access the raw value directly, use get_value() method. 
+ d.get('a').value == d.get_value('a') + """ - _items: MutableMapping[str, ParameterValue] = attr.field( - init=False, factory=dict) + _items: MutableMapping[str, ParameterValue] = attr.field(init=False, factory=dict) - def __init__(self, iterable: Any = tuple(), **kwargs): - self.__attrs_init__() - self.update(iterable, **kwargs) + def __init__(self, iterable: Any = tuple(), **kwargs): + self.__attrs_init__() + self.update(iterable, **kwargs) - def __setitem__(self, key: str, value: Union[ParameterValue, - ParameterValueTypes]): - if isinstance(value, ParameterValue): - self._items[key] = value - else: - self._items[key] = ParameterValue(value) + def __setitem__(self, key: str, value: Union[ParameterValue, ParameterValueTypes]): + if isinstance(value, ParameterValue): + self._items[key] = value + else: + self._items[key] = ParameterValue(value) - def __delitem__(self, key: str): - del self._items[key] + def __delitem__(self, key: str): + del self._items[key] - def __getitem__(self, key: str) -> ParameterValue: - return self._items[key] + def __getitem__(self, key: str) -> ParameterValue: + return self._items[key] - def __len__(self) -> int: - return len(self._items) + def __len__(self) -> int: + return len(self._items) - def __iter__(self): - return iter(self._items) + def __iter__(self): + return iter(self._items) - def get_value( - self, - key: str, - default: Optional[ParameterValueTypes] = None - ) -> Optional[ParameterValueTypes]: - pv = self.get(key, default) - if isinstance(pv, ParameterValue): - return pv.value - else: - return pv + def get_value( + self, key: str, default: Optional[ParameterValueTypes] = None + ) -> Optional[ParameterValueTypes]: + pv = self.get(key, default) + if isinstance(pv, ParameterValue): + return pv.value + else: + return pv @attr.define(auto_attribs=True, frozen=False, init=True, slots=True) class Trial: - """Wrapper for learning_vizier.service.Trial proto.""" - id: int = attr.ib( - init=True, - kw_only=True, - default=0, - 
validator=attr.validators.instance_of(int), - ) - - _is_requested: bool = attr.ib( - init=True, - kw_only=True, - default=False, - validator=attr.validators.instance_of(bool)) - - assigned_worker: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - stopping_reason: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - _infeasibility_reason: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - description: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - parameters: ParameterDict = attr.field( - init=True, - kw_only=True, - factory=ParameterDict, - converter=ParameterDict, - validator=attr.validators.instance_of(ParameterDict)) - - related_links: Dict[str, str] = attr.ib( - init=True, - kw_only=True, - factory=dict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(str), - mapping_validator=attr.validators.instance_of(dict)), - ) # pytype: disable=wrong-arg-types - - final_measurement: Optional[Measurement] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional( - attr.validators.instance_of(Measurement)), - ) - - measurements: List[Measurement] = attr.ib( - init=True, - kw_only=True, - default=list(), - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(Measurement), - iterable_validator=attr.validators.instance_of(list)), - ) - - creation_time: Optional[datetime.datetime] = attr.ib( - init=True, - default=datetime.datetime.now(), - converter=_to_local_time, - kw_only=True, - repr=lambda v: v.strftime('%x %X') if v is 
not None else 'None', - validator=attr.validators.optional( - attr.validators.instance_of(datetime.datetime)), - ) - - completion_time: Optional[datetime.datetime] = attr.ib( - init=True, - kw_only=True, - default=None, - repr=lambda v: v.strftime('%x %X') if v is not None else 'None', - converter=_to_local_time, - validator=attr.validators.optional( - attr.validators.instance_of(datetime.datetime)), - ) - - @property - def duration(self) -> Optional[datetime.timedelta]: - """Returns the duration of this Trial if it is completed, or None.""" - if self.completion_time: - return self.completion_time - self.creation_time - else: - return None - - @property - def status(self) -> TrialStatus: - """Status. - - COMPLETED: Trial has final measurement or is declared infeasible. - ACTIVE: Trial is being evaluated. - STOPPING: Trial is being evaluated, but was decided to be not worth further - evaluating. - REQUESTED: Trial is queued for future suggestions. - """ - if self.final_measurement is not None or self.infeasible: - return TrialStatus.COMPLETED - elif self.stopping_reason is not None: - return TrialStatus.STOPPING - elif self._is_requested: - return TrialStatus.REQUESTED - else: - return TrialStatus.ACTIVE - - @property - def is_completed(self) -> bool: - """Returns True if this Trial is completed.""" - if self.status == TrialStatus.COMPLETED: - if self.completion_time is None: - logging.warning('Invalid Trial state: status is COMPLETED, but a ' - ' completion_time was not set') - return True - elif self.completion_time is not None: - if self.status is None: - logging.warning('Invalid Trial state: status is not set to COMPLETED, ' - 'but a completion_time is set') - return True - return False - - @property - def infeasible(self) -> bool: - """Returns True if this Trial is infeasible.""" - return self._infeasibility_reason is not None - - @property - def infeasibility_reason(self) -> Optional[str]: - """Returns this Trial's infeasibility reason, if set.""" - return 
self._infeasibility_reason - - def complete(self, - measurement: Measurement, - *, - inplace: bool = True) -> 'Trial': - """Completes the trial and returns it. - - Args: - measurement: Measurement to complete the trial with. - inplace: If True, Trial is modified in place. If False, which is the - default, then the operation is performed and it returns a copy of the - object - - Returns: - Completed Trial. - """ - if inplace: - # Use setattr. If we assign to self.final_measurement, then hyperref - # mechanisms think this line is where `final_measurement` property - # is defined, instead of where we declare attr.ib. - self.__setattr__('final_measurement', copy.deepcopy(measurement)) - self.completion_time = _to_local_time(datetime.datetime.now()) - return self - else: - clone = copy.deepcopy(self) - return clone.complete(measurement, inplace=True) + """Wrapper for learning_vizier.service.Trial proto.""" + + id: int = attr.ib( + init=True, + kw_only=True, + default=0, + validator=attr.validators.instance_of(int), + ) + + _is_requested: bool = attr.ib( + init=True, + kw_only=True, + default=False, + validator=attr.validators.instance_of(bool), + ) + + assigned_worker: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + stopping_reason: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + _infeasibility_reason: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + description: Optional[str] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) + + parameters: ParameterDict = attr.field( + init=True, + kw_only=True, + factory=ParameterDict, + converter=ParameterDict, + 
validator=attr.validators.instance_of(ParameterDict), + ) + + related_links: Dict[str, str] = attr.ib( + init=True, + kw_only=True, + factory=dict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(str), + mapping_validator=attr.validators.instance_of(dict), + ), + ) # pytype: disable=wrong-arg-types + + final_measurement: Optional[Measurement] = attr.ib( + init=True, + kw_only=True, + default=None, + validator=attr.validators.optional(attr.validators.instance_of(Measurement)), + ) + + measurements: List[Measurement] = attr.ib( + init=True, + kw_only=True, + default=list(), + validator=attr.validators.deep_iterable( + member_validator=attr.validators.instance_of(Measurement), + iterable_validator=attr.validators.instance_of(list), + ), + ) + + creation_time: Optional[datetime.datetime] = attr.ib( + init=True, + default=datetime.datetime.now(), + converter=_to_local_time, + kw_only=True, + repr=lambda v: v.strftime("%x %X") if v is not None else "None", + validator=attr.validators.optional( + attr.validators.instance_of(datetime.datetime) + ), + ) + + completion_time: Optional[datetime.datetime] = attr.ib( + init=True, + kw_only=True, + default=None, + repr=lambda v: v.strftime("%x %X") if v is not None else "None", + converter=_to_local_time, + validator=attr.validators.optional( + attr.validators.instance_of(datetime.datetime) + ), + ) + + @property + def duration(self) -> Optional[datetime.timedelta]: + """Returns the duration of this Trial if it is completed, or None.""" + if self.completion_time: + return self.completion_time - self.creation_time + else: + return None + + @property + def status(self) -> TrialStatus: + """Status. + + COMPLETED: Trial has final measurement or is declared infeasible. + ACTIVE: Trial is being evaluated. + STOPPING: Trial is being evaluated, but was decided to be not worth further + evaluating. + REQUESTED: Trial is queued for future suggestions. 
+ """ + if self.final_measurement is not None or self.infeasible: + return TrialStatus.COMPLETED + elif self.stopping_reason is not None: + return TrialStatus.STOPPING + elif self._is_requested: + return TrialStatus.REQUESTED + else: + return TrialStatus.ACTIVE + + @property + def is_completed(self) -> bool: + """Returns True if this Trial is completed.""" + if self.status == TrialStatus.COMPLETED: + if self.completion_time is None: + logging.warning( + "Invalid Trial state: status is COMPLETED, but a " + " completion_time was not set" + ) + return True + elif self.completion_time is not None: + if self.status is None: + logging.warning( + "Invalid Trial state: status is not set to COMPLETED, " + "but a completion_time is set" + ) + return True + return False + + @property + def infeasible(self) -> bool: + """Returns True if this Trial is infeasible.""" + return self._infeasibility_reason is not None + + @property + def infeasibility_reason(self) -> Optional[str]: + """Returns this Trial's infeasibility reason, if set.""" + return self._infeasibility_reason + + def complete(self, measurement: Measurement, *, inplace: bool = True) -> "Trial": + """Completes the trial and returns it. + + Args: + measurement: Measurement to complete the trial with. + inplace: If True, Trial is modified in place. If False, which is the + default, then the operation is performed and it returns a copy of the + object + + Returns: + Completed Trial. + """ + if inplace: + # Use setattr. If we assign to self.final_measurement, then hyperref + # mechanisms think this line is where `final_measurement` property + # is defined, instead of where we declare attr.ib. + self.__setattr__("final_measurement", copy.deepcopy(measurement)) + self.completion_time = _to_local_time(datetime.datetime.now()) + return self + else: + clone = copy.deepcopy(self) + return clone.complete(measurement, inplace=True) # Define aliases. 
@@ -466,72 +475,80 @@ def complete(self, @attr.frozen class TrialSuggestion: - """Freshly suggested trial. + """Freshly suggested trial. + + Suggestion can be converted to Trial object which has more functionalities. + """ - Suggestion can be converted to Trial object which has more functionalities. - """ + parameters: ParameterDict = attr.field( + init=True, + factory=ParameterDict, + converter=ParameterDict, + validator=attr.validators.instance_of(ParameterDict), + ) # pytype: disable=wrong-arg-types - parameters: ParameterDict = attr.field( - init=True, - factory=ParameterDict, - converter=ParameterDict, - validator=attr.validators.instance_of(ParameterDict)) # pytype: disable=wrong-arg-types + def to_trial(self, uid: int) -> Trial: + """Assign an id and make it a Trial object. - def to_trial(self, uid: int) -> Trial: - """Assign an id and make it a Trial object. + Usually SuggetedTrial objects are shorted-lived and not exposed to end + users. This method is for non-service usage of trial suggestions in + benchmarks, tests, colabs, etc. - Usually SuggetedTrial objects are shorted-lived and not exposed to end - users. This method is for non-service usage of trial suggestions in - benchmarks, tests, colabs, etc. + Args: + uid: Trial id. - Args: - uid: Trial id. + Returns: + Trial object. + """ + return Trial(id=uid, parameters=self.parameters) - Returns: - Trial object. - """ - return Trial(id=uid, parameters=self.parameters) @attr.define class TrialFilter: - """Trial filter. - - All filters are by default 'AND' conditions. - - Attributes: - ids: If set, requires the trial's id to be in the set. - min_id: If set, requires the trial's id to be at least this number. - max_id: If set, requires the trial's id to be at most this number. - status: If set, requires the trial's status to be in the set. 
- """ - ids: Optional[FrozenSet[int]] = attr.field( - default=None, - converter=lambda x: frozenset(x) if x is not None else None, - validator=attr.validators.optional( - attr.validators.deep_iterable( - attr.validators.instance_of(int), - attr.validators.instance_of(frozenset)))) - min_id: Optional[int] = attr.field(default=None) - max_id: Optional[int] = attr.field(default=None) - status: Optional[FrozenSet[TrialStatus]] = attr.field( - default=None, - converter=lambda x: frozenset(x) if x is not None else None, - validator=attr.validators.optional( - attr.validators.deep_iterable( - attr.validators.instance_of(TrialStatus), - attr.validators.instance_of(frozenset)))) - - def __call__(self, trial: Trial) -> bool: - if self.ids is not None: - if trial.id not in self.ids: - return False - if self.min_id is not None: - if trial.id < self.min_id: - return False - if self.max_id is not None: - if trial.id > self.max_id: - return False - if self.status is not None: - if trial.status not in self.status: - return False - return True + """Trial filter. + + All filters are by default 'AND' conditions. + + Attributes: + ids: If set, requires the trial's id to be in the set. + min_id: If set, requires the trial's id to be at least this number. + max_id: If set, requires the trial's id to be at most this number. + status: If set, requires the trial's status to be in the set. 
+ """ + + ids: Optional[FrozenSet[int]] = attr.field( + default=None, + converter=lambda x: frozenset(x) if x is not None else None, + validator=attr.validators.optional( + attr.validators.deep_iterable( + attr.validators.instance_of(int), attr.validators.instance_of(frozenset) + ) + ), + ) + min_id: Optional[int] = attr.field(default=None) + max_id: Optional[int] = attr.field(default=None) + status: Optional[FrozenSet[TrialStatus]] = attr.field( + default=None, + converter=lambda x: frozenset(x) if x is not None else None, + validator=attr.validators.optional( + attr.validators.deep_iterable( + attr.validators.instance_of(TrialStatus), + attr.validators.instance_of(frozenset), + ) + ), + ) + + def __call__(self, trial: Trial) -> bool: + if self.ids is not None: + if trial.id not in self.ids: + return False + if self.min_id is not None: + if trial.id < self.min_id: + return False + if self.max_id is not None: + if trial.id > self.max_id: + return False + if self.status is not None: + if trial.status not in self.status: + return False + return True diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial_test.py b/google/cloud/aiplatform/vizier/pyvizier/trial_test.py index 20f6281995..1eee089706 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/trial_test.py +++ b/google/cloud/aiplatform/vizier/pyvizier/trial_test.py @@ -13,207 +13,199 @@ class MetricTest(absltest.TestCase): + def testMetricCreation(self): + _ = Metric(value=0, std=0.5) - def testMetricCreation(self): - _ = Metric(value=0, std=0.5) + def testMetricCanHaveNaN(self): + _ = Metric(value=np.nan, std=-np.nan) - def testMetricCanHaveNaN(self): - _ = Metric(value=np.nan, std=-np.nan) - - def testMetricCannotHaveNegativeStd(self): - with self.assertRaises(ValueError): - _ = Metric(value=0, std=-0.5) + def testMetricCannotHaveNegativeStd(self): + with self.assertRaises(ValueError): + _ = Metric(value=0, std=-0.5) class MeasurementTest(absltest.TestCase): + def testMetricsInitializedFromFloats(self): + m 
= Measurement() + m.metrics = dict(a=0.3) + self.assertEqual(m.metrics["a"], Metric(0.3)) + m.metrics["b"] = 0.5 + self.assertEqual(m.metrics, {"a": Metric(0.3), "b": Metric(0.5)}) - def testMetricsInitializedFromFloats(self): - m = Measurement() - m.metrics = dict(a=0.3) - self.assertEqual(m.metrics['a'], Metric(0.3)) - m.metrics['b'] = 0.5 - self.assertEqual(m.metrics, {'a': Metric(0.3), 'b': Metric(0.5)}) - - def testMetrics(self): - m = Measurement() - m.metrics = dict(a=Metric(0.3)) - self.assertEqual(m.metrics['a'], Metric(0.3)) + def testMetrics(self): + m = Measurement() + m.metrics = dict(a=Metric(0.3)) + self.assertEqual(m.metrics["a"], Metric(0.3)) - def testTimeStampsAreNotFrozen(self): - m = Measurement() - m.elapsed_secs = 1.0 - m.steps = 5 + def testTimeStampsAreNotFrozen(self): + m = Measurement() + m.elapsed_secs = 1.0 + m.steps = 5 ParameterValue = trial.ParameterValue class ParameterValueTest(parameterized.TestCase): - - @parameterized.named_parameters(('True', True), ('False', False)) - def testBool(self, bool_value): - value = ParameterValue(bool_value) - self.assertEqual(value.as_float, float(bool_value)) - self.assertEqual(value.as_int, int(bool_value)) - self.assertEqual(value.as_str, str(bool_value).lower()) - - def testIntegralFloat0(self): - value = ParameterValue(0.0) - self.assertEqual(value.as_float, 0.0) - self.assertEqual(value.as_int, 0) - self.assertEqual(value.as_bool, False) - self.assertIsNone(value.as_str) - - def testIntegralFloat1(self): - value = ParameterValue(1.0) - self.assertEqual(value.as_float, 1.0) - self.assertEqual(value.as_int, 1) - self.assertEqual(value.as_bool, True) - self.assertIsNone(value.as_str) - - def testIntegralFloat2(self): - value = ParameterValue(2.0) - self.assertEqual(value.as_float, 2.0) - self.assertEqual(value.as_int, 2) - self.assertIsNone(value.as_bool) - self.assertIsNone(value.as_str) - - def testInteger0(self): - value = ParameterValue(0) - self.assertEqual(value.as_float, 0) - 
self.assertEqual(value.as_int, 0) - self.assertEqual(value.as_bool, False) - self.assertIsNone(value.as_str) - - def testInteger1(self): - value = ParameterValue(1) - self.assertEqual(value.as_float, 1) - self.assertEqual(value.as_int, 1) - self.assertEqual(value.as_bool, True) - self.assertIsNone(value.as_str) - - def testInteger2(self): - value = ParameterValue(2) - self.assertEqual(value.as_float, 2) - self.assertEqual(value.as_int, 2) - self.assertIsNone(value.as_bool) - self.assertIsNone(value.as_str) - - def testStringTrue(self): - value = ParameterValue('true') - self.assertEqual(value.as_bool, True) - self.assertEqual(value.as_str, 'true') - - def testStringFalse(self): - value = ParameterValue('false') - self.assertEqual(value.as_bool, False) - self.assertEqual(value.as_str, 'false') - - def testCastAsExternalNone(self): - value = ParameterValue(1.0) - # pytype: disable=wrong-arg-types - with self.assertRaisesRegex(ValueError, 'Unknown external type'): - value.cast(None) - # pytype: enable=wrong-arg-types - - def testParameterCanHaveNonFiniteValues(self): - ParameterValue(float('nan')) - ParameterValue(value=float('inf')) - ParameterValue(value=float('inf')) + @parameterized.named_parameters(("True", True), ("False", False)) + def testBool(self, bool_value): + value = ParameterValue(bool_value) + self.assertEqual(value.as_float, float(bool_value)) + self.assertEqual(value.as_int, int(bool_value)) + self.assertEqual(value.as_str, str(bool_value).lower()) + + def testIntegralFloat0(self): + value = ParameterValue(0.0) + self.assertEqual(value.as_float, 0.0) + self.assertEqual(value.as_int, 0) + self.assertEqual(value.as_bool, False) + self.assertIsNone(value.as_str) + + def testIntegralFloat1(self): + value = ParameterValue(1.0) + self.assertEqual(value.as_float, 1.0) + self.assertEqual(value.as_int, 1) + self.assertEqual(value.as_bool, True) + self.assertIsNone(value.as_str) + + def testIntegralFloat2(self): + value = ParameterValue(2.0) + 
self.assertEqual(value.as_float, 2.0) + self.assertEqual(value.as_int, 2) + self.assertIsNone(value.as_bool) + self.assertIsNone(value.as_str) + + def testInteger0(self): + value = ParameterValue(0) + self.assertEqual(value.as_float, 0) + self.assertEqual(value.as_int, 0) + self.assertEqual(value.as_bool, False) + self.assertIsNone(value.as_str) + + def testInteger1(self): + value = ParameterValue(1) + self.assertEqual(value.as_float, 1) + self.assertEqual(value.as_int, 1) + self.assertEqual(value.as_bool, True) + self.assertIsNone(value.as_str) + + def testInteger2(self): + value = ParameterValue(2) + self.assertEqual(value.as_float, 2) + self.assertEqual(value.as_int, 2) + self.assertIsNone(value.as_bool) + self.assertIsNone(value.as_str) + + def testStringTrue(self): + value = ParameterValue("true") + self.assertEqual(value.as_bool, True) + self.assertEqual(value.as_str, "true") + + def testStringFalse(self): + value = ParameterValue("false") + self.assertEqual(value.as_bool, False) + self.assertEqual(value.as_str, "false") + + def testCastAsExternalNone(self): + value = ParameterValue(1.0) + # pytype: disable=wrong-arg-types + with self.assertRaisesRegex(ValueError, "Unknown external type"): + value.cast(None) + # pytype: enable=wrong-arg-types + + def testParameterCanHaveNonFiniteValues(self): + ParameterValue(float("nan")) + ParameterValue(value=float("inf")) + ParameterValue(value=float("inf")) class TrialTest(absltest.TestCase): - - def testCompleteInplace(self): - test = trial.Trial() - measurement = Measurement(metrics={ - 'pr-auc': Metric(value=0.8), - 'latency': Metric(value=32) - }) - completed = test.complete(measurement, inplace=True) - - # The trial was completed in place. 
- self.assertEqual(test.final_measurement, measurement) - self.assertLessEqual(test.completion_time, - datetime.datetime.now().astimezone()) - self.assertGreaterEqual(test.completion_time, test.creation_time) - self.assertGreaterEqual(test.duration.total_seconds(), 0) - - # completed is the same reference as test. - self.assertEqual(test, completed) - - def testCompleteNotInplace(self): - """Complete with inplace=False.""" - test = trial.Trial() - measurement = Measurement(metrics={ - 'pr-auc': Metric(value=0.8), - 'latency': Metric(value=32) - }) - - test_copy = copy.deepcopy(test) - - completed = test.complete(measurement, inplace=False) - - # The returned Trial is completed. - self.assertEqual(completed.final_measurement, measurement) - self.assertGreaterEqual(completed.completion_time, completed.creation_time) - self.assertLessEqual(completed.completion_time, - datetime.datetime.now().astimezone()) - self.assertGreaterEqual(completed.duration.total_seconds(), 0) - self.assertEqual(completed.status, trial.TrialStatus.COMPLETED) - self.assertTrue(completed.is_completed) - - # The original Trial is unchanged. - self.assertEqual(test_copy, test) - self.assertIsNone(test.final_measurement) - self.assertIsNone(test.completion_time) - self.assertIsNone(test.duration) - self.assertEqual(test.status, trial.TrialStatus.ACTIVE) - self.assertFalse(test.is_completed) - - def testDefaultsNotShared(self): - """Make sure default parameters are not shared between instances.""" - trial1 = trial.Trial() - trial2 = trial.Trial() - trial1.parameters['x1'] = trial.ParameterValue(5) - self.assertEmpty(trial2.parameters) + def testCompleteInplace(self): + test = trial.Trial() + measurement = Measurement( + metrics={"pr-auc": Metric(value=0.8), "latency": Metric(value=32)} + ) + completed = test.complete(measurement, inplace=True) + + # The trial was completed in place. 
+ self.assertEqual(test.final_measurement, measurement) + self.assertLessEqual(test.completion_time, datetime.datetime.now().astimezone()) + self.assertGreaterEqual(test.completion_time, test.creation_time) + self.assertGreaterEqual(test.duration.total_seconds(), 0) + + # completed is the same reference as test. + self.assertEqual(test, completed) + + def testCompleteNotInplace(self): + """Complete with inplace=False.""" + test = trial.Trial() + measurement = Measurement( + metrics={"pr-auc": Metric(value=0.8), "latency": Metric(value=32)} + ) + + test_copy = copy.deepcopy(test) + + completed = test.complete(measurement, inplace=False) + + # The returned Trial is completed. + self.assertEqual(completed.final_measurement, measurement) + self.assertGreaterEqual(completed.completion_time, completed.creation_time) + self.assertLessEqual( + completed.completion_time, datetime.datetime.now().astimezone() + ) + self.assertGreaterEqual(completed.duration.total_seconds(), 0) + self.assertEqual(completed.status, trial.TrialStatus.COMPLETED) + self.assertTrue(completed.is_completed) + + # The original Trial is unchanged. 
+ self.assertEqual(test_copy, test) + self.assertIsNone(test.final_measurement) + self.assertIsNone(test.completion_time) + self.assertIsNone(test.duration) + self.assertEqual(test.status, trial.TrialStatus.ACTIVE) + self.assertFalse(test.is_completed) + + def testDefaultsNotShared(self): + """Make sure default parameters are not shared between instances.""" + trial1 = trial.Trial() + trial2 = trial.Trial() + trial1.parameters["x1"] = trial.ParameterValue(5) + self.assertEmpty(trial2.parameters) class ParameterDictTest(parameterized.TestCase): - - @parameterized.parameters((True,), (3,), (1.,), ('aa',)) - def testAssignRawValue(self, v): - d = trial.ParameterDict() - d['p1'] = v - self.assertEqual(d.get('p1'), trial.ParameterValue(v)) - self.assertEqual(d.get_value('p1'), v) - self.assertEqual(d.get_value('p2', 'default'), 'default') - self.assertLen(d, 1) - self.assertLen(d.items(), 1) - - @parameterized.parameters((True,), (3,), (1.,), ('aa',)) - def testAssignWrappedValue(self, v): - d = trial.ParameterDict() - v = trial.ParameterValue(v) - d['p1'] = v - self.assertEqual(d.get('p1'), v) - self.assertEqual(d.get_value('p1'), v.value) - self.assertEqual(d.get_value('p2', 'default'), 'default') - self.assertLen(d, 1) - self.assertLen(d.items(), 1) + @parameterized.parameters((True,), (3,), (1.0,), ("aa",)) + def testAssignRawValue(self, v): + d = trial.ParameterDict() + d["p1"] = v + self.assertEqual(d.get("p1"), trial.ParameterValue(v)) + self.assertEqual(d.get_value("p1"), v) + self.assertEqual(d.get_value("p2", "default"), "default") + self.assertLen(d, 1) + self.assertLen(d.items(), 1) + + @parameterized.parameters((True,), (3,), (1.0,), ("aa",)) + def testAssignWrappedValue(self, v): + d = trial.ParameterDict() + v = trial.ParameterValue(v) + d["p1"] = v + self.assertEqual(d.get("p1"), v) + self.assertEqual(d.get_value("p1"), v.value) + self.assertEqual(d.get_value("p2", "default"), "default") + self.assertLen(d, 1) + self.assertLen(d.items(), 1) class 
SuggestionTestI(absltest.TestCase): + def testToTrial(self): + suggestion = trial.TrialSuggestion({"a": 3, "b": True}) + suggestion.metadata["key"] = "value" - def testToTrial(self): - suggestion = trial.TrialSuggestion({'a': 3, 'b': True}) - suggestion.metadata['key'] = 'value' - - t = suggestion.to_trial(1) - self.assertEqual(t.id, 1) - self.assertEqual(t.parameters, suggestion.parameters) - self.assertEqual(t.metadata, suggestion.metadata) + t = suggestion.to_trial(1) + self.assertEqual(t.id, 1) + self.assertEqual(t.parameters, suggestion.parameters) + self.assertEqual(t.metadata, suggestion.metadata) -if __name__ == '__main__': - absltest.main() +if __name__ == "__main__": + absltest.main() diff --git a/google/cloud/aiplatform/vizier/study.py b/google/cloud/aiplatform/vizier/study.py index 121aac70f3..a10baa7e9a 100644 --- a/google/cloud/aiplatform/vizier/study.py +++ b/google/cloud/aiplatform/vizier/study.py @@ -32,14 +32,15 @@ from google.cloud.aiplatform.compat.services import vizier_service_client_v1 from google.cloud.aiplatform.compat.types import ( study as gca_study, - vizier_service as gca_vizier_service + vizier_service as gca_vizier_service, ) -_T = TypeVar('_T') +_T = TypeVar("_T") _LOGGER = base.Logger(__name__) -class Study(base.VertexAiResourceNounWithFutureManager, StudyInterface): + +class Study(base.VertexAiResourceNounWithFutureManager, StudyInterface): """Manage Study resource for Vertex Vizier.""" client_class = utils.VizierClientWithOverride @@ -62,9 +63,9 @@ def __init__( Example Usage: study = aiplatform.Study(study_id = '12345678') - or + or study = aiplatform.Study(study_id = 'projects/123/locations/us-central1/studies/12345678') - + Args: study_id (str): Required. A fully-qualified study resource name or a study ID. @@ -91,106 +92,128 @@ def __init__( @classmethod @base.optional_sync() - def create_or_load(cls, display_name: str, problem: vz.ProblemStatement) -> StudyInterface: - """Creates a Study resource. 
- - Example Usage: - sc = pyvizier.StudyConfig() - sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH - sc.metric_information.append( - pyvizier.MetricInformation( - name='pr-auc', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) - root = sc.search_space.select_root() - root.add_float_param( - 'learning_rate', 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR) - root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) - study = aiplatform.Study.create_or_load(display_name='display_name', problem=sc) - - Args: - display_name (str): - A name to describe the Study. - problem (vz.ProblemStatement): - Configurations of the study. It defines the problem to create the study. - """ - api_client = cls._instantiate_client(location=initializer.global_config.location, credentials=initializer.global_config.credentials) - study = gca_study.Study( - display_name = display_name, - study_spec = problem.to_proto() - ) - - try: - study = api_client.create_study(parent = initializer.global_config.common_location_path(initializer.global_config.project, initializer.global_config.location), study = study) - except exceptions.AlreadyExists: - _LOGGER.info("The study is aleady created. Using existing study.") - study = api_client.lookup_study(request = { - "parent" : initializer.global_config.common_location_path(initializer.global_config.project, initializer.global_config.location), - "display_name" : display_name}) - - return Study(study.name) + def create_or_load( + cls, display_name: str, problem: vz.ProblemStatement + ) -> StudyInterface: + """Creates a Study resource. 
+ + Example Usage: + sc = pyvizier.StudyConfig() + sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH + sc.metric_information.append( + pyvizier.MetricInformation( + name='pr-auc', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + root = sc.search_space.select_root() + root.add_float_param( + 'learning_rate', 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR) + root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) + study = aiplatform.Study.create_or_load(display_name='display_name', problem=sc) + + Args: + display_name (str): + A name to describe the Study. + problem (vz.ProblemStatement): + Configurations of the study. It defines the problem to create the study. + """ + api_client = cls._instantiate_client( + location=initializer.global_config.location, + credentials=initializer.global_config.credentials, + ) + study = gca_study.Study( + display_name=display_name, study_spec=problem.to_proto() + ) + + try: + study = api_client.create_study( + parent=initializer.global_config.common_location_path( + initializer.global_config.project, + initializer.global_config.location, + ), + study=study, + ) + except exceptions.AlreadyExists: + _LOGGER.info("The study is aleady created. 
Using existing study.") + study = api_client.lookup_study( + request={ + "parent": initializer.global_config.common_location_path( + initializer.global_config.project, + initializer.global_config.location, + ), + "display_name": display_name, + } + ) + + return Study(study.name) def get_trial(self, uid: int) -> TrialInterface: - """Retrieves the trial under the study by given trial id.""" - study_path_components = self._parse_resource_name(self.resource_name) - _LOGGER.info(study_path_components) - return Trial(Trial._format_resource_name( - project=study_path_components["project"], - location=study_path_components["location"], - study=study_path_components["study"], - trial=uid)) + """Retrieves the trial under the study by given trial id.""" + study_path_components = self._parse_resource_name(self.resource_name) + _LOGGER.info(study_path_components) + return Trial( + Trial._format_resource_name( + project=study_path_components["project"], + location=study_path_components["location"], + study=study_path_components["study"], + trial=uid, + ) + ) def trials( - self, - trial_filter: Optional[vz.TrialFilter] = None + self, trial_filter: Optional[vz.TrialFilter] = None ) -> Collection[TrialInterface]: - """Fetches a collection of trials.""" - list_trials_request = {"parent": self.resource_name} - trials_response = self.api_client.list_trials(request=list_trials_request) - return [Trial(trial.name) for trial in trials_response.trials] + """Fetches a collection of trials.""" + list_trials_request = {"parent": self.resource_name} + trials_response = self.api_client.list_trials(request=list_trials_request) + return [Trial(trial.name) for trial in trials_response.trials] def optimal_trials(self) -> Collection[TrialInterface]: - """Returns optimal trial(s).""" - list_optimal_trials_request = {"parent": self.resource_name} - optimal_trials_response = self.api_client.list_optimal_trials(request=list_optimal_trials_request) - return [Trial(trial.name) for trial in 
optimal_trials_response.optimal_trials] - + """Returns optimal trial(s).""" + list_optimal_trials_request = {"parent": self.resource_name} + optimal_trials_response = self.api_client.list_optimal_trials( + request=list_optimal_trials_request + ) + return [Trial(trial.name) for trial in optimal_trials_response.optimal_trials] + def materialize_study_config(self) -> vz.StudyConfig: - """#Materializes the study config.""" - study = self.api_client.get_study(name = self.resource_name) - return copy.deepcopy(vz.StudyConfig.from_proto(study.study_spec)) - + """#Materializes the study config.""" + study = self.api_client.get_study(name=self.resource_name) + return copy.deepcopy(vz.StudyConfig.from_proto(study.study_spec)) + @classmethod def from_uid(cls: Type[_T], uid: str) -> _T: - """Fetches an existing study from the Vizier service. - - Args: - uid: Unique identifier of the study. - """ - return Study(study_id = uid) - - def suggest(self, - *, - count: Optional[int] = None, - worker: str = '') -> Collection[TrialInterface]: - """Returns Trials to be evaluated by worker. - - Args: - count: Number of suggestions. - worker: When new Trials are generated, their `assigned_worker` field is - populated with this worker. suggest() first looks for existing Trials - that are assigned to `worker`, before generating new ones. - """ - suggest_trials_lro = self.api_client.suggest_trials(request={ - "parent": self.resource_name, - "suggestion_count": count, - "client_id": worker}) - _LOGGER.log_action_started_against_resource_with_lro( - "Suggest", "study", self.__class__, suggest_trials_lro) - _LOGGER.info(self.client_class.get_gapic_client_class()) - trials = suggest_trials_lro.result() - _LOGGER.log_action_completed_against_resource("study", "suggested", self) - return [Trial(trial.name) for trial in trials.trials] + """Fetches an existing study from the Vizier service. + + Args: + uid: Unique identifier of the study. 
+ """ + return Study(study_id=uid) + def suggest( + self, *, count: Optional[int] = None, worker: str = "" + ) -> Collection[TrialInterface]: + """Returns Trials to be evaluated by worker. + + Args: + count: Number of suggestions. + worker: When new Trials are generated, their `assigned_worker` field is + populated with this worker. suggest() first looks for existing Trials + that are assigned to `worker`, before generating new ones. + """ + suggest_trials_lro = self.api_client.suggest_trials( + request={ + "parent": self.resource_name, + "suggestion_count": count, + "client_id": worker, + } + ) + _LOGGER.log_action_started_against_resource_with_lro( + "Suggest", "study", self.__class__, suggest_trials_lro + ) + _LOGGER.info(self.client_class.get_gapic_client_class()) + trials = suggest_trials_lro.result() + _LOGGER.log_action_completed_against_resource("study", "suggested", self) + return [Trial(trial.name) for trial in trials.trials] def delete(self) -> None: - """Deletes the study.""" - self.api_client.delete_study(name = self.resource_name) + """Deletes the study.""" + self.api_client.delete_study(name=self.resource_name) diff --git a/google/cloud/aiplatform/vizier/trial.py b/google/cloud/aiplatform/vizier/trial.py index 7be711e530..9b7f213314 100644 --- a/google/cloud/aiplatform/vizier/trial.py +++ b/google/cloud/aiplatform/vizier/trial.py @@ -28,11 +28,11 @@ from google.cloud.aiplatform.compat.services import vizier_service_client_v1 -_T = TypeVar('_T') +_T = TypeVar("_T") _LOGGER = base.Logger(__name__) -class Trial(base.VertexAiResourceNounWithFutureManager, TrialInterface): +class Trial(base.VertexAiResourceNounWithFutureManager, TrialInterface): """Manage Trial resource for Vertex Vizier.""" client_class = utils.VizierClientWithOverride @@ -56,9 +56,9 @@ def __init__( Example Usage: trial = aiplatform.Trial(trial_name = 'projects/123/locations/us-central1/studies/12345678/trials/1') - or + or trial = aiplatform.Trial(trial_name = '1', study_id = 
'12345678') - + Args: trial_name (str): Required. A fully-qualified trial resource name or a trial ID. @@ -89,85 +89,96 @@ def __init__( self._gca_resource = self._get_gca_resource( resource_name=trial_name, parent_resource_name_fields={ - study.Study._resource_noun: study_id, + study.Study._resource_noun: study_id, } if study_id else study_id, ) - - + @property def uid(self) -> int: - """Unique identifier of the trial.""" - trial_path_components = self._parse_resource_name(self.resource_name) - return int(trial_path_components["trial"]) - + """Unique identifier of the trial.""" + trial_path_components = self._parse_resource_name(self.resource_name) + return int(trial_path_components["trial"]) + @property def parameters(self) -> Mapping[str, Any]: - """Parameters of the trial.""" - trial = self.api_client.get_trial(name = self.resource_name) - return vz.TrialConverter.from_proto(trial).parameters - + """Parameters of the trial.""" + trial = self.api_client.get_trial(name=self.resource_name) + return vz.TrialConverter.from_proto(trial).parameters + @property def status(self) -> vz.TrialStatus: - """Status of the Trial.""" - trial = self.api_client.get_trial(name = self.resource_name) - return vz.TrialConverter.from_proto(trial).status - + """Status of the Trial.""" + trial = self.api_client.get_trial(name=self.resource_name) + return vz.TrialConverter.from_proto(trial).status + def delete(self) -> None: - """Deletes the Trial in Vizier service.""" - self.api_client.delete_trial(name=self.resource_name) - + """Deletes the Trial in Vizier service.""" + self.api_client.delete_trial(name=self.resource_name) + def complete( self, measurement: Optional[vz.Measurement] = None, *, - infeasible_reason: Optional[str] = None) -> Optional[vz.Measurement]: - """Completes the trial and #materializes the measurement. - - * If `measurement` is provided, then Vizier writes it as the trial's final - measurement and returns it. 
- * If `infeasible_reason` is provided, `measurement` is not needed. - * If neither is provided, then Vizier selects an existing (intermediate) - measurement to be the final measurement and returns it. - - Args: - measurement: Final measurement. - infeasible_reason: Infeasible reason for missing final measurement. - """ - complete_trial_request = {'name' : self.resource_name} - if infeasible_reason is not None: - complete_trial_request['infeasible_reason'] = infeasible_reason - complete_trial_request['trial_infeasible'] = True - if measurement is not None: - complete_trial_request['final_measurement'] = vz.MeasurementConverter.to_proto(measurement) - trial = self.api_client.complete_trial(request=complete_trial_request) - return vz.MeasurementConverter.from_proto(trial.final_measurement) if trial.final_measurement else None - + infeasible_reason: Optional[str] = None + ) -> Optional[vz.Measurement]: + """Completes the trial and #materializes the measurement. + + * If `measurement` is provided, then Vizier writes it as the trial's final + measurement and returns it. + * If `infeasible_reason` is provided, `measurement` is not needed. + * If neither is provided, then Vizier selects an existing (intermediate) + measurement to be the final measurement and returns it. + + Args: + measurement: Final measurement. + infeasible_reason: Infeasible reason for missing final measurement. 
+ """ + complete_trial_request = {"name": self.resource_name} + if infeasible_reason is not None: + complete_trial_request["infeasible_reason"] = infeasible_reason + complete_trial_request["trial_infeasible"] = True + if measurement is not None: + complete_trial_request[ + "final_measurement" + ] = vz.MeasurementConverter.to_proto(measurement) + trial = self.api_client.complete_trial(request=complete_trial_request) + return ( + vz.MeasurementConverter.from_proto(trial.final_measurement) + if trial.final_measurement + else None + ) + def should_stop(self) -> bool: - """Returns true if the Trial should stop.""" - check_trial_early_stopping_state_request = {'trial_name' : self.resource_name} - should_stop_lro = self.api_client.check_trial_early_stopping_state(request=check_trial_early_stopping_state_request) - _LOGGER.log_action_started_against_resource_with_lro( - "ShouldStop", "trial", self.__class__, should_stop_lro) - should_stop_lro.result() - _LOGGER.log_action_completed_against_resource("trial", "should_stop", self) - return should_stop_lro.result().should_stop - + """Returns true if the Trial should stop.""" + check_trial_early_stopping_state_request = {"trial_name": self.resource_name} + should_stop_lro = self.api_client.check_trial_early_stopping_state( + request=check_trial_early_stopping_state_request + ) + _LOGGER.log_action_started_against_resource_with_lro( + "ShouldStop", "trial", self.__class__, should_stop_lro + ) + should_stop_lro.result() + _LOGGER.log_action_completed_against_resource("trial", "should_stop", self) + return should_stop_lro.result().should_stop + def add_measurement(self, measurement: vz.Measurement) -> None: - """Adds an intermediate measurement.""" - add_trial_measurement_request = { - 'trial_name' : self.resource_name, - } - add_trial_measurement_request['measurement'] = vz.MeasurementConverter.to_proto(measurement) - self.api_client.add_trial_measurement(request = add_trial_measurement_request) + """Adds an intermediate 
measurement.""" + add_trial_measurement_request = { + "trial_name": self.resource_name, + } + add_trial_measurement_request["measurement"] = vz.MeasurementConverter.to_proto( + measurement + ) + self.api_client.add_trial_measurement(request=add_trial_measurement_request) def materialize(self, *, include_all_measurements: bool = True) -> vz.Trial: - """#Materializes the Trial. - - Args: - include_all_measurements: If True, returned Trial includes all - intermediate measurements. The final measurement is always provided. - """ - trial = self.api_client.get_trial(name = self.resource_name) - return copy.deepcopy(vz.TrialConverter.from_proto(trial)) + """#Materializes the Trial. + + Args: + include_all_measurements: If True, returned Trial includes all + intermediate measurements. The final measurement is always provided. + """ + trial = self.api_client.get_trial(name=self.resource_name) + return copy.deepcopy(vz.TrialConverter.from_proto(trial)) diff --git a/tests/system/aiplatform/test_vizier.py b/tests/system/aiplatform/test_vizier.py index 0170ee9e4e..2c949ade83 100644 --- a/tests/system/aiplatform/test_vizier.py +++ b/tests/system/aiplatform/test_vizier.py @@ -5,33 +5,42 @@ from tests.system.aiplatform import e2e_base from google.cloud.aiplatform.vizier import pyvizier -_TEST_STUDY_ID=123 +_TEST_STUDY_ID = 123 + class TestVizier(e2e_base.TestEndToEnd): _temp_prefix = "temp_vertex_sdk_e2e_vizier_test" def test_vizier_lifecycle(self, shared_state): aiplatform.init( - project=e2e_base._PROJECT, location=e2e_base._LOCATION, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, ) sc = pyvizier.StudyConfig() sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name='pr-auc', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name="pr-auc", goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() root.add_float_param( - 'learning_rate', 0.00001, 1.0, 
scale_type=pyvizier.ScaleType.LINEAR) - root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) - sc.automated_stopping_config = pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) + "learning_rate", 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR + ) + root.add_categorical_param("optimizer", ["adagrad", "adam", "experimental"]) + sc.automated_stopping_config = ( + pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) + ) - study = aiplatform.Study.create_or_load(display_name=self._temp_prefix, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=self._temp_prefix, problem=sc + ) shared_state["resources"] = [study] trials = study.suggest(count=3, worker="halio_test_worker") for trial in trials: if not trial.should_stop(): measurement = pyvizier.Measurement() - measurement.metrics['pr-auc'] = 0.4 + measurement.metrics["pr-auc"] = 0.4 trial.add_measurement(measurement=measurement) trial.complete(measurement=measurement) optimal_trials = study.optimal_trials() @@ -39,49 +48,62 @@ def test_vizier_lifecycle(self, shared_state): for trial in study.trials(): assert trial.status == pyvizier.TrialStatus.COMPLETED assert optimal_trials[0].status == pyvizier.TrialStatus.COMPLETED - + def test_vizier_study_deletion(self, shared_state): aiplatform.init( - project=e2e_base._PROJECT, location=e2e_base._LOCATION, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, ) sc = pyvizier.StudyConfig() sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name='pr-auc', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name="pr-auc", goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() root.add_float_param( - 'learning_rate', 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR) - root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) - sc.automated_stopping_config = 
pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) + "learning_rate", 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR + ) + root.add_categorical_param("optimizer", ["adagrad", "adam", "experimental"]) + sc.automated_stopping_config = ( + pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) + ) - study = aiplatform.Study.create_or_load(display_name=self._temp_prefix, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=self._temp_prefix, problem=sc + ) study.delete() - with pytest.raises(exceptions.NotFound): - study = aiplatform.Study(study_id = study.name) - - + study = aiplatform.Study(study_id=study.name) + def test_vizier_trial_deletion(self, shared_state): aiplatform.init( - project=e2e_base._PROJECT, location=e2e_base._LOCATION, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, ) sc = pyvizier.StudyConfig() sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name='pr-auc', goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name="pr-auc", goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() root.add_float_param( - 'learning_rate', 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR) - root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) - sc.automated_stopping_config = pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) + "learning_rate", 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR + ) + root.add_categorical_param("optimizer", ["adagrad", "adam", "experimental"]) + sc.automated_stopping_config = ( + pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) + ) - study = aiplatform.Study.create_or_load(display_name=self._temp_prefix, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=self._temp_prefix, problem=sc + ) trials = study.suggest(count=1, worker="halio_test_worker") trials[0].delete() - with 
pytest.raises(exceptions.NotFound): - study = aiplatform.Trial(study_id = study.name, trial_name = trials[0].name) + study = aiplatform.Trial(study_id=study.name, trial_name=trials[0].name) diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index a1ca701a39..b2c627262c 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -39,9 +39,9 @@ from google.cloud.aiplatform.compat.services import vizier_service_client from google.cloud.aiplatform.compat.types import ( study as gca_study, - vizier_service as gca_vizier_service + vizier_service as gca_vizier_service, ) -from google.protobuf import duration_pb2 +from google.protobuf import duration_pb2 # project @@ -68,42 +68,44 @@ _TEST_PARAMETER_VALUE_2 = ["adagrad", "adam", "experimental"] _TEST_STUDY = gca_study.Study( - display_name = _TEST_DISPLAY_NAME, - study_spec = gca_study.StudySpec( - algorithm = gca_study.StudySpec.Algorithm.RANDOM_SEARCH, - metrics = [gca_study.StudySpec.MetricSpec( - metric_id = _TEST_METRIC_ID, - goal = gca_study.StudySpec.MetricSpec.GoalType.MAXIMIZE - )], - parameters = [ - gca_study.StudySpec.ParameterSpec( - parameter_id = _TEST_PARAMETER_ID_1, - scale_type = gca_study.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE, - double_value_spec = gca_study.StudySpec.ParameterSpec.DoubleValueSpec( - min_value = _TEST_PARAMETER_ID_MIN_VALUE_1, - max_value = _TEST_PARAMETER_ID_MAX_VALUE_1 - ) - ), - gca_study.StudySpec.ParameterSpec( - parameter_id = _TEST_PARAMETER_ID_2, - categorical_value_spec = gca_study.StudySpec.ParameterSpec.CategoricalValueSpec( - values = _TEST_PARAMETER_VALUE_2 - ) - ) - ] - ) + display_name=_TEST_DISPLAY_NAME, + study_spec=gca_study.StudySpec( + algorithm=gca_study.StudySpec.Algorithm.RANDOM_SEARCH, + metrics=[ + gca_study.StudySpec.MetricSpec( + metric_id=_TEST_METRIC_ID, + goal=gca_study.StudySpec.MetricSpec.GoalType.MAXIMIZE, + ) + ], + parameters=[ + gca_study.StudySpec.ParameterSpec( + 
parameter_id=_TEST_PARAMETER_ID_1, + scale_type=gca_study.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE, + double_value_spec=gca_study.StudySpec.ParameterSpec.DoubleValueSpec( + min_value=_TEST_PARAMETER_ID_MIN_VALUE_1, + max_value=_TEST_PARAMETER_ID_MAX_VALUE_1, + ), + ), + gca_study.StudySpec.ParameterSpec( + parameter_id=_TEST_PARAMETER_ID_2, + categorical_value_spec=gca_study.StudySpec.ParameterSpec.CategoricalValueSpec( + values=_TEST_PARAMETER_VALUE_2 + ), + ), + ], + ), ) + @pytest.fixture def get_study_mock(): with patch.object( vizier_service_client.VizierServiceClient, "get_study" ) as get_study_mock: - get_study_mock.return_value = gca_study.Study( - name=_TEST_STUDY_NAME - ) + get_study_mock.return_value = gca_study.Study(name=_TEST_STUDY_NAME) yield get_study_mock + @pytest.fixture def get_trial_mock(): with patch.object( @@ -112,68 +114,77 @@ def get_trial_mock(): get_trial_mock.return_value = gca_study.Trial( name=_TEST_TRIAL_NAME, state=gca_study.Trial.State.ACTIVE, - parameters=[gca_study.Trial.Parameter( - parameter_id=_TEST_PARAMETER_ID_1, - value=_TEST_PARAMETER_ID_MIN_VALUE_1 - )] + parameters=[ + gca_study.Trial.Parameter( + parameter_id=_TEST_PARAMETER_ID_1, + value=_TEST_PARAMETER_ID_MIN_VALUE_1, + ) + ], ) yield get_trial_mock + @pytest.fixture def create_study_mock(): with patch.object( vizier_service_client.VizierServiceClient, "create_study" ) as create_study_mock: - create_study_mock.return_value = ( - gca_study.Study( - name=_TEST_STUDY_NAME, - ) + create_study_mock.return_value = gca_study.Study( + name=_TEST_STUDY_NAME, ) yield create_study_mock + @pytest.fixture def lookup_study_mock(): with patch.object( vizier_service_client.VizierServiceClient, "lookup_study" ) as lookup_study_mock: - lookup_study_mock.return_value = ( - gca_study.Study( - name=_TEST_STUDY_NAME, - ) + lookup_study_mock.return_value = gca_study.Study( + name=_TEST_STUDY_NAME, ) yield lookup_study_mock + @pytest.fixture def suggest_trials_mock(): with 
patch.object( vizier_service_client.VizierServiceClient, "suggest_trials" ) as suggest_trials_mock: suggest_trials_lro_mock = mock.Mock(operation.Operation) - suggest_trials_lro_mock.result.return_value = gca_vizier_service.SuggestTrialsResponse( - trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)]) + suggest_trials_lro_mock.result.return_value = ( + gca_vizier_service.SuggestTrialsResponse( + trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)] + ) + ) suggest_trials_mock.return_value = suggest_trials_lro_mock yield suggest_trials_mock + @pytest.fixture def list_optimal_trials_mock(): with patch.object( vizier_service_client.VizierServiceClient, "list_optimal_trials" ) as list_optimal_trials_mock: - list_optimal_trials_mock.return_value = gca_vizier_service.ListOptimalTrialsResponse( - optimal_trials = [gca_study.Trial(name=_TEST_TRIAL_NAME)] + list_optimal_trials_mock.return_value = ( + gca_vizier_service.ListOptimalTrialsResponse( + optimal_trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)] + ) ) yield list_optimal_trials_mock + @pytest.fixture def list_trials_mock(): with patch.object( vizier_service_client.VizierServiceClient, "list_trials" ) as list_trials_mock: list_trials_mock.return_value = gca_vizier_service.ListTrialsResponse( - trials = [gca_study.Trial(name=_TEST_TRIAL_NAME)] + trials=[gca_study.Trial(name=_TEST_TRIAL_NAME)] ) yield list_trials_mock + @pytest.fixture def delete_study_mock(): with patch.object( @@ -181,6 +192,7 @@ def delete_study_mock(): ) as delete_study_mock: yield delete_study_mock + @pytest.fixture def delete_trial_mock(): with patch.object( @@ -188,49 +200,60 @@ def delete_trial_mock(): ) as delete_trial_mock: yield delete_trial_mock + @pytest.fixture def complete_trial_mock(): with patch.object( vizier_service_client.VizierServiceClient, "complete_trial" ) as complete_trial_mock: complete_trial_mock.return_value = gca_study.Trial( - name=_TEST_TRIAL_NAME, - final_measurement = gca_study.Measurement(step_count = 3, - metrics = 
[gca_study.Measurement.Metric(metric_id = 'y', value = 5)])) + name=_TEST_TRIAL_NAME, + final_measurement=gca_study.Measurement( + step_count=3, + metrics=[gca_study.Measurement.Metric(metric_id="y", value=5)], + ), + ) yield complete_trial_mock + @pytest.fixture def complete_trial_empty_measurement_mock(): with patch.object( vizier_service_client.VizierServiceClient, "complete_trial" ) as complete_trial_empty_measurement_mock: - complete_trial_empty_measurement_mock.return_value = gca_study.Trial(name=_TEST_TRIAL_NAME) + complete_trial_empty_measurement_mock.return_value = gca_study.Trial( + name=_TEST_TRIAL_NAME + ) yield complete_trial_empty_measurement_mock + @pytest.fixture def should_stop_mock(): with patch.object( vizier_service_client.VizierServiceClient, "check_trial_early_stopping_state" ) as should_stop_mock: should_stop_lro_mock = mock.Mock(operation.Operation) - should_stop_lro_mock.result.return_value = gca_vizier_service.CheckTrialEarlyStoppingStateResponse( - should_stop=True) + should_stop_lro_mock.result.return_value = ( + gca_vizier_service.CheckTrialEarlyStoppingStateResponse(should_stop=True) + ) should_stop_mock.return_value = should_stop_lro_mock yield should_stop_mock + @pytest.fixture def create_study_mock_already_exists(): with patch.object( vizier_service_client.VizierServiceClient, "create_study" ) as create_study_mock: create_study_mock.side_effect = [ - exceptions. 
AlreadyExists("Study already exists."), - gca_study.Study( - name=_TEST_STUDY_NAME, - ) + exceptions.AlreadyExists("Study already exists."), + gca_study.Study( + name=_TEST_STUDY_NAME, + ), ] yield create_study_mock + @pytest.fixture def add_measurement_mock(): with patch.object( @@ -238,6 +261,7 @@ def add_measurement_mock(): ) as add_measurement_mock: yield add_measurement_mock + class TestStudy: def setup_method(self): reload(initializer) @@ -253,33 +277,55 @@ def test_create_study(self, create_study_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) - create_study_mock.assert_called_once_with(parent=_TEST_PARENT, study=_TEST_STUDY) + create_study_mock.assert_called_once_with( + parent=_TEST_PARENT, study=_TEST_STUDY + ) assert type(study) == aiplatform.Study @pytest.mark.usefixtures("get_study_mock") - def test_create_study_already_exists(self, create_study_mock_already_exists, lookup_study_mock): + def test_create_study_already_exists( + self, create_study_mock_already_exists, lookup_study_mock + ): aiplatform.init(project=_TEST_PROJECT) sc = pyvizier.StudyConfig() sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - 
name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) - lookup_study_mock.assert_called_once_with(request = { - "parent": _TEST_PARENT, - "display_name": _TEST_DISPLAY_NAME}) + lookup_study_mock.assert_called_once_with( + request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME} + ) assert type(study) == aiplatform.Study @pytest.mark.usefixtures("get_study_mock") @@ -289,15 +335,26 @@ def test_materialize_study_config(self, create_study_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) 
study_config = study.materialize_study_config() - create_study_mock.assert_called_once_with(parent=_TEST_PARENT, study=_TEST_STUDY) + create_study_mock.assert_called_once_with( + parent=_TEST_PARENT, study=_TEST_STUDY + ) assert type(study_config) == pyvizier.StudyConfig @pytest.mark.usefixtures("get_study_mock", "get_trial_mock") @@ -307,25 +364,37 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) - trials = study.suggest(count = 5, worker = 'test_worker') + trials = study.suggest(count=5, worker="test_worker") - suggest_trials_mock.assert_called_once_with(request = { - "parent":_TEST_STUDY_NAME, - "suggestion_count": 5, - "client_id": 'test_worker'}) + suggest_trials_mock.assert_called_once_with( + request={ + "parent": _TEST_STUDY_NAME, + "suggestion_count": 5, + "client_id": "test_worker", + } + ) assert type(trials[0]) == aiplatform.Trial @pytest.mark.usefixtures("get_study_mock") def test_from_uid(self): aiplatform.init(project=_TEST_PROJECT) - study = aiplatform.Study.from_uid(uid = _TEST_STUDY_ID) + study = aiplatform.Study.from_uid(uid=_TEST_STUDY_ID) assert type(study) == aiplatform.Study assert study.name 
== _TEST_STUDY_ID @@ -337,11 +406,20 @@ def test_delete(self, create_study_mock, delete_study_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) study.delete() @@ -354,15 +432,26 @@ def test_optimal_trials(self, list_optimal_trials_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) trials = study.optimal_trials() - list_optimal_trials_mock.assert_called_once_with(request = 
{"parent":_TEST_STUDY_NAME}) + list_optimal_trials_mock.assert_called_once_with( + request={"parent": _TEST_STUDY_NAME} + ) assert type(trials[0]) == aiplatform.Trial @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock") @@ -372,15 +461,24 @@ def test_list_trials(self, list_trials_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + _TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) trials = study.trials() - list_trials_mock.assert_called_once_with(request = {"parent":_TEST_STUDY_NAME}) + list_trials_mock.assert_called_once_with(request={"parent": _TEST_STUDY_NAME}) assert type(trials[0]) == aiplatform.Trial @pytest.mark.usefixtures("get_study_mock", "create_study_mock") @@ -390,15 +488,24 @@ def test_get_trial(self, get_trial_mock): sc.algorithm = pyvizier.Algorithm.RANDOM_SEARCH sc.metric_information.append( pyvizier.MetricInformation( - name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE)) + name=_TEST_METRIC_ID, goal=pyvizier.ObjectiveMetricGoal.MAXIMIZE + ) + ) root = sc.search_space.select_root() - root.add_float_param(_TEST_PARAMETER_ID_1, _TEST_PARAMETER_ID_MIN_VALUE_1, _TEST_PARAMETER_ID_MAX_VALUE_1, scale_type=pyvizier.ScaleType.LINEAR) + root.add_float_param( + 
_TEST_PARAMETER_ID_1, + _TEST_PARAMETER_ID_MIN_VALUE_1, + _TEST_PARAMETER_ID_MAX_VALUE_1, + scale_type=pyvizier.ScaleType.LINEAR, + ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) + study = aiplatform.Study.create_or_load( + display_name=_TEST_DISPLAY_NAME, problem=sc + ) trial = study.get_trial(1) - get_trial_mock.assert_called_once_with(name = _TEST_TRIAL_NAME, retry = ANY) + get_trial_mock.assert_called_once_with(name=_TEST_TRIAL_NAME, retry=ANY) assert type(trial) == aiplatform.Trial @@ -413,84 +520,99 @@ def teardown_method(self): @pytest.mark.usefixtures("get_trial_mock") def test_delete(self, delete_trial_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) trial.delete() - delete_trial_mock.assert_called_once_with(name = _TEST_TRIAL_NAME) + delete_trial_mock.assert_called_once_with(name=_TEST_TRIAL_NAME) assert type(trial) == aiplatform.Trial @pytest.mark.usefixtures("get_trial_mock") def test_complete(self, complete_trial_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() - measurement.metrics['y'] = 4 - - measurement = trial.complete(measurement = measurement, infeasible_reason = 'infeasible') - - complete_trial_mock.assert_called_once_with(request = { - "name": _TEST_TRIAL_NAME, - "infeasible_reason": "infeasible", - "trial_infeasible": True, - "final_measurement": gca_study.Measurement( - elapsed_duration = duration_pb2.Duration(), - metrics = [gca_study.Measurement.Metric(metric_id = 'y', value = 4)]) - }) + measurement.metrics["y"] = 4 + + measurement = trial.complete( + measurement=measurement, infeasible_reason="infeasible" + ) + + complete_trial_mock.assert_called_once_with( + request={ 
+ "name": _TEST_TRIAL_NAME, + "infeasible_reason": "infeasible", + "trial_infeasible": True, + "final_measurement": gca_study.Measurement( + elapsed_duration=duration_pb2.Duration(), + metrics=[gca_study.Measurement.Metric(metric_id="y", value=4)], + ), + } + ) assert type(measurement) == pyvizier.Measurement @pytest.mark.usefixtures("get_trial_mock") def test_complete_empty_measurement(self, complete_trial_empty_measurement_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() - measurement.metrics['y'] = 4 - - measurement = trial.complete(measurement = measurement, infeasible_reason = 'infeasible') - - complete_trial_empty_measurement_mock.assert_called_once_with(request = { - "name": _TEST_TRIAL_NAME, - "infeasible_reason": "infeasible", - "trial_infeasible": True, - "final_measurement": gca_study.Measurement( - elapsed_duration = duration_pb2.Duration(), - metrics = [gca_study.Measurement.Metric(metric_id = 'y', value = 4)]) - }) + measurement.metrics["y"] = 4 + + measurement = trial.complete( + measurement=measurement, infeasible_reason="infeasible" + ) + + complete_trial_empty_measurement_mock.assert_called_once_with( + request={ + "name": _TEST_TRIAL_NAME, + "infeasible_reason": "infeasible", + "trial_infeasible": True, + "final_measurement": gca_study.Measurement( + elapsed_duration=duration_pb2.Duration(), + metrics=[gca_study.Measurement.Metric(metric_id="y", value=4)], + ), + } + ) assert measurement == None @pytest.mark.usefixtures("get_trial_mock") def test_should_stop(self, should_stop_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) should_stop = trial.should_stop() - should_stop_mock.assert_called_once_with(request = { - "trial_name": _TEST_TRIAL_NAME}) + should_stop_mock.assert_called_once_with( + 
request={"trial_name": _TEST_TRIAL_NAME} + ) assert should_stop == True @pytest.mark.usefixtures("get_trial_mock") def test_add_measurement(self, add_measurement_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() - measurement.metrics['y'] = 4 - - add_measurement = trial.add_measurement(measurement = measurement) - - add_measurement_mock.assert_called_once_with(request = { - "trial_name": _TEST_TRIAL_NAME, - "measurement": gca_study.Measurement( - elapsed_duration = duration_pb2.Duration(), - metrics = [gca_study.Measurement.Metric(metric_id = 'y', value = 4)])}) + measurement.metrics["y"] = 4 + + add_measurement = trial.add_measurement(measurement=measurement) + + add_measurement_mock.assert_called_once_with( + request={ + "trial_name": _TEST_TRIAL_NAME, + "measurement": gca_study.Measurement( + elapsed_duration=duration_pb2.Duration(), + metrics=[gca_study.Measurement.Metric(metric_id="y", value=4)], + ), + } + ) assert add_measurement == None @pytest.mark.usefixtures("get_trial_mock") def test_properties(self): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() - measurement.metrics['y'] = 4 + measurement.metrics["y"] = 4 uid = trial.uid status = trial.status @@ -498,16 +620,21 @@ def test_properties(self): assert uid == 1 assert status == pyvizier.TrialStatus.ACTIVE - assert parameters.get_value(_TEST_PARAMETER_ID_1) == _TEST_PARAMETER_ID_MIN_VALUE_1 + assert ( + parameters.get_value(_TEST_PARAMETER_ID_1) == _TEST_PARAMETER_ID_MIN_VALUE_1 + ) @pytest.mark.usefixtures("get_trial_mock") def test_materialize(self): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name = _TEST_TRIAL_NAME) + trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) measurement = 
pyvizier.Measurement() - measurement.metrics['y'] = 4 + measurement.metrics["y"] = 4 materialize_trial = trial.materialize() assert materialize_trial.id == 1 - assert materialize_trial.parameters .get_value(_TEST_PARAMETER_ID_1) == _TEST_PARAMETER_ID_MIN_VALUE_1 + assert ( + materialize_trial.parameters.get_value(_TEST_PARAMETER_ID_1) + == _TEST_PARAMETER_ID_MIN_VALUE_1 + ) From ede8356294bce554f6b2cb6f98b5e25b7e62c969 Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 17 Jun 2022 14:57:30 -0700 Subject: [PATCH 12/36] Fixed the lint errors for the Vizier. --- google/cloud/aiplatform/__init__.py | 2 ++ .../aiplatform/vizier/pyvizier/__init__.py | 32 +++++++++++++++++++ .../vizier/pyvizier/base_study_config.py | 9 +++--- .../vizier/pyvizier/metadata_util.py | 3 +- .../vizier/pyvizier/proto_converters.py | 1 - .../vizier/pyvizier/study_config.py | 1 - .../cloud/aiplatform/vizier/pyvizier/trial.py | 2 -- google/cloud/aiplatform/vizier/study.py | 10 ++---- google/cloud/aiplatform/vizier/trial.py | 6 +--- tests/unit/aiplatform/test_vizier.py | 15 +++------ 10 files changed, 47 insertions(+), 34 deletions(-) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 5c54a70598..fae298c9b8 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -139,6 +139,7 @@ "ModelEvaluation", "PipelineJob", "SequenceToSequencePlusForecastingTrainingJob", + "Study", "TabularDataset", "Tensorboard", "TensorboardExperiment", @@ -146,5 +147,6 @@ "TensorboardTimeSeries", "TextDataset", "TimeSeriesDataset", + "Trial", "VideoDataset", ) diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index 2d1c810f72..3082ffeb8e 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -39,3 +39,35 @@ from google.cloud.aiplatform.vizier.pyvizier.trial import TrialFilter from 
google.cloud.aiplatform.vizier.pyvizier.trial import TrialStatus from google.cloud.aiplatform.vizier.pyvizier.trial import TrialSuggestion + +__all__ = ( + "MetricInformation", + "MetricsConfig", + "MetricType", + "ObjectiveMetricGoal", + "ProblemStatement", + "SearchSpace", + "SearchSpaceSelector", + "Metadata", + "MetadataValue", + "Namespace", + "ParameterConfigConverter", + "MeasurementConverter", + "TrialConverter", + "StudyConfig", + "Algorithm", + "AutomatedStoppingConfig", + "ExternalType", + "ParameterConfig", + "ParameterType", + "ScaleType", + "CompletedTrial", + "Measurement", + "Metric", + "ParameterDict", + "ParameterValue", + "Trial", + "TrialFilter", + "TrialStatus", + "TrialSuggestion", +) diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py index c593b44418..2b92dab7cf 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py @@ -642,6 +642,9 @@ def add_categorical_param( new_params.append(new_pc) return self._add_parameters(new_params)[0] + def bool_to_string(x): + return "True" if x else "False" + def add_bool_param( self, name: str, @@ -685,16 +688,14 @@ def add_bool_param( "feasible_values must be one of %s; got: %s." % (allowed_values, feasible_values) ) - # Boolean parameters are represented as categorical parameters internally. 
- bool_to_string = lambda x: "True" if x else "False" if feasible_values is None: categories = ("True", "False") else: - categories = [bool_to_string(x) for x in feasible_values] + categories = [self.bool_to_string(x) for x in feasible_values] feasible_values = sorted(categories, reverse=True) if default_value is not None: - default_value = bool_to_string(default_value) + default_value = self.bool_to_string(default_value) param_names = self._get_parameter_names_to_create(name=name, index=index) diff --git a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py index 1a8069da59..733d6d5ddb 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py +++ b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py @@ -1,8 +1,7 @@ """Utility functions for handling vizier metadata.""" -from typing import Tuple, Union, Optional, TypeVar, Type +from typing import Union, Optional, TypeVar, Type -# from vizier.service import key_value_pb2 from google.cloud.aiplatform.compat.types import study as study_pb2 from google.protobuf import any_pb2 from google.protobuf.message import Message diff --git a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py index c2c3cd30c8..6385f05011 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py +++ b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py @@ -2,7 +2,6 @@ import datetime import logging from typing import List, Optional, Sequence, Tuple, Union -from absl import logging from google.protobuf import duration_pb2 from google.cloud.aiplatform.compat.types import study as study_pb2 diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index ee5378e81b..093ac26cc4 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ 
-21,7 +21,6 @@ from google.cloud.aiplatform.vizier.pyvizier import automated_stopping from google.cloud.aiplatform.vizier.pyvizier import proto_converters from google.cloud.aiplatform.vizier.pyvizier import base_study_config -from google.cloud.aiplatform.vizier.pyvizier import common from google.cloud.aiplatform.vizier.pyvizier import parameter_config from google.cloud.aiplatform.vizier.pyvizier import trial from google.cloud.aiplatform.compat.types import study as study_pb2 diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py index 2b6f3c5c57..321d6652ea 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/trial.py +++ b/google/cloud/aiplatform/vizier/pyvizier/trial.py @@ -18,8 +18,6 @@ import attr import numpy as np -from google.cloud.aiplatform.vizier.pyvizier import common - ParameterValueTypes = Union[str, int, float, bool] OrderedDict = collections.OrderedDict diff --git a/google/cloud/aiplatform/vizier/study.py b/google/cloud/aiplatform/vizier/study.py index a10baa7e9a..c7feb2887f 100644 --- a/google/cloud/aiplatform/vizier/study.py +++ b/google/cloud/aiplatform/vizier/study.py @@ -15,8 +15,7 @@ # limitations under the License. 
import copy -from typing import Dict, List, Optional, Sequence, Tuple -from typing import Optional, Collection, Type, TypeVar, Mapping, Any +from typing import Optional, Collection, Type, TypeVar from google.cloud.aiplatform.vizier.client_abc import StudyInterface from google.cloud.aiplatform.vizier.client_abc import TrialInterface @@ -25,15 +24,10 @@ from google.cloud.aiplatform import base from google.cloud.aiplatform import utils from google.cloud.aiplatform import initializer -from google.cloud.aiplatform.vizier import study from google.cloud.aiplatform.vizier.trial import Trial from google.cloud.aiplatform.vizier import pyvizier as vz -from google.cloud.aiplatform.compat.services import vizier_service_client_v1 -from google.cloud.aiplatform.compat.types import ( - study as gca_study, - vizier_service as gca_vizier_service, -) +from google.cloud.aiplatform.compat.types import study as gca_study _T = TypeVar("_T") diff --git a/google/cloud/aiplatform/vizier/trial.py b/google/cloud/aiplatform/vizier/trial.py index 9b7f213314..8fa2dd9328 100644 --- a/google/cloud/aiplatform/vizier/trial.py +++ b/google/cloud/aiplatform/vizier/trial.py @@ -15,9 +15,7 @@ # limitations under the License. 
import copy -from typing import Dict, List, Optional, Sequence, Tuple -from typing import Optional, Collection, Type, TypeVar, Mapping, Any -from google.cloud.aiplatform.vizier.client_abc import StudyInterface +from typing import Optional, TypeVar, Mapping, Any from google.cloud.aiplatform.vizier.client_abc import TrialInterface from google.auth import credentials as auth_credentials @@ -26,8 +24,6 @@ from google.cloud.aiplatform.vizier import study from google.cloud.aiplatform.vizier import pyvizier as vz -from google.cloud.aiplatform.compat.services import vizier_service_client_v1 - _T = TypeVar("_T") _LOGGER = base.Logger(__name__) diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index b2c627262c..ce6c3e08c6 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -14,26 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy import pytest -import datetime -import uuid from unittest import mock from importlib import reload -from unittest.mock import MagicMock, patch +from unittest.mock import patch from unittest.mock import ANY from google.api_core import exceptions from google.api_core import operation -from google.protobuf import field_mask_pb2, timestamp_pb2 from google.cloud import aiplatform -from google.cloud.aiplatform import base from google.cloud.aiplatform import initializer -from google.cloud.aiplatform import utils -from google.cloud.aiplatform.utils import resource_manager_utils from google.cloud.aiplatform.vizier import pyvizier from google.cloud.aiplatform.compat.services import vizier_service_client @@ -573,7 +566,7 @@ def test_complete_empty_measurement(self, complete_trial_empty_measurement_mock) ), } ) - assert measurement == None + assert measurement is None @pytest.mark.usefixtures("get_trial_mock") def test_should_stop(self, should_stop_mock): @@ -585,7 +578,7 @@ def test_should_stop(self, 
should_stop_mock): should_stop_mock.assert_called_once_with( request={"trial_name": _TEST_TRIAL_NAME} ) - assert should_stop == True + assert should_stop is True @pytest.mark.usefixtures("get_trial_mock") def test_add_measurement(self, add_measurement_mock): @@ -605,7 +598,7 @@ def test_add_measurement(self, add_measurement_mock): ), } ) - assert add_measurement == None + assert add_measurement is None @pytest.mark.usefixtures("get_trial_mock") def test_properties(self): From f414c0f68091401188f50e8de6224470973c112d Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 17 Jun 2022 16:17:12 -0700 Subject: [PATCH 13/36] Made the unit test import the google credentials. --- tests/unit/aiplatform/test_vizier.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index ce6c3e08c6..1f1e12f1b4 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -255,6 +255,7 @@ def add_measurement_mock(): yield add_measurement_mock +@pytest.mark.usefixtures("google_auth_mock") class TestStudy: def setup_method(self): reload(initializer) @@ -502,6 +503,7 @@ def test_get_trial(self, get_trial_mock): assert type(trial) == aiplatform.Trial +@pytest.mark.usefixtures("google_auth_mock") class TestTrial: def setup_method(self): reload(initializer) From 6dc42e763aa3425fe79bbca8120be771608bc436 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 21 Jun 2022 16:22:03 -0700 Subject: [PATCH 14/36] Disable the coverage check for the pyvizier. It will be imported from the open source vizier. 
--- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 01d28d4b2c..e0d73b97a8 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,6 +5,7 @@ branch = True show_missing = True omit = google/cloud/aiplatform/v1/schema/trainingjob/definition/__init__.py + google/cloud/aiplatform/vizier/pyvizier/* exclude_lines = # Re-enable the standard pragma pragma: NO COVER From cf8b21361f6fdbcf6a55bd51815bf5084b918c20 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 21 Jun 2022 16:22:53 -0700 Subject: [PATCH 15/36] Remove the converage dependency to avoid the conflicts. --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index cf84e2da7e..faad7e642d 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,6 @@ "attrs==21.4.0", "absl-py>=0.7", "numpy>=1.19.0", - "coverage>=4.5,<5.0", "protobuf>=3.6,<4.0", "pytype==2022.1.5", "keras-tuner>=1.0,<2.0", From 60a8653245e530476143dcb2992c5debf0556960 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 28 Jun 2022 16:16:01 -0700 Subject: [PATCH 16/36] Fixing the py-3.9 issue in the sample/module-builder --- .../vizier/pyvizier/base_study_config.py | 15 ++++++++++++++- .../vizier/pyvizier/parameter_config.py | 4 ++-- google/cloud/aiplatform/vizier/pyvizier/trial.py | 11 ++--------- samples/model-builder/requirements.txt | 3 ++- samples/snippets/requirements.txt | 2 +- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py index 2b92dab7cf..d94a7d6de0 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py @@ -19,7 +19,6 @@ ) import attr -import numpy as np from google.cloud.aiplatform.vizier.pyvizier import common from google.cloud.aiplatform.vizier.pyvizier import parameter_config from google.cloud.aiplatform.vizier.pyvizier import trial @@ -126,6 +125,11 @@ class 
MetricInformation: ) # Minimum value of this metric can be optionally specified. + try: + import numpy as np + except: + raise ImportError('... install with google-cloud-aiplatform[vizier]') + min_value: float = attr.field( init=True, default=None, @@ -157,6 +161,11 @@ def min_value_or(self, default_value_fn: Callable[[], float]) -> float: Args: default_value_fn: Default value if min_value is not finite. """ + try: + import numpy as np + except: + raise ImportError('... install with numpy') + if np.isfinite(self.min_value): return self.min_value else: @@ -173,6 +182,10 @@ def max_value_or(self, default_value_fn: Callable[[], float]) -> float: Args: default_value_fn: Default value if max_value is not configured. """ + try: + import numpy as np + except: + raise ImportError('... install with numpy') if np.isfinite(self.max_value): return self.max_value else: diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py index 13390d49dd..873126537e 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py @@ -6,7 +6,6 @@ import math from typing import Generator, List, Optional, Sequence, Tuple, Union -from absl import logging import attr from google.cloud.aiplatform.vizier.pyvizier import trial @@ -401,7 +400,6 @@ def add_children( " given: {}".format(child_pair) ) - logging.debug("add_children: new_children=%s", new_children) child_parameter_configs = parent.child_parameter_configs for unsorted_parent_values, child in new_children: parent_values = sorted(unsorted_parent_values) @@ -460,6 +458,7 @@ def continuify(self) -> "ParameterConfig": scale_type = self.scale_type if scale_type == ScaleType.UNIFORM_DISCRETE: + from absl import logging logging.log_every_n( logging.WARNING, "Converting a UNIFORM_DISCRETE scaled discrete parameter " @@ -511,6 +510,7 @@ def merge( ) ) if one.scale_type != other.scale_type: + from 
absl import logging logging.warning("Scale type conflicts while merging %s and %s", one, other) if one.type in (ParameterType.CATEGORICAL, ParameterType.DISCRETE): diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py index 321d6652ea..f939a9d209 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/trial.py +++ b/google/cloud/aiplatform/vizier/pyvizier/trial.py @@ -14,9 +14,7 @@ import enum from typing import Any, Dict, List, MutableMapping, Optional, Union, FrozenSet -from absl import logging import attr -import numpy as np ParameterValueTypes = Union[str, int, float, bool] OrderedDict = collections.OrderedDict @@ -68,13 +66,6 @@ def _std_not_negative(self, _, stddev): kw_only=True, ) - -# Use when you want to preserve the shapes or reduce if-else statements. -# e.g. `metrics.get('metric_name', NaNMetric).value` to get NaN or the actual -# value. -NaNMetric = Metric(value=np.nan) - - @attr.s(auto_attribs=True, frozen=True, init=True, slots=True, repr=False) class ParameterValue: """Immutable wrapper for vizier_pb2.Parameter.value, which is a oneof field. 
@@ -196,6 +187,7 @@ class Measurement: """Collection of metrics with a timestamp.""" def _value_is_finite(self, _, value): + import numpy as np if not (np.isfinite(value) and value >= 0): raise ValueError("Must be finite and non-negative.") @@ -414,6 +406,7 @@ def status(self) -> TrialStatus: @property def is_completed(self) -> bool: """Returns True if this Trial is completed.""" + from absl import logging if self.status == TrialStatus.COMPLETED: if self.completion_time is None: logging.warning( diff --git a/samples/model-builder/requirements.txt b/samples/model-builder/requirements.txt index 983ca6b4c6..d29e95742b 100644 --- a/samples/model-builder/requirements.txt +++ b/samples/model-builder/requirements.txt @@ -1 +1,2 @@ -google-cloud-aiplatform \ No newline at end of file +#google-cloud-aiplatform +../../ diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d09caa45e0..02c9454efa 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-aiplatform +#google-cloud-aiplatform From 35e87995b1b7d9b4c0e6953a9696c25d6d12693a Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 28 Jun 2022 16:37:38 -0700 Subject: [PATCH 17/36] Convert the lambda function to avoid the import numpy be called in the confest.py test --- .../vizier/pyvizier/base_study_config.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py index d94a7d6de0..51ec5fd400 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py @@ -124,27 +124,37 @@ class MetricInformation: kw_only=True, ) - # Minimum value of this metric can be optionally specified. - try: - import numpy as np - except: - raise ImportError('... 
install with google-cloud-aiplatform[vizier]') + def min_value_converter(x: Optional[float]): + try: + import numpy as np + except: + raise ImportError('... install with google-cloud-aiplatform[vizier]') + return float(x) if x is not None else -np.inf + min_value: float = attr.field( init=True, default=None, # FYI: Converter is applied before validator. - converter=lambda x: float(x) if x is not None else -np.inf, + converter=min_value_converter, validator=[attr.validators.instance_of(float), _min_leq_max], kw_only=True, ) + def max_value_converter(x: Optional[float]): + try: + import numpy as np + except: + raise ImportError('... install with google-cloud-aiplatform[vizier]') + + return float(x) if x is not None else np.inf + # Maximum value of this metric can be optionally specified. max_value: float = attr.field( init=True, default=None, # FYI: Converter is applied before validator. - converter=lambda x: float(x) if x is not None else np.inf, + converter=max_value_converter, validator=[attr.validators.instance_of(float), _max_geq_min], on_setattr=attr.setters.validate, kw_only=True, From 38b55e0e3a37e1e8b627049c549a2f9d53924fd0 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 28 Jun 2022 17:01:28 -0700 Subject: [PATCH 18/36] Revert the requirements file. 
--- samples/model-builder/requirements.txt | 3 +-- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/samples/model-builder/requirements.txt b/samples/model-builder/requirements.txt index d29e95742b..d09caa45e0 100644 --- a/samples/model-builder/requirements.txt +++ b/samples/model-builder/requirements.txt @@ -1,2 +1 @@ -#google-cloud-aiplatform -../../ +google-cloud-aiplatform diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 02c9454efa..d09caa45e0 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1 +1 @@ -#google-cloud-aiplatform +google-cloud-aiplatform From d681cea8087087aa42003d9aa97fd412bcc43f49 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 28 Jun 2022 17:10:01 -0700 Subject: [PATCH 19/36] Fix the lint error by running the nox -s blacken. --- .../vizier/pyvizier/base_study_config.py | 30 +++++++++---------- .../vizier/pyvizier/parameter_config.py | 2 ++ .../cloud/aiplatform/vizier/pyvizier/trial.py | 3 ++ 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py index 51ec5fd400..b397ea2262 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py @@ -125,13 +125,13 @@ class MetricInformation: ) def min_value_converter(x: Optional[float]): - try: + try: import numpy as np - except: - raise ImportError('... install with google-cloud-aiplatform[vizier]') - - return float(x) if x is not None else -np.inf - + except ModuleNotFoundError: + raise ImportError("... 
install with google-cloud-aiplatform[vizier]") + + return float(x) if x is not None else -np.inf + min_value: float = attr.field( init=True, default=None, @@ -142,12 +142,12 @@ def min_value_converter(x: Optional[float]): ) def max_value_converter(x: Optional[float]): - try: + try: import numpy as np - except: - raise ImportError('... install with google-cloud-aiplatform[vizier]') - - return float(x) if x is not None else np.inf + except ModuleNotFoundError: + raise ImportError("... install with google-cloud-aiplatform[vizier]") + + return float(x) if x is not None else np.inf # Maximum value of this metric can be optionally specified. max_value: float = attr.field( @@ -173,8 +173,8 @@ def min_value_or(self, default_value_fn: Callable[[], float]) -> float: """ try: import numpy as np - except: - raise ImportError('... install with numpy') + except ModuleNotFoundError: + raise ImportError("... install with numpy") if np.isfinite(self.min_value): return self.min_value @@ -194,8 +194,8 @@ def max_value_or(self, default_value_fn: Callable[[], float]) -> float: """ try: import numpy as np - except: - raise ImportError('... install with numpy') + except ModuleNotFoundError: + raise ImportError("... 
install with numpy") if np.isfinite(self.max_value): return self.max_value else: diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py index 873126537e..729b7a8882 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py @@ -459,6 +459,7 @@ def continuify(self) -> "ParameterConfig": scale_type = self.scale_type if scale_type == ScaleType.UNIFORM_DISCRETE: from absl import logging + logging.log_every_n( logging.WARNING, "Converting a UNIFORM_DISCRETE scaled discrete parameter " @@ -511,6 +512,7 @@ def merge( ) if one.scale_type != other.scale_type: from absl import logging + logging.warning("Scale type conflicts while merging %s and %s", one, other) if one.type in (ParameterType.CATEGORICAL, ParameterType.DISCRETE): diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py index f939a9d209..581a8ab1dc 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/trial.py +++ b/google/cloud/aiplatform/vizier/pyvizier/trial.py @@ -66,6 +66,7 @@ def _std_not_negative(self, _, stddev): kw_only=True, ) + @attr.s(auto_attribs=True, frozen=True, init=True, slots=True, repr=False) class ParameterValue: """Immutable wrapper for vizier_pb2.Parameter.value, which is a oneof field. 
@@ -188,6 +189,7 @@ class Measurement: def _value_is_finite(self, _, value): import numpy as np + if not (np.isfinite(value) and value >= 0): raise ValueError("Must be finite and non-negative.") @@ -407,6 +409,7 @@ def status(self) -> TrialStatus: def is_completed(self) -> bool: """Returns True if this Trial is completed.""" from absl import logging + if self.status == TrialStatus.COMPLETED: if self.completion_time is None: logging.warning( From 5644f96784267e18ad1066795edf7efdf43dcf32 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 28 Jun 2022 17:29:13 -0700 Subject: [PATCH 20/36] Fixed the syntax issue in the setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 39a2cb7574..d19b4ed692 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,7 @@ "googleapis-common-protos==1.56.0", "google-api-python-client==1.12.8", "sqlalchemy==1.4", +] private_endpoints_extra_require = [ "urllib3 >=1.21.1, <1.27", @@ -136,7 +137,7 @@ "xai": xai_extra_require, "lit": lit_extra_require, "cloud_profiler": profiler_extra_require, - "pipelines": pipelines_extra_requires, + "pipelines": pipelines_extra_require, "vizier": vizier_extra_require, "datasets": datasets_extra_require, "private_endpoints": private_endpoints_extra_require, From 317a0d60ef7bd6de37b63af016ebf3f9e7de2a79 Mon Sep 17 00:00:00 2001 From: halio-g Date: Wed, 6 Jul 2022 17:19:18 -0700 Subject: [PATCH 21/36] Setup the local package --- setup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d19b4ed692..6183c02846 100644 --- a/setup.py +++ b/setup.py @@ -64,12 +64,13 @@ "absl-py>=0.7", "numpy>=1.19.0", "protobuf>=3.6,<4.0", - "pytype==2022.1.5", + #"pytype==2022.1.5", "keras-tuner>=1.0,<2.0", "portpicker==1.3.1", "googleapis-common-protos==1.56.0", "google-api-python-client==1.12.8", - "sqlalchemy==1.4", +# "sqlalchemy==1.4", +# "google-vizier<=0.0.2a0", ] private_endpoints_extra_require = [ @@ -128,6 +129,7 @@ 
"google-cloud-storage >= 1.32.0, < 3.0.0dev", "google-cloud-bigquery >= 1.15.0, < 3.0.0dev", "google-cloud-resource-manager >= 1.3.3, < 3.0.0dev", + "google-vizier @ file://localhost//root/python-aiplatform/google-vizier/#egg=google-vizier", ), extras_require={ "full": full_extra_require, @@ -155,5 +157,8 @@ "Topic :: Internet", "Topic :: Software Development :: Libraries :: Python Modules", ], +# dependency_links=[ +# os.path.join(os.getcwd(), 'google-vizier', 'google_vizier-0.0.3a0-py3.7.egg') +# ], zip_safe=False, ) From 24098ddba3c05c8c97e72634ad61a16a46bc6a39 Mon Sep 17 00:00:00 2001 From: halio-g Date: Thu, 7 Jul 2022 13:41:20 -0700 Subject: [PATCH 22/36] scrube the TODO since it's the documentation ticket. --- google/cloud/aiplatform/vizier/client_abc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/aiplatform/vizier/client_abc.py b/google/cloud/aiplatform/vizier/client_abc.py index 54c6704aff..049a7319ec 100644 --- a/google/cloud/aiplatform/vizier/client_abc.py +++ b/google/cloud/aiplatform/vizier/client_abc.py @@ -13,7 +13,6 @@ Modifying the returned object does not update the Vizier service. """ -# TODO(b/182496749): Add a dedicated .md file with more code examples. from __future__ import annotations import abc From 7c1bd0f20ef52ae8ddd572e317937ef1ceb64e1e Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 8 Jul 2022 16:23:01 -0700 Subject: [PATCH 23/36] Addresses the comments. 
--- google/cloud/aiplatform/vizier/__init__.py | 2 +- google/cloud/aiplatform/vizier/client_abc.py | 18 +- google/cloud/aiplatform/vizier/study.py | 184 ++++++++++++++----- google/cloud/aiplatform/vizier/trial.py | 6 +- tests/system/aiplatform/test_vizier.py | 1 + tests/unit/aiplatform/test_vizier.py | 20 +- 6 files changed, 173 insertions(+), 58 deletions(-) diff --git a/google/cloud/aiplatform/vizier/__init__.py b/google/cloud/aiplatform/vizier/__init__.py index 8ff128e3e0..5fab17a0bb 100644 --- a/google/cloud/aiplatform/vizier/__init__.py +++ b/google/cloud/aiplatform/vizier/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google/cloud/aiplatform/vizier/client_abc.py b/google/cloud/aiplatform/vizier/client_abc.py index 049a7319ec..bb1c637fe2 100644 --- a/google/cloud/aiplatform/vizier/client_abc.py +++ b/google/cloud/aiplatform/vizier/client_abc.py @@ -1,3 +1,18 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Cross-platform Vizier client interfaces. 
Aside from "materialize_" methods, code written using these interfaces are @@ -15,9 +30,8 @@ from __future__ import annotations -import abc - from typing import Optional, Collection, Type, TypeVar, Mapping, Any +import abc from google.cloud.aiplatform.vizier import pyvizier as vz diff --git a/google/cloud/aiplatform/vizier/study.py b/google/cloud/aiplatform/vizier/study.py index c7feb2887f..023e1e5a1c 100644 --- a/google/cloud/aiplatform/vizier/study.py +++ b/google/cloud/aiplatform/vizier/study.py @@ -14,18 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. import copy - from typing import Optional, Collection, Type, TypeVar -from google.cloud.aiplatform.vizier.client_abc import StudyInterface -from google.cloud.aiplatform.vizier.client_abc import TrialInterface from google.api_core import exceptions from google.auth import credentials as auth_credentials from google.cloud.aiplatform import base from google.cloud.aiplatform import utils from google.cloud.aiplatform import initializer -from google.cloud.aiplatform.vizier.trial import Trial +from google.cloud.aiplatform.vizier import client_abc from google.cloud.aiplatform.vizier import pyvizier as vz +from google.cloud.aiplatform.vizier.trial import Trial + from google.cloud.aiplatform.compat.types import study as gca_study @@ -34,7 +33,7 @@ _LOGGER = base.Logger(__name__) -class Study(base.VertexAiResourceNounWithFutureManager, StudyInterface): +class Study(base.VertexAiResourceNounWithFutureManager, client_abc.StudyInterface): """Manage Study resource for Vertex Vizier.""" client_class = utils.VizierClientWithOverride @@ -66,10 +65,10 @@ def __init__( Example: "projects/123/locations/us-central1/studies/12345678" or "12345678" when project and location are initialized or passed. project (str): - Optional. Project to retrieve feature from. If not set, project + Optional. Project to retrieve study from. 
If not set, project set in aiplatform.init will be used. location (str): - Optional. Location to retrieve feature from. If not set, location + Optional. Location to retrieve study from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve this Feature. Overrides @@ -87,8 +86,13 @@ def __init__( @classmethod @base.optional_sync() def create_or_load( - cls, display_name: str, problem: vz.ProblemStatement - ) -> StudyInterface: + cls, + display_name: str, + problem: vz.ProblemStatement, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> client_abc.StudyInterface: """Creates a Study resource. Example Usage: @@ -101,17 +105,35 @@ def create_or_load( root.add_float_param( 'learning_rate', 0.00001, 1.0, scale_type=pyvizier.ScaleType.LINEAR) root.add_categorical_param('optimizer', ['adagrad', 'adam', 'experimental']) - study = aiplatform.Study.create_or_load(display_name='display_name', problem=sc) + study = aiplatform.Study.create_or_load(display_name='tuning_study', problem=sc) Args: display_name (str): - A name to describe the Study. + Required. A name to describe the Study. It's unique per study. An existing study + will be returned if the study has the same display name. problem (vz.ProblemStatement): - Configurations of the study. It defines the problem to create the study. + Required. Configurations of the study. It defines the problem to create the study. + project (str): + Optional. Project to retrieve study from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve study from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve this Feature. Overrides + credentials set in aiplatform.init. 
+ Returns: + StudyInterface - The created study resource object. """ + project = initializer.global_config.project if not project else project + location = initializer.global_config.location if not location else location + credentials = ( + initializer.global_config.credentials if not credentials else credentials + ) + api_client = cls._instantiate_client( - location=initializer.global_config.location, - credentials=initializer.global_config.credentials, + location=location, + credentials=credentials, ) study = gca_study.Study( display_name=display_name, study_spec=problem.to_proto() @@ -120,85 +142,147 @@ def create_or_load( try: study = api_client.create_study( parent=initializer.global_config.common_location_path( - initializer.global_config.project, - initializer.global_config.location, + project, + location, ), study=study, + credentials=credentials, ) except exceptions.AlreadyExists: - _LOGGER.info("The study is aleady created. Using existing study.") + _LOGGER.info("The study is already created. Using existing study.") study = api_client.lookup_study( request={ "parent": initializer.global_config.common_location_path( - initializer.global_config.project, - initializer.global_config.location, + project, + location, ), "display_name": display_name, - } + }, + credentials=credentials, ) return Study(study.name) - def get_trial(self, uid: int) -> TrialInterface: - """Retrieves the trial under the study by given trial id.""" + def get_trial(self, uid: int) -> client_abc.TrialInterface: + """Retrieves the trial under the study by given trial id. + + Args: + uid (int): Required. Unique identifier of the trial to search. + Returns: + TrialInterface - The trial resource object. 
+ """ study_path_components = self._parse_resource_name(self.resource_name) - _LOGGER.info(study_path_components) return Trial( Trial._format_resource_name( project=study_path_components["project"], location=study_path_components["location"], study=study_path_components["study"], trial=uid, - ) + ), + credentials=self.credentials, ) def trials( self, trial_filter: Optional[vz.TrialFilter] = None - ) -> Collection[TrialInterface]: - """Fetches a collection of trials.""" + ) -> Collection[client_abc.TrialInterface]: + """Fetches a collection of trials. + + Args: + trial_filter (int): Optional. A filter for the trials. + Returns: + Collection[TrialInterface] - A list of trials resource object belonging + to the study. + """ list_trials_request = {"parent": self.resource_name} - trials_response = self.api_client.list_trials(request=list_trials_request) - return [Trial(trial.name) for trial in trials_response.trials] + trials_response = self.api_client.list_trials( + request=list_trials_request, credentials=self.credentials + ) + return [ + Trial._construct_sdk_resource_from_gapic( + trial, + project=self.project, + location=self.location, + credentials=self.credentials, + ) + for trial in trials_response.trials + ] + + def optimal_trials(self) -> Collection[client_abc.TrialInterface]: + """Returns optimal trial(s). - def optimal_trials(self) -> Collection[TrialInterface]: - """Returns optimal trial(s).""" + Returns: + Collection[TrialInterface] - A list of optimal trials resource object. 
+ """ list_optimal_trials_request = {"parent": self.resource_name} optimal_trials_response = self.api_client.list_optimal_trials( - request=list_optimal_trials_request + request=list_optimal_trials_request, credentials=self.credentials ) - return [Trial(trial.name) for trial in optimal_trials_response.optimal_trials] + return [ + Trial._construct_sdk_resource_from_gapic( + trial, + project=self.project, + location=self.location, + credentials=self.credentials, + ) + for trial in optimal_trials_response.optimal_trials + ] def materialize_study_config(self) -> vz.StudyConfig: - """#Materializes the study config.""" - study = self.api_client.get_study(name=self.resource_name) + """#Materializes the study config. + + Returns: + StudyConfig - A deepcopy of StudyConfig from the study. + """ + study = self.api_client.get_study( + name=self.resource_name, credentials=self.credentials + ) return copy.deepcopy(vz.StudyConfig.from_proto(study.study_spec)) @classmethod - def from_uid(cls: Type[_T], uid: str) -> _T: + def from_uid( + cls: Type[_T], + uid: str, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> _T: """Fetches an existing study from the Vizier service. Args: - uid: Unique identifier of the study. + uid (str): Required. Unique identifier of the study. + Returns: + StudyInterface - The study resource object. """ - return Study(study_id=uid) + project = initializer.global_config.project if not project else project + location = initializer.global_config.location if not location else location + credentials = ( + initializer.global_config.credentials if not credentials else credentials + ) + + return Study( + study_id=uid, project=project, location=location, credentials=credentials + ) def suggest( self, *, count: Optional[int] = None, worker: str = "" - ) -> Collection[TrialInterface]: + ) -> Collection[client_abc.TrialInterface]: """Returns Trials to be evaluated by worker. 
Args: - count: Number of suggestions. - worker: When new Trials are generated, their `assigned_worker` field is - populated with this worker. suggest() first looks for existing Trials - that are assigned to `worker`, before generating new ones. + count (int): Optional. Number of suggestions. + worker (str): When new Trials are generated, their `assigned_worker` field is + populated with this worker. suggest() first looks for existing Trials + that are assigned to `worker`, before generating new ones. + Returns: + Collection[TrialInterface] - A list of suggested trial resource objects. """ suggest_trials_lro = self.api_client.suggest_trials( request={ "parent": self.resource_name, "suggestion_count": count, "client_id": worker, - } + }, + credentials=self.credentials, ) _LOGGER.log_action_started_against_resource_with_lro( "Suggest", "study", self.__class__, suggest_trials_lro @@ -206,8 +290,18 @@ def suggest( _LOGGER.info(self.client_class.get_gapic_client_class()) trials = suggest_trials_lro.result() _LOGGER.log_action_completed_against_resource("study", "suggested", self) - return [Trial(trial.name) for trial in trials.trials] + return [ + Trial._construct_sdk_resource_from_gapic( + trial, + project=self.project, + location=self.location, + credentials=self.credentials, + ) + for trial in trials.trials + ] def delete(self) -> None: """Deletes the study.""" - self.api_client.delete_study(name=self.resource_name) + self.api_client.delete_study( + name=self.resource_name, credentials=self.credentials + ) diff --git a/google/cloud/aiplatform/vizier/trial.py b/google/cloud/aiplatform/vizier/trial.py index 8fa2dd9328..310f08e36b 100644 --- a/google/cloud/aiplatform/vizier/trial.py +++ b/google/cloud/aiplatform/vizier/trial.py @@ -65,10 +65,10 @@ def __init__( Example: "projects/123/locations/us-central1/studies/12345678" or "12345678" when project and location are initialized or passed. project (str): - Optional. Project to retrieve feature from. 
If not set, project + Optional. Project to retrieve trial from. If not set, project set in aiplatform.init will be used. location (str): - Optional. Location to retrieve feature from. If not set, location + Optional. Location to retrieve trial from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve this Feature. Overrides @@ -129,7 +129,7 @@ def complete( Args: measurement: Final measurement. - infeasible_reason: Infeasible reason for missing final measurement. + infeasible_reason: Infeasibility reason for missing final measurement. """ complete_trial_request = {"name": self.resource_name} if infeasible_reason is not None: diff --git a/tests/system/aiplatform/test_vizier.py b/tests/system/aiplatform/test_vizier.py index 2c949ade83..64a20f4d6c 100644 --- a/tests/system/aiplatform/test_vizier.py +++ b/tests/system/aiplatform/test_vizier.py @@ -8,6 +8,7 @@ _TEST_STUDY_ID = 123 +@pytest.mark.usefixtures("tear_down_resources") class TestVizier(e2e_base.TestEndToEnd): _temp_prefix = "temp_vertex_sdk_e2e_vizier_test" diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index 1f1e12f1b4..6d38e7963f 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -288,7 +288,7 @@ def test_create_study(self, create_study_mock): ) create_study_mock.assert_called_once_with( - parent=_TEST_PARENT, study=_TEST_STUDY + parent=_TEST_PARENT, study=_TEST_STUDY, credentials=ANY ) assert type(study) == aiplatform.Study @@ -318,7 +318,8 @@ def test_create_study_already_exists( ) lookup_study_mock.assert_called_once_with( - request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME} + request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME}, + credentials=ANY ) assert type(study) == aiplatform.Study @@ -347,7 +348,7 @@ def test_materialize_study_config(self, create_study_mock): study_config =
study.materialize_study_config() create_study_mock.assert_called_once_with( - parent=_TEST_PARENT, study=_TEST_STUDY + parent=_TEST_PARENT, study=_TEST_STUDY, credentials=ANY ) assert type(study_config) == pyvizier.StudyConfig @@ -380,7 +381,8 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): "parent": _TEST_STUDY_NAME, "suggestion_count": 5, "client_id": "test_worker", - } + }, + credentials=ANY ) assert type(trials[0]) == aiplatform.Trial @@ -417,7 +419,7 @@ def test_delete(self, create_study_mock, delete_study_mock): study.delete() - delete_study_mock.assert_called_once_with(name=_TEST_STUDY_NAME) + delete_study_mock.assert_called_once_with(name=_TEST_STUDY_NAME, credentials=ANY) @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock") def test_optimal_trials(self, list_optimal_trials_mock): @@ -444,7 +446,8 @@ def test_optimal_trials(self, list_optimal_trials_mock): trials = study.optimal_trials() list_optimal_trials_mock.assert_called_once_with( - request={"parent": _TEST_STUDY_NAME} + request={"parent": _TEST_STUDY_NAME}, + credentials=ANY ) assert type(trials[0]) == aiplatform.Trial @@ -472,7 +475,10 @@ def test_list_trials(self, list_trials_mock): trials = study.trials() - list_trials_mock.assert_called_once_with(request={"parent": _TEST_STUDY_NAME}) + list_trials_mock.assert_called_once_with( + request={"parent": _TEST_STUDY_NAME}, + credentials=ANY + ) assert type(trials[0]) == aiplatform.Trial @pytest.mark.usefixtures("get_study_mock", "create_study_mock") From ddd51eb131006ac510af666868c3d9d2bc394447 Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 8 Jul 2022 16:26:04 -0700 Subject: [PATCH 24/36] Ran blacken on the test_vizier file. 
--- tests/unit/aiplatform/test_vizier.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index 6d38e7963f..cacb4d6c1a 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -319,7 +319,7 @@ def test_create_study_already_exists( lookup_study_mock.assert_called_once_with( request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME}, - credentials=ANY + credentials=ANY, ) assert type(study) == aiplatform.Study @@ -382,7 +382,7 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): "suggestion_count": 5, "client_id": "test_worker", }, - credentials=ANY + credentials=ANY, ) assert type(trials[0]) == aiplatform.Trial @@ -419,7 +419,9 @@ def test_delete(self, create_study_mock, delete_study_mock): study.delete() - delete_study_mock.assert_called_once_with(name=_TEST_STUDY_NAME, credentials=ANY) + delete_study_mock.assert_called_once_with( + name=_TEST_STUDY_NAME, credentials=ANY + ) @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock") def test_optimal_trials(self, list_optimal_trials_mock): @@ -446,8 +448,7 @@ def test_optimal_trials(self, list_optimal_trials_mock): trials = study.optimal_trials() list_optimal_trials_mock.assert_called_once_with( - request={"parent": _TEST_STUDY_NAME}, - credentials=ANY + request={"parent": _TEST_STUDY_NAME}, credentials=ANY ) assert type(trials[0]) == aiplatform.Trial @@ -476,8 +477,7 @@ def test_list_trials(self, list_trials_mock): trials = study.trials() list_trials_mock.assert_called_once_with( - request={"parent": _TEST_STUDY_NAME}, - credentials=ANY + request={"parent": _TEST_STUDY_NAME}, credentials=ANY ) assert type(trials[0]) == aiplatform.Trial From d01d5ad42b162d7a91a46fa766337283d0e34a0e Mon Sep 17 00:00:00 2001 From: halio-g Date: Wed, 13 Jul 2022 16:00:16 -0700 Subject: [PATCH 25/36] Import the OSS in the Vertex SDK. 
--- .../aiplatform/vizier/pyvizier/__init__.py | 51 ++++++----- .../vizier/pyvizier/proto_converters.py | 89 ++++++++++--------- .../vizier/pyvizier/study_config.py | 49 +++++----- setup.py | 3 +- 4 files changed, 98 insertions(+), 94 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index 3082ffeb8e..590673778b 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -1,44 +1,47 @@ """PyVizier classes for Pythia policies.""" -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricInformation -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricsConfig -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import MetricType -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ( +from vizier.pyvizier import MetricInformation +from vizier.pyvizier import MetricsConfig +from vizier.pyvizier import MetricType +from vizier.pyvizier import ( ObjectiveMetricGoal, ) -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ProblemStatement -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import SearchSpace -from google.cloud.aiplatform.vizier.pyvizier.base_study_config import ( +from vizier.pyvizier import ProblemStatement +from vizier.pyvizier import SearchSpace +from vizier.pyvizier import ( SearchSpaceSelector, ) -from google.cloud.aiplatform.vizier.pyvizier.common import Metadata -from google.cloud.aiplatform.vizier.pyvizier.common import MetadataValue -from google.cloud.aiplatform.vizier.pyvizier.common import Namespace +from vizier.pyvizier import Metadata +from vizier.pyvizier import MetadataValue +from vizier.pyvizier import Namespace +from vizier.pyvizier import ExternalType +from vizier.pyvizier import ParameterConfig +from vizier.pyvizier import ParameterType +from vizier.pyvizier import ScaleType +from 
vizier.pyvizier import CompletedTrial +from vizier.pyvizier import Measurement +from vizier.pyvizier import MonotypeParameterSequence +from vizier.pyvizier import Metric +from vizier.pyvizier import ParameterDict +from vizier.pyvizier import ParameterValue +from vizier.pyvizier import Trial +from vizier.pyvizier import ParameterValueTypes +from vizier.pyvizier import TrialFilter +from vizier.pyvizier import TrialStatus +from vizier.pyvizier import TrialSuggestion + +from google.cloud.aiplatform.vizier.pyvizier.proto_converters import TrialConverter from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ( ParameterConfigConverter, ) from google.cloud.aiplatform.vizier.pyvizier.proto_converters import ( MeasurementConverter, ) -from google.cloud.aiplatform.vizier.pyvizier.proto_converters import TrialConverter from google.cloud.aiplatform.vizier.pyvizier.study_config import StudyConfig from google.cloud.aiplatform.vizier.pyvizier.study_config import Algorithm from google.cloud.aiplatform.vizier.pyvizier.automated_stopping import ( AutomatedStoppingConfig, ) -from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ExternalType -from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterConfig -from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ParameterType -from google.cloud.aiplatform.vizier.pyvizier.parameter_config import ScaleType -from google.cloud.aiplatform.vizier.pyvizier.trial import CompletedTrial -from google.cloud.aiplatform.vizier.pyvizier.trial import Measurement -from google.cloud.aiplatform.vizier.pyvizier.trial import Metric -from google.cloud.aiplatform.vizier.pyvizier.trial import ParameterDict -from google.cloud.aiplatform.vizier.pyvizier.trial import ParameterValue -from google.cloud.aiplatform.vizier.pyvizier.trial import Trial -from google.cloud.aiplatform.vizier.pyvizier.trial import TrialFilter -from google.cloud.aiplatform.vizier.pyvizier.trial import TrialStatus -from 
google.cloud.aiplatform.vizier.pyvizier.trial import TrialSuggestion __all__ = ( "MetricInformation", diff --git a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py index 6385f05011..9099e0ac8d 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py +++ b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py @@ -5,31 +5,34 @@ from google.protobuf import duration_pb2 from google.cloud.aiplatform.compat.types import study as study_pb2 -from google.cloud.aiplatform.vizier.pyvizier import parameter_config -from google.cloud.aiplatform.vizier.pyvizier import trial +from google.cloud.aiplatform.vizier.pyvizier import ScaleType +from google.cloud.aiplatform.vizier.pyvizier import ParameterType +from google.cloud.aiplatform.vizier.pyvizier import ParameterValue +from google.cloud.aiplatform.vizier.pyvizier import MonotypeParameterSequence +from google.cloud.aiplatform.vizier.pyvizier import ParameterConfig +from google.cloud.aiplatform.vizier.pyvizier import Measurement +from google.cloud.aiplatform.vizier.pyvizier import Metric +from google.cloud.aiplatform.vizier.pyvizier import TrialStatus +from google.cloud.aiplatform.vizier.pyvizier import Trial -ScaleType = parameter_config.ScaleType _ScaleTypePb2 = study_pb2.StudySpec.ParameterSpec.ScaleType -ParameterType = parameter_config.ParameterType -MonotypeParameterSequence = parameter_config.MonotypeParameterSequence - class _ScaleTypeMap: """Proto converter for scale type.""" _pyvizier_to_proto = { - parameter_config.ScaleType.LINEAR: _ScaleTypePb2.UNIT_LINEAR_SCALE, - parameter_config.ScaleType.LOG: _ScaleTypePb2.UNIT_LOG_SCALE, - parameter_config.ScaleType.REVERSE_LOG: _ScaleTypePb2.UNIT_REVERSE_LOG_SCALE, + ScaleType.LINEAR: _ScaleTypePb2.UNIT_LINEAR_SCALE, + ScaleType.LOG: _ScaleTypePb2.UNIT_LOG_SCALE, + ScaleType.REVERSE_LOG: _ScaleTypePb2.UNIT_REVERSE_LOG_SCALE, } _proto_to_pyvizier = {v: k for k, v in 
_pyvizier_to_proto.items()} @classmethod - def to_proto(cls, pyvizier: parameter_config.ScaleType) -> _ScaleTypePb2: + def to_proto(cls, pyvizier: ScaleType) -> _ScaleTypePb2: return cls._pyvizier_to_proto[pyvizier] @classmethod - def from_proto(cls, proto: _ScaleTypePb2) -> parameter_config.ScaleType: + def from_proto(cls, proto: _ScaleTypePb2) -> ScaleType: return cls._proto_to_pyvizier[proto] @@ -101,7 +104,7 @@ def from_proto( proto: study_pb2.StudySpec.ParameterSpec, *, strict_validation: bool = False - ) -> parameter_config.ParameterConfig: + ) -> ParameterConfig: """Creates a ParameterConfig. Args: @@ -153,7 +156,7 @@ def from_proto( scale_type = _ScaleTypeMap.from_proto(proto.scale_type) try: - config = parameter_config.ParameterConfig.factory( + config = ParameterConfig.factory( name=proto.parameter_id, feasible_values=feasible_values, bounds=bounds, @@ -178,7 +181,7 @@ def from_proto( def _set_child_parameter_configs( cls, parent_proto: study_pb2.StudySpec.ParameterSpec, - pc: parameter_config.ParameterConfig, + pc: ParameterConfig, ): """Sets the parent_proto's conditional_parameter_specs field. 
@@ -190,7 +193,7 @@ def _set_child_parameter_configs( ValueError: If the child configs are invalid """ children: List[ - Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] + Tuple[MonotypeParameterSequence, ParameterConfig] ] = [] for child in pc.child_parameter_configs: children.append((child.matching_parent_values, child)) @@ -238,7 +241,7 @@ def _set_child_parameter_configs( @classmethod def to_proto( - cls, pc: parameter_config.ParameterConfig + cls, pc: ParameterConfig ) -> study_pb2.StudySpec.ParameterSpec: """Returns a ParameterConfig Proto.""" proto = study_pb2.StudySpec.ParameterSpec(parameter_id=pc.name) @@ -260,12 +263,12 @@ def to_proto( class ParameterValueConverter: - """Converter for trial.ParameterValue.""" + """Converter for ParameterValue.""" @classmethod def from_proto( cls, proto: study_pb2.Trial.Parameter - ) -> Optional[trial.ParameterValue]: + ) -> Optional[ParameterValue]: """Returns whichever value that is populated, or None.""" potential_value = proto.value if ( @@ -273,13 +276,13 @@ def from_proto( or isinstance(potential_value, str) or isinstance(potential_value, bool) ): - return trial.ParameterValue(potential_value) + return ParameterValue(potential_value) else: return None @classmethod def to_proto( - cls, parameter_value: trial.ParameterValue, name: str + cls, parameter_value: ParameterValue, name: str ) -> study_pb2.Trial.Parameter: """Returns Parameter Proto.""" proto = study_pb2.Trial.Parameter(parameter_id=name) @@ -297,10 +300,10 @@ def to_proto( class MeasurementConverter: - """Converter for trial.MeasurementConverter.""" + """Converter for MeasurementConverter.""" @classmethod - def from_proto(cls, proto: study_pb2.Measurement) -> trial.Measurement: + def from_proto(cls, proto: study_pb2.Measurement) -> Measurement: """Creates a valid instance from proto. 
Args: @@ -330,17 +333,17 @@ def from_proto(cls, proto: study_pb2.Measurement) -> trial.Measurement: metrics[metric.metric_id].value, ) try: - metrics[metric.metric_id] = trial.Metric(value=metric.value) + metrics[metric.metric_id] = Metric(value=metric.value) except ValueError: pass - return trial.Measurement( + return Measurement( metrics=metrics, elapsed_secs=proto.elapsed_duration.seconds, steps=proto.step_count, ) @classmethod - def to_proto(cls, measurement: trial.Measurement) -> study_pb2.Measurement: + def to_proto(cls, measurement: Measurement) -> study_pb2.Measurement: """Converts to Measurement proto.""" proto = study_pb2.Measurement() for name, metric in measurement.metrics.items(): @@ -357,33 +360,33 @@ def to_proto(cls, measurement: trial.Measurement) -> study_pb2.Measurement: return proto -def _to_pyvizier_trial_status(proto_state: study_pb2.Trial.State) -> trial.TrialStatus: +def _to_pyvizier_trial_status(proto_state: study_pb2.Trial.State) -> TrialStatus: """from_proto conversion for Trial statuses.""" if proto_state == study_pb2.Trial.State.REQUESTED: - return trial.TrialStatus.REQUESTED + return TrialStatus.REQUESTED elif proto_state == study_pb2.Trial.State.ACTIVE: - return trial.TrialStatus.ACTIVE + return TrialStatus.ACTIVE if proto_state == study_pb2.Trial.State.STOPPING: - return trial.TrialStatus.STOPPING + return TrialStatus.STOPPING if proto_state == study_pb2.Trial.State.SUCCEEDED: - return trial.TrialStatus.COMPLETED + return TrialStatus.COMPLETED elif proto_state == study_pb2.Trial.State.INFEASIBLE: - return trial.TrialStatus.COMPLETED + return TrialStatus.COMPLETED else: - return trial.TrialStatus.UNKNOWN + return TrialStatus.UNKNOWN def _from_pyvizier_trial_status( - status: trial.TrialStatus, infeasible: bool + status: TrialStatus, infeasible: bool ) -> study_pb2.Trial.State: """to_proto conversion for Trial states.""" - if status == trial.TrialStatus.REQUESTED: + if status == TrialStatus.REQUESTED: return 
study_pb2.Trial.State.REQUESTED - elif status == trial.TrialStatus.ACTIVE: + elif status == TrialStatus.ACTIVE: return study_pb2.Trial.State.ACTIVE - elif status == trial.TrialStatus.STOPPING: + elif status == TrialStatus.STOPPING: return study_pb2.Trial.State.STOPPING - elif status == trial.TrialStatus.COMPLETED: + elif status == TrialStatus.COMPLETED: if infeasible: return study_pb2.Trial.State.INFEASIBLE else: @@ -393,10 +396,10 @@ def _from_pyvizier_trial_status( class TrialConverter: - """Converter for trial.TrialConverter.""" + """Converter for TrialConverter.""" @classmethod - def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: + def from_proto(cls, proto: study_pb2.Trial) -> Trial: """Converts from Trial proto to object. Args: @@ -441,7 +444,7 @@ def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: if proto.start_time: creation_ts = proto.start_time.nanosecond / 1e9 creation_time = datetime.datetime.fromtimestamp(creation_ts) - return trial.Trial( + return Trial( id=int(proto.name.split("/")[-1]), description=proto.name, assigned_worker=proto.client_id or None, @@ -460,16 +463,16 @@ def from_proto(cls, proto: study_pb2.Trial) -> trial.Trial: ) # pytype: disable=wrong-arg-types @classmethod - def from_protos(cls, protos: Sequence[study_pb2.Trial]) -> List[trial.Trial]: + def from_protos(cls, protos: Sequence[study_pb2.Trial]) -> List[Trial]: """Convenience wrapper for from_proto.""" return [TrialConverter.from_proto(proto) for proto in protos] @classmethod - def to_protos(cls, pytrials: Sequence[trial.Trial]) -> List[study_pb2.Trial]: + def to_protos(cls, pytrials: Sequence[Trial]) -> List[study_pb2.Trial]: return [TrialConverter.to_proto(pytrial) for pytrial in pytrials] @classmethod - def to_proto(cls, pytrial: trial.Trial) -> study_pb2.Trial: + def to_proto(cls, pytrial: Trial) -> study_pb2.Trial: """Converts a pyvizier Trial to a Trial proto.""" proto = study_pb2.Trial() if pytrial.description is not None: diff --git 
a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index 093ac26cc4..eaad85e9a8 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -18,21 +18,24 @@ from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union import attr -from google.cloud.aiplatform.vizier.pyvizier import automated_stopping +from google.cloud.aiplatform.vizier.pyvizier.automated_stopping import ( + AutomatedStoppingConfig, +) from google.cloud.aiplatform.vizier.pyvizier import proto_converters -from google.cloud.aiplatform.vizier.pyvizier import base_study_config -from google.cloud.aiplatform.vizier.pyvizier import parameter_config -from google.cloud.aiplatform.vizier.pyvizier import trial +from google.cloud.aiplatform.vizier.pyvizier import SearchSpace +from google.cloud.aiplatform.vizier.pyvizier import ProblemStatement +from google.cloud.aiplatform.vizier.pyvizier import SearchSpaceSelector +from google.cloud.aiplatform.vizier.pyvizier import MetricsConfig +from google.cloud.aiplatform.vizier.pyvizier import MetricInformation +from google.cloud.aiplatform.vizier.pyvizier import Trial +from google.cloud.aiplatform.vizier.pyvizier import ParameterValueTypes from google.cloud.aiplatform.compat.types import study as study_pb2 ################### PyTypes ################### -ScaleType = parameter_config.ScaleType -ExternalType = parameter_config.ExternalType # A sequence of possible internal parameter values. -MonotypeParameterSequence = parameter_config.MonotypeParameterSequence # Possible types for trial parameter values after cast to external types. ParameterValueSequence = Union[ - trial.ParameterValueTypes, + ParameterValueTypes, Sequence[int], Sequence[float], Sequence[str], @@ -41,10 +44,6 @@ ################### Enums ################### -# Values should NEVER be removed from ObjectiveMetricGoal, only added. 
-ObjectiveMetricGoal = base_study_config.ObjectiveMetricGoal - - class Algorithm(enum.Enum): """Valid Values for StudyConfig.Algorithm.""" @@ -74,12 +73,12 @@ class MetricInformationConverter: @classmethod def from_proto( cls, proto: study_pb2.StudySpec.MetricSpec - ) -> base_study_config.MetricInformation: + ) -> MetricInformation: """Converts a MetricInformation proto to a MetricInformation object.""" if proto.goal not in list(ObjectiveMetricGoal): raise ValueError("Unknown MetricInformation.goal: {}".format(proto.goal)) - return base_study_config.MetricInformation( + return MetricInformation( name=proto.metric_id, goal=proto.goal, safety_threshold=None, @@ -90,13 +89,13 @@ def from_proto( @classmethod def to_proto( - cls, obj: base_study_config.MetricInformation + cls, obj: MetricInformation ) -> study_pb2.StudySpec.MetricSpec: """Returns this object as a proto.""" return study_pb2.StudySpec.MetricSpec(metric_id=obj.name, goal=obj.goal.value) -class MetricsConfig(base_study_config.MetricsConfig): +class MetricsConfig(MetricsConfig): """Metrics config.""" @classmethod @@ -109,11 +108,11 @@ def to_proto(self) -> List[study_pb2.StudySpec.MetricSpec]: return [MetricInformationConverter.to_proto(metric) for metric in self] -SearchSpaceSelector = base_study_config.SearchSpaceSelector +SearchSpaceSelector = SearchSpaceSelector @attr.define(frozen=True, init=True, slots=True, kw_only=True) -class SearchSpace(base_study_config.SearchSpace): +class SearchSpace(SearchSpace): """A Selector for all, or part of a SearchSpace.""" @classmethod @@ -163,7 +162,7 @@ def parameter_protos(self) -> List[study_pb2.StudySpec.ParameterSpec]: # scale_type=pyvizier.ScaleType.LOG) # @attr.define(frozen=False, init=True, slots=True, kw_only=True) -class StudyConfig(base_study_config.ProblemStatement): +class StudyConfig(ProblemStatement): """A builder and wrapper for study_pb2.StudySpec proto.""" search_space: SearchSpace = attr.field( @@ -198,12 +197,12 @@ class 
StudyConfig(base_study_config.ProblemStatement): ) automated_stopping_config: Optional[ - automated_stopping.AutomatedStoppingConfig + AutomatedStoppingConfig ] = attr.field( init=True, default=None, validator=attr.validators.optional( - attr.validators.instance_of(automated_stopping.AutomatedStoppingConfig) + attr.validators.instance_of(AutomatedStoppingConfig) ), on_setattr=attr.setters.validate, kw_only=True, @@ -241,7 +240,7 @@ def from_proto(cls, proto: study_pb2.StudySpec) -> "StudyConfig": automated_stopping_config = None else: automated_stopping_config = ( - automated_stopping.AutomatedStoppingConfig.from_proto( + AutomatedStoppingConfig.from_proto( getattr(proto, oneof_name) ) ) @@ -306,7 +305,7 @@ def single_objective_metric_name(self) -> Optional[str]: return None def _trial_to_external_values( - self, pytrial: trial.Trial + self, pytrial: Trial ) -> Dict[str, Union[float, int, str, bool]]: """Returns the trial paremeter values cast to external types.""" parameter_values: Dict[str, Union[float, int, str]] = {} @@ -364,7 +363,7 @@ def trial_parameters( return self._pytrial_parameters(pytrial) def _pytrial_parameters( - self, pytrial: trial.Trial + self, pytrial: Trial ) -> Dict[str, ParameterValueSequence]: """Returns the trial values, cast to external types, if they exist. @@ -430,7 +429,7 @@ def trial_metrics( return self._pytrial_metrics(pytrial, include_all_metrics=include_all_metrics) def _pytrial_metrics( - self, pytrial: trial.Trial, *, include_all_metrics=False + self, pytrial: Trial, *, include_all_metrics=False ) -> Dict[str, float]: """Returns the trial's final measurement metric values. 
diff --git a/setup.py b/setup.py index 6183c02846..a435a37e82 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ "googleapis-common-protos==1.56.0", "google-api-python-client==1.12.8", # "sqlalchemy==1.4", -# "google-vizier<=0.0.2a0", + "google-vizier @ git+https://github.com/halio-g/vizier.git", ] private_endpoints_extra_require = [ @@ -129,7 +129,6 @@ "google-cloud-storage >= 1.32.0, < 3.0.0dev", "google-cloud-bigquery >= 1.15.0, < 3.0.0dev", "google-cloud-resource-manager >= 1.3.3, < 3.0.0dev", - "google-vizier @ file://localhost//root/python-aiplatform/google-vizier/#egg=google-vizier", ), extras_require={ "full": full_extra_require, From be868ee6db38463fd1ba940d54fe00dcaa81fd1a Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 15 Jul 2022 15:41:44 -0700 Subject: [PATCH 26/36] Already imported the package from open source vizier. Removing the code copied from oss. --- .../vizier/pyvizier/base_study_config.py | 1554 ----------------- .../vizier/pyvizier/base_study_config_test.py | 511 ------ .../aiplatform/vizier/pyvizier/common.py | 516 ------ .../aiplatform/vizier/pyvizier/common_test.py | 341 ---- .../aiplatform/vizier/pyvizier/context.py | 54 - .../vizier/pyvizier/context_test.py | 17 - .../vizier/pyvizier/metadata_util.py | 102 -- .../vizier/pyvizier/parameter_config.py | 590 ------- .../vizier/pyvizier/parameter_config_test.py | 360 ---- .../cloud/aiplatform/vizier/pyvizier/trial.py | 548 ------ .../aiplatform/vizier/pyvizier/trial_test.py | 211 --- 11 files changed, 4804 deletions(-) delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/base_study_config.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/common.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/common_test.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/context.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/context_test.py delete mode 
100644 google/cloud/aiplatform/vizier/pyvizier/metadata_util.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/parameter_config.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/trial.py delete mode 100644 google/cloud/aiplatform/vizier/pyvizier/trial_test.py diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py deleted file mode 100644 index b397ea2262..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config.py +++ /dev/null @@ -1,1554 +0,0 @@ -import collections -from collections import abc as collections_abc -import copy -import enum -import math -import re -from typing import ( - Callable, - Iterable, - Iterator, - List, - Optional, - Sequence, - Tuple, - Type, - TypeVar, - Union, - overload, -) - -import attr -from google.cloud.aiplatform.vizier.pyvizier import common -from google.cloud.aiplatform.vizier.pyvizier import parameter_config -from google.cloud.aiplatform.vizier.pyvizier import trial - -################### PyTypes ################### -ScaleType = parameter_config.ScaleType -ExternalType = parameter_config.ExternalType -# A sequence of possible internal parameter values. -MonotypeParameterSequence = parameter_config.MonotypeParameterSequence -_T = TypeVar("_T") - - -################### Helper Classes ################### -def _min_leq_max(instance: "MetricInformation", _, value: float): - if value > instance.max_value: - raise ValueError( - f"min_value={value} cannot exceed max_value={instance.max_value}." - ) - - -def _max_geq_min(instance: "MetricInformation", _, value: float): - if value < instance.min_value: - raise ValueError( - f"min_value={instance.min_value} cannot exceed max_value={value}." - ) - - -# Values should NEVER be removed from ObjectiveMetricGoal, only added. 
-class ObjectiveMetricGoal(enum.IntEnum): - """Valid Values for MetricInformation.Goal.""" - - MAXIMIZE = 1 - MINIMIZE = 2 - - # pylint: disable=comparison-with-callable - @property - def is_maximize(self) -> bool: - return self == self.MAXIMIZE - - @property - def is_minimize(self) -> bool: - return self == self.MINIMIZE - - -class MetricType(enum.Enum): - """Type of the metric. - - OBJECTIVE: Objective to be maximized / minimized. - SAFETY: Objective to be kept above / below a certain threshold. - """ - - OBJECTIVE = "OBJECTIVE" - SAFETY = "SAFETY" # Soft constraint - - # pylint: disable=comparison-with-callable - @property - def is_safety(self) -> bool: - return self == MetricType.SAFETY - - @property - def is_objective(self) -> bool: - return self == MetricType.OBJECTIVE - - -@attr.define(frozen=False, init=True, slots=True) -class MetricInformation: - """MetricInformation provides optimization metrics configuration.""" - - # The name of this metric. An empty string is allowed for single-metric - # optimizations. - name: str = attr.field( - init=True, default="", validator=attr.validators.instance_of(str) - ) - - goal: ObjectiveMetricGoal = attr.field( - init=True, - # pylint: disable=g-long-lambda - converter=ObjectiveMetricGoal, - validator=attr.validators.instance_of(ObjectiveMetricGoal), - on_setattr=[attr.setters.convert, attr.setters.validate], - kw_only=True, - ) - - # The following are only valid for Safety metrics. - # safety_threshold should always be set to a float (default 0.0), for safety - # metrics. 
- safety_threshold: Optional[float] = attr.field( - init=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(float)), - kw_only=True, - ) - safety_std_threshold: Optional[float] = attr.field( - init=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(float)), - kw_only=True, - ) - percentage_unsafe_trials_threshold: Optional[float] = attr.field( - init=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(float)), - kw_only=True, - ) - - def min_value_converter(x: Optional[float]): - try: - import numpy as np - except ModuleNotFoundError: - raise ImportError("... install with google-cloud-aiplatform[vizier]") - - return float(x) if x is not None else -np.inf - - min_value: float = attr.field( - init=True, - default=None, - # FYI: Converter is applied before validator. - converter=min_value_converter, - validator=[attr.validators.instance_of(float), _min_leq_max], - kw_only=True, - ) - - def max_value_converter(x: Optional[float]): - try: - import numpy as np - except ModuleNotFoundError: - raise ImportError("... install with google-cloud-aiplatform[vizier]") - - return float(x) if x is not None else np.inf - - # Maximum value of this metric can be optionally specified. - max_value: float = attr.field( - init=True, - default=None, - # FYI: Converter is applied before validator. - converter=max_value_converter, - validator=[attr.validators.instance_of(float), _max_geq_min], - on_setattr=attr.setters.validate, - kw_only=True, - ) - - def min_value_or(self, default_value_fn: Callable[[], float]) -> float: - """Returns the minimum value if finite, or default_value_fn(). - - Avoids the common pitfalls of using - `metric.min_value or default_value` - which would incorrectly use the default_value when min_value == 0, and - requires default_value to have been computed. - - Args: - default_value_fn: Default value if min_value is not finite. 
- """ - try: - import numpy as np - except ModuleNotFoundError: - raise ImportError("... install with numpy") - - if np.isfinite(self.min_value): - return self.min_value - else: - return default_value_fn() - - def max_value_or(self, default_value_fn: Callable[[], float]) -> float: - """Returns the minimum value if finite, or default_value_fn(). - - Avoids the common pitfalls of using - `metric.max_value or default_value` - which would incorrectly use the default_value when max_value == 0, and - requires default_value to have been computed. - - Args: - default_value_fn: Default value if max_value is not configured. - """ - try: - import numpy as np - except ModuleNotFoundError: - raise ImportError("... install with numpy") - if np.isfinite(self.max_value): - return self.max_value - else: - return default_value_fn() - - @property - def range(self) -> float: - """Range of the metric. Can be infinite.""" - return self.max_value - self.min_value - - @property - def type(self) -> MetricType: - if self.safety_threshold is not None or self.safety_std_threshold is not None: - return MetricType.SAFETY - else: - return MetricType.OBJECTIVE - - def flip_goal(self) -> "MetricInformation": - """Flips the goal in-place and returns the reference to self.""" - if self.goal == ObjectiveMetricGoal.MAXIMIZE: - self.goal = ObjectiveMetricGoal.MINIMIZE - else: - self.goal = ObjectiveMetricGoal.MAXIMIZE - return self - - -@attr.define(frozen=False, init=True, slots=True) -class MetricsConfig(collections_abc.Collection): - """Container for metrics. - - Metric names should be unique. 
- """ - - _metrics: List[MetricInformation] = attr.ib( - init=True, - factory=list, - converter=list, - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(MetricInformation), - iterable_validator=attr.validators.instance_of(Iterable), - ), - ) - - def item(self) -> MetricInformation: - if len(self._metrics) != 1: - raise ValueError("Can be called only when there is exactly one metric!") - return self._metrics[0] - - def _assert_names_are_unique(self) -> None: - counts = collections.Counter(metric.name for metric in self._metrics) - if len(counts) != len(self._metrics): - for name, count in counts.items(): - if count > 1: - raise ValueError( - f"Duplicate metric name: {name} in {self._metrics}" - ) - - def __attrs_post_init__(self): - self._assert_names_are_unique() - - def __iter__(self) -> Iterator[MetricInformation]: - return iter(self._metrics) - - def __contains__(self, x: object) -> bool: - return x in self._metrics - - def __len__(self) -> int: - return len(self._metrics) - - def __add__(self, metrics: Iterable[MetricInformation]) -> "MetricsConfig": - return MetricsConfig(self._metrics + list(metrics)) - - def of_type( - self, include: Union[MetricType, Iterable[MetricType]] - ) -> "MetricsConfig": - """Filters the Metrics by type.""" - if isinstance(include, MetricType): - include = (include,) - return MetricsConfig(m for m in self._metrics if m.type in include) - - def append(self, metric: MetricInformation): - self._metrics.append(metric) - self._assert_names_are_unique() - - def extend(self, metrics: Iterable[MetricInformation]): - for metric in metrics: - self.append(metric) - - @property - def is_single_objective(self) -> bool: - """Returns True if only one objective metric is configured.""" - return len(self.of_type(MetricType.OBJECTIVE)) == 1 - - -@attr.s(frozen=True, init=True, slots=True, kw_only=True) -class _PathSegment: - """Selection of a parameter name and one of its values.""" - - # A ParameterConfig name. 
- name: str = attr.ib( - init=True, validator=attr.validators.instance_of(str), kw_only=True - ) - - # A ParameterConfig value. - value: Union[int, float, str] = attr.ib( - init=True, - validator=attr.validators.instance_of((int, float, str)), - kw_only=True, - ) - - -class _PathSelector(Sequence[_PathSegment]): - """Immutable sequence of path segments.""" - - def __init__(self, iterable: Iterable[_PathSegment] = tuple()): - self._paths = tuple(iterable) - - @overload - def __getitem__(self, s: slice) -> "_PathSelector": - ... - - @overload - def __getitem__(self, i: int) -> _PathSegment: - ... - - def __getitem__(self, index): - item = self._paths[index] - if isinstance(item, _PathSegment): - return item - else: - return _PathSelector(item) - - def __len__(self) -> int: - """Returns the number of elements in the container.""" - return len(self._paths) - - def __add__( - self, other: Union[Sequence[_PathSegment], _PathSegment] - ) -> "_PathSelector": - if isinstance(other, _PathSegment): - other = [other] - return _PathSelector(self._paths + tuple(other)) - - def __str__(self) -> str: - """Returns the path as a string.""" - return "/".join(["{}={}".format(p.name, p.value) for p in self._paths]) - - -class InvalidParameterError(Exception): - """Error thrown when parameter values are invalid.""" - - -################### Main Classes ################### -@attr.s(frozen=True, init=True, slots=True, kw_only=True) -class SearchSpaceSelector: - """A Selector for all, or part of a SearchSpace.""" - - # List of ParameterConfig objects referenced by this selector. - # This is a reference to a list of objects owned by SearchSpace (and will - # typically include the entire SearchSpace). - _configs: List[parameter_config.ParameterConfig] = attr.ib( - init=True, - factory=list, - # Verify that this is a list of ParameterConfig objects. 
- validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of( - parameter_config.ParameterConfig - ), - iterable_validator=attr.validators.instance_of(list), - ), - kw_only=True, - ) - - # _selected_path and _selected_name control how parameters are added to the - # search space. - # - # 1) If _selected_path is empty, and _selected_name is empty, parameters - # are added to the root of the search space. - # 2) If _selected_path is empty, and _selected_name is non-empty, parameters - # will be added as child parameters to all root and child parameters - # with name ==_selected_name. - # 3) If both _selected_path and _selected_name are specified, parameters will - # be added as child parameters to the parameter specified by the path and - # the name. - # 4) If _selected_path is non-empty, and _selected_name is empty, this is an - # error. - - # An ordered list of _PathSelector objects which uniquely identifies a path - # in a conditional tree. - _selected_path: _PathSelector = attr.ib( - init=True, - default=_PathSelector(), - converter=_PathSelector, - # Verify that this is a list of _PathSegment objects. - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(_PathSegment), - iterable_validator=attr.validators.instance_of(Iterable), - ), - kw_only=True, - ) - - # A ParameterConfig name. - # If there is a _selected_name, then there have to also be _selected_values - # below, and new parameters are added to the parent(s) selected by - # _selected_path and _selected_name. - _selected_name: str = attr.ib( - init=True, default="", validator=attr.validators.instance_of(str), kw_only=True - ) - - # List of ParameterConfig values from _configs. - # If there are _selected_values, then there have to also be _selected_name - # above. 
- _selected_values: MonotypeParameterSequence = attr.ib( - init=True, - factory=list, - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float, str)), - iterable_validator=attr.validators.instance_of(list), - ), - kw_only=True, - ) - - @property - def parameter_name(self) -> str: - """Returns the selected parameter name.""" - return self._selected_name - - @property - def parameter_values(self) -> MonotypeParameterSequence: - """Returns the selected parameter values.""" - return copy.deepcopy(self._selected_values) - - def add_float_param( - self, - name: str, - min_value: float, - max_value: float, - *, - default_value: Optional[float] = None, - scale_type: Optional[ScaleType] = ScaleType.LINEAR, - index: Optional[int] = None, - ) -> "SearchSpaceSelector": - """Adds floating point parameter config(s) to the search space. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - min_value: Inclusive lower bound for the parameter. - max_value: Inclusive upper bound for the parameter. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='rate' and index=0, then a single ParameterConfig with name - 'rate[0]' is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector(s) for the newly added parameter(s): - One SearchSpaceSelector if one parameter was added, or a list of - SearchSpaceSelector if multiple parameters were added. 
- - Raises: - ValueError: If `index` is invalid (e.g. negative). - """ - bounds = (float(min_value), float(max_value)) - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - bounds=bounds, - scale_type=scale_type, - default_value=default_value, - ) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_int_param( - self, - name: str, - min_value: int, - max_value: int, - *, - default_value: Optional[int] = None, - scale_type: Optional[ScaleType] = None, - index: Optional[int] = None, - ) -> "SearchSpaceSelector": - """Adds integer parameter config(s) to the search space. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - min_value: Inclusive lower bound for the parameter. - max_value: Inclusive upper bound for the parameter. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='hidden_units' and index=0, then a single ParameterConfig with name - 'hidden_units[0]' is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If min_value or max_value are not integers. - ValueError: If `index` is invalid (e.g. negative). 
- """ - int_min_value = int(min_value) - if not math.isclose(min_value, int_min_value): - raise ValueError( - "min_value for an INTEGER parameter should be an integer" - ", got: [{}]".format(min_value) - ) - int_max_value = int(max_value) - if not math.isclose(max_value, int_max_value): - raise ValueError( - "max_value for an INTEGER parameter should be an integer" - ", got: [{}]".format(min_value) - ) - bounds = (int_min_value, int_max_value) - - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - bounds=bounds, - scale_type=scale_type, - default_value=default_value, - ) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_discrete_param( - self, - name: str, - feasible_values: Union[Sequence[float], Sequence[int]], - *, - default_value: Optional[Union[float, int]] = None, - scale_type: Optional[ScaleType] = ScaleType.LINEAR, - index: Optional[int] = None, - auto_cast: Optional[bool] = True, - ) -> "SearchSpaceSelector": - """Adds ordered numeric parameter config(s) with a finite set of values. - - IMPORTANT: If a parameter is discrete, its values are assumed to have - ordered semantics. Thus, you should not use discrete parameters for - unordered values such as ids. In this case, see add_categorical_param() - below. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - feasible_values: The set of feasible values for this parameter. 
- default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='batch_size' and index=0, then a single ParameterConfig with name - 'batch_size[0]' is added. `index` should be >= 0. - auto_cast: If False, the external type will be set to INTEGER if all - values are castable to an integer without losing precision. If True, the - external type will be set to float. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If `index` is invalid (e.g. negative). - """ - param_names = self._get_parameter_names_to_create(name=name, index=index) - - external_type = ExternalType.FLOAT - if auto_cast: - # If all feasible values are convertible to ints without loss of - # precision, annotate the external type as INTEGER. This will cast - # [0., 1., 2.] into [0, 1, 2] when parameter values are returned in - # clients. - if all([v == round(v) for v in feasible_values]): - external_type = ExternalType.INTEGER - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - feasible_values=sorted(feasible_values), - scale_type=scale_type, - default_value=default_value, - external_type=external_type, - ) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def add_categorical_param( - self, - name: str, - feasible_values: Sequence[str], - *, - default_value: Optional[str] = None, - scale_type: Optional[ScaleType] = None, - index: Optional[int] = None, - ) -> "SearchSpaceSelector": - """Adds unordered string-valued parameter config(s) to the search space. - - IMPORTANT: If a parameter is categorical, its values are assumed to be - unordered. If the `feasible_values` have ordering, use add_discrete_param() - above, since it will improve Vizier's model quality. 
- - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. - - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - feasible_values: The set of feasible values for this parameter. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='id' and index=0, then a single ParameterConfig with name 'id[0]' - is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If `index` is invalid (e.g. negative). - """ - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - feasible_values=sorted(feasible_values), - scale_type=scale_type, - default_value=default_value, - ) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def bool_to_string(x): - return "True" if x else "False" - - def add_bool_param( - self, - name: str, - feasible_values: Optional[Sequence[bool]] = None, - *, - default_value: Optional[bool] = None, - scale_type: Optional[ScaleType] = None, - index: Optional[int] = None, - ) -> "SearchSpaceSelector": - """Adds boolean-valued parameter config(s) to the search space. - - If select_all() was previously called for this selector, so it contains - selected parent values, the parameter configs will be added as child - parameters to the selected parameter configs, and a reference to this - selector is returned. 
- - If no parent values are selected, the parameter config(s) will be added at - the same level as currently selected parameters, and a reference to the - newly added parameters is returned. - - Args: - name: The parameter's name. Cannot be empty. - feasible_values: An optional list of feasible boolean values, i.e. one of - the following: [True], [False], [True, False], [False, True]. - default_value: A default value for the Parameter. - scale_type: Scaling to be applied. NOT VALIDATED. - index: Specifies the multi-dimensional index for this parameter. E.g. if - name='match' and index=0, then a single ParameterConfig with name - 'match[0]' is added. `index` should be >= 0. - - Returns: - SearchSpaceSelector for the newly added parameter. - - Raises: - ValueError: If `feasible_values` has invalid values. - ValueError: If `index` is invalid (e.g. negative). - """ - allowed_values = (None, (True, False), (False, True), (True,), (False,)) - if feasible_values not in allowed_values: - raise ValueError( - "feasible_values must be one of %s; got: %s." 
- % (allowed_values, feasible_values) - ) - if feasible_values is None: - categories = ("True", "False") - else: - categories = [self.bool_to_string(x) for x in feasible_values] - feasible_values = sorted(categories, reverse=True) - - if default_value is not None: - default_value = self.bool_to_string(default_value) - - param_names = self._get_parameter_names_to_create(name=name, index=index) - - new_params = [] - for param_name in param_names: - new_pc = parameter_config.ParameterConfig.factory( - name=param_name, - feasible_values=sorted(feasible_values), - scale_type=scale_type, - default_value=default_value, - external_type=ExternalType.BOOLEAN, - ) - new_params.append(new_pc) - return self._add_parameters(new_params)[0] - - def select( - self, - parameter_name: str, - parameter_values: Optional[MonotypeParameterSequence] = None, - ) -> "SearchSpaceSelector": - """Selects a single parameter specified by path and parameter_name. - - This method should be called to select a parent parameter, before calling - `add_*_param` methods to create child parameters. - - Given a selector to the root of the search space: - root = pyvizier.SearchSpace().select_root() - - 1) To select a parameter at the root of the search space, with parent values - for child parameters: - model = root.select('model_type', ['dnn']) - model.add_float_param('hidden_units', ...) - 2) To select a parameter at the root of the search space, and defer parent - value selection to later calls: - model = root.select('model_type') - # Add `hidden_units` and `optimizer_type` as `dnn` children. - model.select_values(['dnn']).add_float_param('hidden_units', ...) - model.select_values(['dnn']).add_categorical_param( - 'optimizer_type', ['adam', 'adagrad']) - # Add `optimizer_type` and `activation` as `linear` children. - model.select_values(['linear']).add_categorical_param( - 'optimizer_type', ['adam', 'ftrl']) - model.select_values(['linear']).add_categorical_param('activation', ...) 
- 3) To select a parameter in a conditional search space, specify a path, by - chaining select() calls: - optimizer = root.select('model_type', ['linear']).select('optimizer_type') - optimizer.select_values('adam').add_float_param('learning_rate', 0.001,..) - optimizer.select_values('ftrl').add_float_param('learning_rate', 0.1,..) - - # OR pre-select the parent parameter values: - optimizer = root.select('model_type', ['linear']).select( - 'optimizer_type', ['adam']) - optimizer.add_float_param('learning_rate', 0.001,...) - 4) If there is *only one* parameter with the given name, then it is possible - to select it without specifying the path, using: - selectors = root.select_all('activation') - # 'activation' exists only under model_type='linear'. - assert len(selectors) == 1 - activation = selectors[0] - - Args: - parameter_name: - parameter_values: Optional parameter values for this selector, which will - be used to add child parameters, or traverse a conditional tree. - - Returns: - A new SearchSpaceSelector. - """ - # Make sure parameter_name exists in the conditional parameters tree. - # parameter_values will be validated only when a child parameter is added. - if not self._parameter_exists(parameter_name): - raise ValueError("No parameter with name {} exists in this SearchSpace") - - path = [] - selected_values = [] - if parameter_values is not None: - if not isinstance(parameter_values, (list, tuple)): - raise ValueError( - "parameter_values should be a list or tuple, given " - "{} with type {}".format(parameter_values, type(parameter_values)) - ) - selected_values = parameter_values - - if self._selected_name: - # There is already a parameter name selected, so this is a chained select - # call. 
- if not self._selected_values: - raise ValueError( - "Cannot call select() again before parameter values " - "are selected: parameter {} was previously selected, " - " with the path: {}, but no values were selected for " - "it".format(self.parameter_name, self.path_string) - ) - # Return a new selector, with the currently selected parameter added to - # the path. - new_path_segment = [ - _PathSegment(name=self._selected_name, value=self._selected_values[0]) - ] - path = self._selected_path + new_path_segment - if not self._path_exists(path): - raise ValueError( - "Path {} does not exist in this SearchSpace: " - "{}".format((path), self) - ) - - return SearchSpaceSelector( - configs=self._configs, - selected_path=path, - selected_name=parameter_name, - selected_values=selected_values, - ) - - def select_values( - self, parameter_values: MonotypeParameterSequence - ) -> "SearchSpaceSelector": - """Selects values for a pre-selected parameter. - - This method should be called to select parent parameter(s) value(s), before - calling `add_*_param` methods to create child parameters. - - This method must be called AFTER select(). - This method mutates this selector. - - Args: - parameter_values: Parameter values for this selector, which will be used - to add child parameters. - - Returns: - SearchSpaceSelector - """ - if not self._selected_name: - raise ValueError("No parameter is selected. Call select() first.") - if not parameter_values: - raise ValueError( - "parameter_values cannot be empty. Specify at least one value." - ) - if not isinstance(parameter_values, (list, tuple)): - raise ValueError( - "parameter_values should be a list or tuple, given " - "{} with type {}".format(parameter_values, type(parameter_values)) - ) - # TODO: Allow to directly select boolean parent parameters. 
- object.__setattr__(self, "_selected_values", parameter_values) - return self - - def select_all( - self, parameter_name: str, parameter_values: MonotypeParameterSequence - ) -> List["SearchSpaceSelector"]: - """Select one or more parent parameters, with the same name. - - This method should be called to select parent parameter(s), before calling - `add_*_param` methods to create child parameters. - Multiple parent parameters with the same name are possible in a conditional - search space. See go/conditional-parameters for more details. - - 1) If the conditional search space has two parameters with the same - name, 'optimizer_type', given a selector to the root of the search space, - select_all() can be used to simultaneously add child parameters to both - 'optimizer_type` parameters: - - root = pyvizier.SearchSpace().select_root() - model.select_values(['dnn']).add_categorical_param( - 'optimizer_type', ['adam', 'adagrad']) - model.select_values(['linear']).add_categorical_param( - 'optimizer_type', ['adam', 'ftrl']) - # Add a 'learning_rate' parameter to both 'adam' optimizers: - optimizers = model.select_all('optimizer_type', parent_values=['adam']) - optimizers.add_float_param('learning_rate', ...) - - 2) If there is *only one* parameter with the given name, then it is also - possible to use select_all() to select it: - root = pyvizier.SearchSpace().select_root() - model.select_values(['dnn']).add_categorical_param('activation', ...) - # Select the single parameter with the name 'activation': - selectors = root.select_all('activation') - assert len(selectors) == 1 - activation = selector[0] - - Args: - parameter_name: - parameter_values: Optional parameter values for this selector, which will - be used to add child parameters. - - Returns: - List of SearchSpaceSelector - """ - # TODO: Raise an error if this selector already has selected_name. - # Make sure parameter_name exists in the conditional parameters tree. 
- if not self._parameter_exists(parameter_name): - raise ValueError("No parameter with name {} exists in this SearchSpace") - - if parameter_values is not None: - if not isinstance(parameter_values, (list, tuple)): - raise ValueError( - "parameter_values should be a list or tuple, given " - "{} with type {}".format(parameter_values, type(parameter_values)) - ) - # TODO: Complete this method. - raise NotImplementedError() - - def _path_exists(self, path: _PathSelector) -> bool: - """Checks if the path exists in the conditional tree.""" - for parent in self._configs: - if path[0].name == parent.name and path[0].value in parent.feasible_values: - if len(path) == 1: - # No need to recurse. - return True - return self._path_exists_inner(parent, path[1:]) - return False - - @classmethod - def _path_exists_inner( - cls, current_root: parameter_config.ParameterConfig, current_path: _PathSelector - ) -> bool: - """Returns true if the path exists, starting at root_parameter.""" - child_idx = None - for idx, child in enumerate(current_root.child_parameter_configs): - if ( - current_path[0].name == child.name - and current_path[0].value in child.feasible_values - ): - child_idx = idx - break - if child_idx is None: - # No match is found. This path does not exist. - return False - if len(current_path) == 1: - # This is the end of the path. - return True - # Keep traversing. - return cls._path_exists_inner( - current_root.child_parameter_configs[child_idx], current_path[1:] - ) - - def _parameter_exists(self, parameter_name: str) -> bool: - """Checks if there exists at least one parameter with this name. - - Note that this method checks existence in the entire search space. - - Args: - parameter_name: - - Returns: - bool: Exists. 
- """ - found = False - for parent in self._configs: - for pc in parent.traverse(show_children=False): - if pc.name == parameter_name: - found = True - break - return found - - @classmethod - def _get_parameter_names_to_create( - cls, *, name: str, length: Optional[int] = None, index: Optional[int] = None - ) -> List[str]: - """Returns the names of all parameters which should be created. - - Args: - name: The base parameter name. - length: Specifies the length of a multi-dimensional parameters. If larger - than 1, then multiple ParameterConfigs are added. E.g. if name='rate' - and length=2, then two ParameterConfigs with names 'rate[0]', 'rate[1]' - are added. Cannot be specified together with `index`. - index: Specifies the multi-dimensional index for this parameter. Cannot be - specified together with `length`. E.g. if name='rate' and index=1, then - a single ParameterConfig with name 'rate[1]' is added. - - Returns: - List of parameter names to create. - - Raises: - ValueError: If `length` or `index` are invalid. - """ - if length is not None and index is not None: - raise ValueError( - "Only one of `length` and `index` can be specified. Got" - " length={}, index={}".format(length, index) - ) - if length is not None and length < 1: - raise ValueError("length must be >= 1. Got length={}".format(length)) - if index is not None and index < 0: - raise ValueError("index must be >= 0. Got index={}".format(index)) - - param_names = [] - if length is None and index is None: - # Add one parameter with no multi-dimensional index. - param_names.append(name) - elif index is not None: - # Add one parameter with a multi-dimensional index. - param_names.append(cls._multi_dimensional_parameter_name(name, index)) - elif length is not None: - # `length > 0' is synthatic sugar for multi multi-dimensional parameter. - # Each multi-dimensional parameter is encoded as a list of separate - # parameters with names equal to `name[index]` (index is zero based). 
- for i in range(length): - param_names.append(cls._multi_dimensional_parameter_name(name, i)) - return param_names - - @classmethod - def _multi_dimensional_parameter_name(cls, name: str, index: int) -> str: - """Returns the indexed parameter name.""" - return "{}[{}]".format(name, index) - - @classmethod - def parse_multi_dimensional_parameter_name( - cls, name: str - ) -> Optional[Tuple[str, int]]: - """Returns the base name for a multi-dimensional parameter name. - - Args: - name: A parameter name. - - Returns: - (base_name, index): if name='hidden_units[10]', base_name='hidden_units' - and index=10. - Returns None if name is not in the format 'base_name[idx]'. - """ - regex = r"(?P[^()]*)\[(?P\d+)\]$" - pattern = re.compile(regex) - matches = pattern.match(name) - if matches is None: - return None - return (matches.groupdict()["name"], int(matches.groupdict()["index"])) - - @property - def path_string(self) -> str: - """Returns the selected path as a string.""" - return str(self._selected_path) - - def _add_parameters( - self, parameters: List[parameter_config.ParameterConfig] - ) -> List["SearchSpaceSelector"]: - """Adds ParameterConfigs either to the root, or as child parameters. - - Args: - parameters: The parameters to add to the search space. - - Returns: - A list of SearchSpaceSelectors, one for each parameters added. - """ - if self._selected_name and not self._selected_values: - raise ValueError( - "Cannot add child parameters to parameter {}: parent values were " - "not selected. Call select_values() first.".format(self._selected_name) - ) - if not self._selected_name and self._selected_values: - raise ValueError( - "Cannot add child parameters: no parent name is selected." - " Call select() or select_all() first." - ) - if self._selected_path and not self._selected_name: - raise ValueError( - "Cannot add child parameters: path is specified ({}), but no parent" - " name is specified. 
Call select() or select_all() first".format( - self.path_string - ) - ) - - selectors: List["SearchSpaceSelector"] = [] - if not self._selected_path and not self._selected_name: - # If _selected_path is empty, and _selected_name is empty, parameters - # are added to the root of the search space. - self._configs.extend(parameters) - # Return Selectors for the newly added parameters. - for param in parameters: - selectors.append( - SearchSpaceSelector( - configs=self._configs, - selected_path=[], - selected_name=param.name, - selected_values=[], - ) - ) - elif not self._selected_path and self._selected_name: - # If _selected_path is empty, and _selected_name is not empty, parameters - # will be added as child parameters to *all* root and child parameters - # with name ==_selected_name. - for idx, root_param in enumerate(self._configs): - updated_param, new_selectors = self._recursive_add_child_parameters( - self._configs, - _PathSelector(), - root_param, - self._selected_name, - self._selected_values, - parameters, - ) - # Update the root ParameterConfig in place. - self._configs[idx] = updated_param - selectors.extend(new_selectors) - else: - # If both _selected_path and _selected_name are specified, parameters will - # be added as child parameters to the parameter specified by the path and - # the name. - idx, updated_param, new_selectors = self._add_parameters_at_selected_path( - root_configs=self._configs, - complete_path=self._selected_path, - parent_name=self._selected_name, - parent_values=self._selected_values, - new_children=parameters, - ) - # Update the root ParameterConfig in place. 
- self._configs[idx] = updated_param - selectors.extend(new_selectors) - - if not selectors: - raise ValueError( - "Cannot add child parameters: the path ({}), is not valid.".format( - self.path_string - ) - ) - return selectors - - @classmethod - def _recursive_add_child_parameters( - cls, - configs: List[parameter_config.ParameterConfig], - path: _PathSelector, - root: parameter_config.ParameterConfig, - parent_name: str, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig], - ) -> Tuple[parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: - """Recursively adds new children to all matching parameters. - - new_children are potentially added to root, and all matching child - parameters with name==parent_name. - - Args: - configs: A list of configs to include in returned SearchSpaceSelectors, - this list is not modified or used for anything else. - path: The path to include in returned SearchSpaceSelectors. - root: Parent parameter to start the recursion at. - parent_name: new_children are added to all parameter with this name. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. - - Returns: - (An updated root with all of its children updated, list of selectors to - any parameters which may have been added) - """ - updated_children: List[ - Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] - ] = [] - selectors: List["SearchSpaceSelector"] = [] - if root.name == parent_name: - # Add new children to this root. If this is a leaf parameter, - # e.g. it has no children, this is where the recursion ends. - for child in new_children: - updated_children.append((parent_values, child)) - # For the path, select one parent value, since for the path, the exact - # value does not matter, as long as it's valid. 
- root_path_fragment = [ - _PathSegment(name=root.name, value=parent_values[0]) - ] - selectors.append( - SearchSpaceSelector( - configs=configs, - selected_path=path + root_path_fragment, - selected_name=child.name, - selected_values=[], - ) - ) - # Recursively update existing children, if any. - for child in root.child_parameter_configs: - # For the path, select one parent value, since for the path, the exact - # value does not matter, as long as it's valid. - root_path_fragment = [ - _PathSegment(name=root.name, value=child.matching_parent_values[0]) - ] - updated_child, new_selectors = cls._recursive_add_child_parameters( - configs, - path + root_path_fragment, - child, - parent_name, - parent_values, - new_children, - ) - updated_children.append( - (updated_child.matching_parent_values, updated_child) - ) - selectors += new_selectors - # Update all children (existing and potentially new) in the root. - return root.clone_without_children.add_children(updated_children), selectors - - @classmethod - def _add_parameters_at_selected_path( - cls, - root_configs: List[parameter_config.ParameterConfig], - complete_path: _PathSelector, - parent_name: str, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig], - ) -> Tuple[int, parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: - """Adds new children to the parameter specified by the path and parent_name. - - Args: - root_configs: A list of configs to include in returned - SearchSpaceSelectors, this list is not modified. These are expected to - be the configs at the root of the search space. - complete_path: The path to include in the returned SearchSpaceSelectors. - parent_name: new_children are added to all parameter with this name. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. 
- - Returns: - (Root index in root_configs, - an updated root with all of its children updated, - list of selectors to any parameters which may have been added) - - Raises: - RuntimeError: - ValueError: - """ - if not complete_path: - # This is an internal error, since the caller should never specify an - # empty current_path. - raise RuntimeError("Internal error: got empty complete_path") - - # This is the beginning of the recursion. Select a root to recurse at. - current_root: Optional[parameter_config.ParameterConfig] = None - root_idx: int = 0 - for root_idx, root_param in enumerate(root_configs): - if complete_path[0].name == root_param.name: - current_root = root_param - break - if current_root is None: - raise ValueError( - "Invalid path: {}: failed to traverse the path: failed" - ' to find a matching root for parameter name "{}".' - " Root parameter names: {}".format( - (complete_path), - complete_path[0].name, - [pc.name for pc in root_configs], - ) - ) - - updated_root, selectors = cls._add_parameters_at_selected_path_inner( - root_configs=root_configs, - complete_path=complete_path, - current_root=current_root, - current_path=complete_path[1:], - parent_name=parent_name, - parent_values=parent_values, - new_children=new_children, - ) - return (root_idx, updated_root, selectors) - - @classmethod - def _add_parameters_at_selected_path_inner( - cls, - root_configs: List[parameter_config.ParameterConfig], - complete_path: _PathSelector, - current_root: parameter_config.ParameterConfig, - current_path: _PathSelector, - parent_name: str, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig], - ) -> Tuple[parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: - """Adds new children to the parameter specified by the path and parent_name. - - Args: - root_configs: A list of configs to include in returned - SearchSpaceSelectors, this list is not modified. 
These are expected to - be the configs at the root of the search space. - complete_path: The path to include in the returned SearchSpaceSelectors. - current_root: Parent parameter to start the recursion at. - current_path: The path to the parent parameter from current_root. This is - used in the recursion. - parent_name: new_children are added to all parameter with this name. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. - - Returns: - (An updated root with all of its children updated, - List of selectors to all added parameters) - - Raises: - RuntimeError: - ValueError: - """ - updated_children: List[ - Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] - ] = [] - selectors: List["SearchSpaceSelector"] = [] - - if not current_path: - # This is the end of the path. End the recursion. - # parent_name should be a child of current_root - child_idx = None - for idx, child in enumerate(current_root.child_parameter_configs): - if parent_name == child.name: - child_idx = idx - last_parent_path = [ - _PathSegment(name=parent_name, value=parent_values[0]) - ] - new_path = complete_path + last_parent_path - updated_child, selectors = cls._add_child_parameters( - root_configs, new_path, child, parent_values, new_children - ) - break - if child_idx is None: - raise ValueError( - 'Invalid parent_name: after traversing the path "{}", ' - 'failed to find a child parameter with name "{}".' - ' Current root="{}"'.format( - (complete_path), parent_name, current_root - ) - ) - - # Update current_root with the updated child. 
- for idx, child in enumerate(current_root.child_parameter_configs): - if idx == child_idx: - updated_children.append( - (updated_child.matching_parent_values, updated_child) - ) - else: - updated_children.append((child.matching_parent_values, child)) - return ( - current_root.clone_without_children.add_children(updated_children), - selectors, - ) - - # Traverse the path: find which child matches the next path selection. - child_idx = None - for idx, child in enumerate(current_root.child_parameter_configs): - if ( - current_path[0].name == child.name - and current_path[0].value in child.feasible_values - ): - child_idx = idx - break - if child_idx is None: - raise ValueError( - 'Invalid path: "{}": failed to traverse the path: failed' - ' to find a matching child for path selector "{}".' - ' Current root="{}", current_path="{}"'.format( - (complete_path), - (current_path[:1]), - current_root.name, - (current_path), - ) - ) - - updated_child, selectors = cls._add_parameters_at_selected_path_inner( - root_configs=root_configs, - complete_path=complete_path, - current_root=current_root.child_parameter_configs[child_idx], - current_path=current_path[1:], - parent_name=parent_name, - parent_values=parent_values, - new_children=new_children, - ) - # Update current_root with the updated child, leave the selectors untouched. 
- for idx, child in enumerate(current_root.child_parameter_configs): - if idx == child_idx: - updated_children.append( - (updated_child.matching_parent_values, updated_child) - ) - else: - updated_children.append((child.matching_parent_values, child)) - return ( - current_root.clone_without_children.add_children(updated_children), - selectors, - ) - - @classmethod - def _add_child_parameters( - cls, - selector_configs: List[parameter_config.ParameterConfig], - selector_path: _PathSelector, - parent: parameter_config.ParameterConfig, - parent_values: MonotypeParameterSequence, - new_children: List[parameter_config.ParameterConfig], - ) -> Tuple[parameter_config.ParameterConfig, List["SearchSpaceSelector"]]: - """Adds new children to the parent parameter and returns selectors. - - Args: - selector_configs: A list of configs to include in returned - SearchSpaceSelectors, this list is not modified. These are expected to - be the configs at the root of the search space. - selector_path: The path to include in the returned SearchSpaceSelectors. - parent: Parent parameter to add children to. - parent_values: new_children are added with these parent values. - new_children: Child parameter configs to add. - - Returns: - (An updated root with all of its children updated, - List of selectors to all added parameters) - - Raises: - RuntimeError: - ValueError: - """ - updated_children: List[ - Tuple[MonotypeParameterSequence, parameter_config.ParameterConfig] - ] = [] - selectors: List["SearchSpaceSelector"] = [] - - # Add existing children. - for child in parent.child_parameter_configs: - updated_children.append((child.matching_parent_values, child)) - # Add new child parameter configs. 
- for child in new_children: - updated_children.append((parent_values, child)) - selectors.append( - SearchSpaceSelector( - configs=selector_configs, - selected_path=selector_path, - selected_name=child.name, - selected_values=[], - ) - ) - # Add all children (existing and potentially new) to the parent. - return (parent.clone_without_children.add_children(updated_children), selectors) - - -@attr.s(frozen=True, init=True, slots=True, kw_only=True) -class SearchSpace: - """A builder and wrapper for StudyConfig.parameter_configs.""" - - _parameter_configs: List[parameter_config.ParameterConfig] = attr.ib( - init=False, factory=list - ) - - @classmethod - def _factory( - cls: Type[_T], - parameter_configs: Optional[List[parameter_config.ParameterConfig]] = None, - ) -> _T: - """Creates a new SearchSpace containing the provided parameter configs. - - Args: - parameter_configs: - - Returns: - SearchSpace - """ - if parameter_configs is None: - parameter_configs = [] - space = cls() - object.__setattr__(space, "_parameter_configs", list(parameter_configs)) - return space - - @property - def parameters(self) -> List[parameter_config.ParameterConfig]: - """Returns COPIES of the parameter configs in this Space.""" - return copy.deepcopy(self._parameter_configs) - - def select_root(self) -> SearchSpaceSelector: - """Returns a selector for the root of the search space. - - Parameters can be added to the search space using the returned - SearchSpaceSelector. 
- """ - return SearchSpaceSelector(configs=self._parameter_configs) - - @property - def is_conditional(self) -> bool: - """Returns True if search_space contains any conditional parameters.""" - return any([p.child_parameter_configs for p in self._parameter_configs]) - - def contains(self, parameters: trial.ParameterDict) -> bool: - try: - self.assert_contains(parameters) - return True - except InvalidParameterError: - return False - - def assert_contains(self, parameters: trial.ParameterDict) -> bool: - """Throws an error if parameters is not a valid point in the space. - - Args: - parameters: - - Returns: - Always returns True unless an exception is Raised. - - Raises: - InvalidParameterError: If parameters are invalid. - NotImplementedError: If parameter type is unknown - """ - if self.is_conditional: - raise NotImplementedError("Not implemented for conditional space.") - if len(parameters) != len(self._parameter_configs): - set1 = set(pc.name for pc in self._parameter_configs) - set2 = set(parameters) - raise InvalidParameterError( - f"Search space has {len(self._parameter_configs)} parameters " - f"but only {len(parameters)} were given. " - f"Missing in search space: {set2 - set1}. " - f"Missing in parameters: {set1 - set2}." 
- ) - for pc in self._parameter_configs: - if pc.name not in parameters: - raise InvalidParameterError(f"{pc.name} is missing in {parameters}.") - elif not pc.contains(parameters[pc.name]): - raise InvalidParameterError( - f"{parameters[pc.name]} is not feasible in {pc}" - ) - return True - - -################### Main Class ################### -@attr.define(frozen=False, init=True, slots=True) -class ProblemStatement: - """A builder and wrapper for core StudyConfig functionality.""" - - search_space: SearchSpace = attr.ib( - init=True, - factory=SearchSpace, - validator=attr.validators.instance_of(SearchSpace), - ) - - metric_information: MetricsConfig = attr.ib( - init=True, - factory=MetricsConfig, - converter=MetricsConfig, - validator=attr.validators.instance_of(MetricsConfig), - kw_only=True, - ) - - metadata: common.Metadata = attr.field( - init=True, - kw_only=True, - factory=common.Metadata, - validator=attr.validators.instance_of(common.Metadata), - ) - - @property - def debug_info(self) -> str: - return "" diff --git a/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py b/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py deleted file mode 100644 index 18f491d437..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/base_study_config_test.py +++ /dev/null @@ -1,511 +0,0 @@ -"""Tests for vizier.pyvizier.shared.base_study_config.""" - -import numpy as np -from vizier._src.pyvizier.shared import base_study_config -from vizier._src.pyvizier.shared import parameter_config as pc -from vizier._src.pyvizier.shared import trial -from absl.testing import absltest -from absl.testing import parameterized - - -class ObjectiveMetricGoalTest(absltest.TestCase): - def test_basics(self): - self.assertTrue(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_maximize) - self.assertFalse(base_study_config.ObjectiveMetricGoal.MAXIMIZE.is_minimize) - self.assertTrue(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_minimize) - 
self.assertFalse(base_study_config.ObjectiveMetricGoal.MINIMIZE.is_maximize) - - -class MetricTypeTest(absltest.TestCase): - def test_basics(self): - self.assertTrue(base_study_config.MetricType.SAFETY.is_safety) - self.assertTrue(base_study_config.MetricType.OBJECTIVE.is_objective) - - -class MetricInformationTest(absltest.TestCase): - def testMinMaxValueDefault(self): - info = base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE - ) - self.assertEqual(info.min_value, -np.inf) - self.assertEqual(info.max_value, np.inf) - - def testMinMaxValueSet(self): - info = base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - min_value=-1.0, - max_value=1.0, - ) - self.assertEqual(info.min_value, -1.0) - self.assertEqual(info.max_value, 1.0) - - def testMinMaxBadValueInit(self): - with self.assertRaises(ValueError): - base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - min_value=1.0, - max_value=-1.0, - ) - - def testMinMaxBadValueSet(self): - info = base_study_config.MetricInformation( - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - min_value=-1.0, - max_value=1.0, - ) - with self.assertRaises(ValueError): - info.min_value = 2.0 - with self.assertRaises(ValueError): - info.max_value = -2.0 - - -class MetricsConfigTest(parameterized.TestCase): - def testBasics(self): - config = base_study_config.MetricsConfig() - config.append( - base_study_config.MetricInformation( - name="max1", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE - ) - ) - config.extend( - [ - base_study_config.MetricInformation( - name="max_safe1", - goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE, - safety_threshold=0.0, - ), - base_study_config.MetricInformation( - name="max2", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE - ), - base_study_config.MetricInformation( - name="min1", goal=base_study_config.ObjectiveMetricGoal.MINIMIZE - ), - 
base_study_config.MetricInformation( - name="min_safe2", - goal=base_study_config.ObjectiveMetricGoal.MINIMIZE, - safety_threshold=0.0, - ), - ] - ) - self.assertLen(config, 5) - self.assertLen(config.of_type(base_study_config.MetricType.OBJECTIVE), 3) - self.assertLen(config.of_type(base_study_config.MetricType.SAFETY), 2) - - def testDuplicateNames(self): - config = base_study_config.MetricsConfig() - config.append( - base_study_config.MetricInformation( - name="max1", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE - ) - ) - with self.assertRaises(ValueError): - config.append( - base_study_config.MetricInformation( - name="max1", goal=base_study_config.ObjectiveMetricGoal.MAXIMIZE - ) - ) - - -class SearchSpaceTest(parameterized.TestCase): - def testAddFloatParamMinimal(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - selector = space.select_root().add_float_param("f1", 1.0, 15.0) - # Test the returned selector. - self.assertEqual(selector.path_string, "") - self.assertEqual(selector.parameter_name, "f1") - self.assertEqual(selector.parameter_values, []) - # Test the search space. 
- self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, "f1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[0].feasible_values - self.assertIsNone(space.parameters[0].default_value) - - _ = space.select_root().add_float_param("f2", 2.0, 16.0) - self.assertLen(space.parameters, 2) - self.assertEqual(space.parameters[0].name, "f1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[1].name, "f2") - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) - - def testAddFloatParam(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_float_param( - "f1", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG - ) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, "f1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - def testAddDiscreteParamIntegerFeasibleValues(self): - """Test a Discrete parameter with integer feasible values.""" - space = 
base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_discrete_param( - "d1", [101, 15.0, 21.0], default_value=15.0 - ) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, "d1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DISCRETE) - self.assertEqual(space.parameters[0].bounds, (15.0, 101.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LINEAR) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - self.assertEqual(space.parameters[0].feasible_values, [15.0, 21.0, 101]) - self.assertEqual(space.parameters[0].default_value, 15.0) - self.assertEqual(space.parameters[0].external_type, pc.ExternalType.INTEGER) - - def testAddDiscreteParamFloatFeasibleValues(self): - """Test a Discrete parameter with float feasible values.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_discrete_param( - "d1", [15.1, 21.0, 101], default_value=15.1 - ) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].external_type, pc.ExternalType.FLOAT) - - def testAddBooleanParam(self): - """Test a Boolean parameter.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_bool_param("b1", default_value=True) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, "b1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) - with self.assertRaisesRegex(ValueError, "Accessing bounds of a categorical.*"): - _ = space.parameters[0].bounds - self.assertIsNone(space.parameters[0].scale_type) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - self.assertEqual(space.parameters[0].feasible_values, ["False", "True"]) - 
self.assertEqual(space.parameters[0].default_value, "True") - self.assertEqual(space.parameters[0].external_type, pc.ExternalType.BOOLEAN) - - def testAddBooleanParamWithFalseDefault(self): - """Test a Boolean parameter.""" - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_bool_param("b1", default_value=False) - self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].default_value, "False") - - def testAddTwoFloatParams(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - _ = space.select_root().add_float_param( - "f1", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG - ) - _ = space.select_root().add_float_param( - "f2", 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG - ) - - self.assertLen(space.parameters, 2) - - self.assertEqual(space.parameters[0].name, "f1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - self.assertEqual(space.parameters[1].name, "f2") - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) - self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) - self.assertEmpty(space.parameters[1].matching_parent_values) - self.assertEmpty(space.parameters[1].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[1].feasible_values - self.assertEqual(space.parameters[1].default_value, 4.0) - - def 
testChainAddTwoFloatParams(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - root = space.select_root() - root.add_float_param( - "f1", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG - ) - root.add_float_param( - "f2", 2.0, 16.0, default_value=4.0, scale_type=pc.ScaleType.REVERSE_LOG - ) - - self.assertLen(space.parameters, 2) - - self.assertEqual(space.parameters[0].name, "f1") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - self.assertEqual(space.parameters[1].name, "f2") - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 16.0)) - self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.REVERSE_LOG) - self.assertEmpty(space.parameters[1].matching_parent_values) - self.assertEmpty(space.parameters[1].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[1].feasible_values - self.assertEqual(space.parameters[1].default_value, 4.0) - - def testMultidimensionalParameters(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - selector0 = space.select_root().add_float_param( - "f", 1.0, 15.0, default_value=3.0, scale_type=pc.ScaleType.LOG, index=0 - ) - selector1 = space.select_root().add_float_param( - "f", 2.0, 10.0, default_value=4.0, scale_type=pc.ScaleType.LINEAR, index=1 - ) - # Test the returned selectors. 
- self.assertEqual(selector0.path_string, "") - self.assertEqual(selector0.parameter_name, "f[0]") - self.assertEqual(selector0.parameter_values, []) - self.assertEqual(selector1.path_string, "") - self.assertEqual(selector1.parameter_name, "f[1]") - self.assertEqual(selector1.parameter_values, []) - # Test the search space. - self.assertLen(space.parameters, 2) - self.assertEqual(space.parameters[0].name, "f[0]") - self.assertEqual(space.parameters[0].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[0].bounds, (1.0, 15.0)) - self.assertEqual(space.parameters[0].scale_type, pc.ScaleType.LOG) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[0].feasible_values - self.assertEqual(space.parameters[0].default_value, 3.0) - - self.assertEqual(space.parameters[1].name, "f[1]") - self.assertEqual(space.parameters[1].type, pc.ParameterType.DOUBLE) - self.assertEqual(space.parameters[1].bounds, (2.0, 10.0)) - self.assertEqual(space.parameters[1].scale_type, pc.ScaleType.LINEAR) - self.assertEmpty(space.parameters[1].matching_parent_values) - self.assertEmpty(space.parameters[1].child_parameter_configs) - with self.assertRaisesRegex(ValueError, "feasible_values is invalid.*"): - _ = space.parameters[1].feasible_values - self.assertEqual(space.parameters[1].default_value, 4.0) - - def testConditionalParameters(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - root = space.select_root() - root.add_categorical_param("model_type", ["linear", "dnn"], default_value="dnn") - # Test the selector. - self.assertEqual(root.path_string, "") - self.assertEqual(root.parameter_name, "") - self.assertEqual(root.parameter_values, []) - # Test the search space. 
- self.assertLen(space.parameters, 1) - self.assertEqual(space.parameters[0].name, "model_type") - self.assertEqual(space.parameters[0].type, pc.ParameterType.CATEGORICAL) - with self.assertRaisesRegex(ValueError, "Accessing bounds of a categorical.*"): - _ = space.parameters[0].bounds - self.assertIsNone(space.parameters[0].scale_type) - self.assertEmpty(space.parameters[0].matching_parent_values) - self.assertEmpty(space.parameters[0].child_parameter_configs) - self.assertEqual(space.parameters[0].feasible_values, ["dnn", "linear"]) - self.assertEqual(space.parameters[0].default_value, "dnn") - - dnn = root.select("model_type", ["dnn"]) - # Test the selector. - self.assertEqual(dnn.path_string, "") - self.assertEqual(dnn.parameter_name, "model_type") - self.assertEqual(dnn.parameter_values, ["dnn"]) - dnn.add_float_param( - "learning_rate", - 0.0001, - 1.0, - default_value=0.001, - scale_type=base_study_config.ScaleType.LOG, - ) - # Test the search space. - self.assertLen(space.parameters, 1) - - linear = root.select("model_type", ["linear"]) - # Test the selector. - self.assertEqual(linear.path_string, "") - self.assertEqual(linear.parameter_name, "model_type") - self.assertEqual(linear.parameter_values, ["linear"]) - linear.add_float_param( - "learning_rate", - 0.1, - 1.0, - default_value=0.1, - scale_type=base_study_config.ScaleType.LOG, - ) - # Test the search space. - self.assertLen(space.parameters, 1) - - dnn_optimizer = dnn.add_categorical_param("optimizer_type", ["adam", "adagrad"]) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(dnn_optimizer.path_string, "model_type=dnn") - self.assertEqual(dnn_optimizer.parameter_name, "optimizer_type") - self.assertEqual(dnn_optimizer.parameter_values, []) - - # Chained select() calls, path length of 1. 
- lr = ( - root.select("model_type", ["dnn"]) - .select("optimizer_type", ["adam"]) - .add_float_param( - "learning_rate", - 0.1, - 1.0, - default_value=0.1, - scale_type=base_study_config.ScaleType.LOG, - ) - ) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(lr.parameter_name, "learning_rate") - self.assertEqual(lr.parameter_values, []) - self.assertEqual(lr.path_string, "model_type=dnn/optimizer_type=adam") - - # Chained select() calls, path length of 2. - ko = ( - root.select("model_type", ["dnn"]) - .select("optimizer_type", ["adam"]) - .add_bool_param("use_keras_optimizer", default_value=False) - ) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(ko.parameter_name, "use_keras_optimizer") - self.assertEqual(ko.parameter_values, []) - self.assertEqual(ko.path_string, "model_type=dnn/optimizer_type=adam") - - ko.select_values(["True"]) - self.assertEqual(ko.parameter_values, ["True"]) - - selector = ko.add_float_param("keras specific", 1.3, 2.4, default_value=2.1) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selector. - self.assertEqual(selector.parameter_name, "keras specific") - self.assertEqual(selector.parameter_values, []) - self.assertEqual( - selector.path_string, - "model_type=dnn/optimizer_type=adam/use_keras_optimizer=True", - ) - - # Selects more than one node. - # selectors = dnn.select_all('optimizer_type', ['adam']) - # self.assertLen(selectors, 2) - - def testConditionalParametersWithReturnedSelectors(self): - space = base_study_config.SearchSpace() - self.assertEmpty(space.parameters) - root = space.select_root() - model_type = root.add_categorical_param("model_type", ["linear", "dnn"]) - learning_rate = model_type.select_values(["dnn"]).add_float_param( - "learning_rate", - 0.1, - 1.0, - default_value=0.001, - scale_type=base_study_config.ScaleType.LOG, - ) - # Test the search space. 
- self.assertLen(space.parameters, 1) - # Test the selectors. - self.assertEqual(model_type.parameter_values, ["dnn"]) - self.assertEqual(learning_rate.parameter_name, "learning_rate") - self.assertEqual(learning_rate.parameter_values, []) - self.assertEqual(learning_rate.path_string, "model_type=dnn") - - # It is possible to select different values for the same selector. - optimizer_type = model_type.select_values( - ["linear", "dnn"] - ).add_categorical_param("optimizer_type", ["adam", "adagrad"]) - # Test the search space. - self.assertLen(space.parameters, 1) - # Test the selectors. - self.assertEqual(model_type.parameter_values, ["linear", "dnn"]) - self.assertEqual(optimizer_type.parameter_name, "optimizer_type") - self.assertEqual(optimizer_type.parameter_values, []) - self.assertEqual(optimizer_type.path_string, "model_type=linear") - - @parameterized.named_parameters( - ("Multi", "units[0]", ("units", 0)), - ("Multi2", "with_underscore[1]", ("with_underscore", 1)), - ("NotMulti", "units", None), - ("NotMulti2", "with space", None), - ("NotMulti3", "with[8]space", None), - ("NotMulti4", "units[0][4]", ("units[0]", 4)), - ( - "GinStyle", - "_gin.ambient_net_exp_from_vec.block_type[3]", - ("_gin.ambient_net_exp_from_vec.block_type", 3), - ), - ) - def testParseMultiDimensionalParameterName(self, name, expected): - base_name_index = base_study_config.SearchSpaceSelector.parse_multi_dimensional_parameter_name( - name - ) - self.assertEqual(base_name_index, expected) - - -class SearchSpaceContainsTest(absltest.TestCase): - def _space(self): - space = base_study_config.SearchSpace() - root = space.select_root() - root.add_float_param("learning-rate", 1e-4, 1e-2) - root.add_categorical_param("optimizer", ["adagrad", "adam", "experimental"]) - return space - - def testFloatCat1(self): - self._space().assert_contains( - trial.ParameterDict({"optimizer": "adagrad", "learning-rate": 1e-2}) - ) - - def testFloatCat2(self): - self.assertFalse( - self._space().contains( 
- trial.ParameterDict({"optimizer": "adagrad", "BADPARAM": 1e-2}) - ) - ) - - def testFloatCat3(self): - self.assertFalse( - self._space().contains( - trial.ParameterDict( - {"optimizer": "adagrad", "learning-rate": 1e-2, "BADPARAM": 1e-2} - ) - ) - ) - - def testFloatCat4(self): - self.assertFalse( - self._space().contains( - trial.ParameterDict({"optimizer": "adagrad", "learning-rate": 1e2}) - ) - ) - - -if __name__ == "__main__": - absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/common.py b/google/cloud/aiplatform/vizier/pyvizier/common.py deleted file mode 100644 index 5d19fc364b..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/common.py +++ /dev/null @@ -1,516 +0,0 @@ -"""Common classes shared between Study and Trial.""" - -import collections -from collections import abc -from typing import DefaultDict, Dict, overload -from typing import Iterable, List, Optional, Tuple, TypeVar, Union, Type -import attr - -from google.protobuf import any_pb2 -from google.protobuf.message import Message - -M = TypeVar("M", bound=Message) -T = TypeVar("T") -MetadataValue = Union[str, any_pb2.Any, Message] - -# Namespace Encoding. -# -# By definition, ∀ ns ∈ Namespace, Namespace.decode(ns.encode()) == ns. -# The tricky part of that definition is handling namespaces with components -# that are empty strings. Notably, we want to make sure that -# Namespace(()).encode() != Namespace(('',)).encode(). -# So, we set up the mapping: -# Namespace(()).encode() -> '' -# Namespace((s,)).encode() -> ':s' -# Namespace((s, s)).encode() -> ':s:s', -# et cetera, and note that every tuple gets a unique encoding, even if $s is the -# empty string. (As long as we escape colons properly.) -# -# So, ns.encode() is a bijection, therefore it has an inverse which we call -# Namespace.decode(s). 
- - -def _parse(arg: str) -> Tuple[str, ...]: - """Parses an encoded namespace string into a namespace tuple.""" - # The tricky part here is that arg.split('') has a length of 1, so it can't - # generate a zero-length tuple; we handle that corner case manually. - if not arg: - return () - # And, then, once we've handled the case of _parse(''), we note that all the - # other encoded strings begin with a colon. It thus contains no information - # and we can remove it. - # TODO: Once we're on Python 3.9, use: arg = arg.removeprefix(':') - if arg.startswith(":"): - arg = arg[1:] - # The rest of the algorithm is that we split on all colons, both - # escaped and unescaped. Then, we walk through the list of fragments and - # join back together the colons that were preceeded by an escape character, - # dropping the escape character as we go. - fragments = arg.split(":") - output = [] - join = False - for frag in fragments: - if join and frag and frag[-1] == "\\": - output[-1] += ":" + frag[:-1] - join = True - elif join: # Doesn't end in an escape character. - output[-1] += ":" + frag - join = False - elif frag and frag[-1] == "\\": # Don't join to previous. - output.append(frag[:-1]) - join = True - else: # Don't join to previous and doesn't end in an escape. - output.append(frag) - join = False - return tuple(output) - - -@attr.frozen(eq=True, order=True, hash=True, auto_attribs=True, init=False) -class Namespace(abc.Sequence): - r"""A namespace for the Metadata class. - - Namespaces form a tree; a particular namespace can be thought of as a tuple of - namespace components. - - You can create a Namespace from a string, with Namespace.decode(s), where - the string is parsed into components, splitting at colons; decode('a:b') gives - you a two-component namespace: ('a', 'b'). - Or, you can create that same Namespace from a tuple of strings/components - e.g. by constructing Namespace(('a', 'b')). 
In the tuple case, the strings - are not parsed and colons are ordinary characters. - - TLDR: If you decode() a namespace from a string, then ":" is a - reserved character, but when constructing from a tuple, there are no - reserved characters. - - Decoding the string form: - * Initial colons don't matter: Namespace.decode(':a') == Namespace('a'); - this is a single-component namespace. - * Colons separate components: - Namespace.decode('a:b') == Namespace(['a', 'b']). - (This is a two-component namespace.) - * Colons are encoded as r'\:': - Namespace.decode('a\\:b') == Namespace(('a:b')). - (This is a single-component namespace.) - - Conversions: For a Namespace x, - * Namespace.decode(x.encode()) == x; here, x.encode() will be a string with - colons separating the components. - * Namespaces act as a Sequence[str], so Namespace(tuple(x)) == x and - Namespace(x) == x. - """ - - _as_tuple: Tuple[str, ...] = attr.field(hash=True, eq=True, order=True) - - def __init__(self, arg: Iterable[str] = ()): - """Generates a Namespace from its component strings. - - Args: - arg: a tuple representation of a namespace. - """ - arg = tuple(arg) - self.__attrs_init__(as_tuple=arg) - - _ns_repr_table = str.maketrans({":": r"\:"}) - - @classmethod - def decode(cls, s: str) -> "Namespace": - r"""Decode a string into a Namespace. - - For a Namespace x, Namespace.decode(x.encode()) == x. - - Args: - s: A string where ':' separates namespace components, and colon is - escaped as r'\:'. - - Returns: - A namespace. - """ - return Namespace(_parse(s)) - - def encode(self) -> str: - """Encodes a Namespace into a string. - - Given a Namespace x, Namespace.decode(x.encode()) == x. - - Returns: - Colons are escaped, then Namespace components are joined by colons. 
- """ - return "".join([":" + c.translate(self._ns_repr_table) for c in self._as_tuple]) - - def __len__(self) -> int: - """Number of components (elements of the tuple form).""" - return len(self._as_tuple) - - def __add__(self, other: Iterable[str]) -> "Namespace": - """Appends components onto the namespace.""" - return Namespace(self._as_tuple + tuple(other)) - - @overload - def __getitem__(self, key: int) -> str: - ... - - @overload - def __getitem__(self, key: slice) -> "Namespace": - ... - - def __getitem__(self, key): - """Retrieves item by the specified key.""" - if isinstance(key, int): - return self._as_tuple[key] - return Namespace(self._as_tuple[key]) - - def __str__(self) -> str: - """Shows the namespace, fully escaped.""" - return self.encode() - - def __repr__(self) -> str: - """Shows the namespace, fully escaped.""" - return f"Namespace({self.encode()})" - - def startswith(self, prefix: Iterable[str]) -> bool: - """Returns True if this namespace starts with prefix.""" - ns_prefix = Namespace(prefix) - return self[: len(ns_prefix)] == ns_prefix - - -class _MetadataSingleNameSpace(Dict[str, MetadataValue]): - """Stores metadata associated with one namespace.""" - - pass - - -class Metadata(abc.MutableMapping): - """Metadata class. - - This is the main interface for reading metadata from a Trial (writing metadata - should typically be done via the MetadataUpdateContext class.) - - This behaves like a str->str dict, within a given namespace. - mm = Metadata({'foo': 'Foo'}) - mm.get('foo') # Returns 'Foo' - mm['foo'] # Returns 'Foo' - mm['bar'] = 'Bar' - mm.update({'a': 'A'}, gleep='Gleep') - - 1. Keys are namespaced. Each Metadata object only interacts with one - Namespace, but a metadata object and its children share a - common set of (namespace, key, value) triplets. - - Namespaces form a tree, and you can walk down the tree. 
There are two - namespace operators: ns(s) which adds component(s) on to the namespace, and - abs_ns() which specifies the entire namespace. - - A Metadata() object is always created at the root of the namespace tree, - and the root is special (it's the only namespace that Vizier users can write - or conveniently read). Pythia algorithm developers should avoid the root - namespace, unless they intend to pass data to/from Vizier users. - - mm = Metadata({'foo': 'foofoo'}) - mm.ns('NewName')['bar'] = 'Bar' - mm['foo'] # Returns 'foofoo' - mm['bar'] # Throws a KeyError - mm.ns('NewName')['foo'] # Throws a KeyError - mm.ns('NewName')['bar'] # Returns 'Bar' - mm.ns('NewName').get('bar') # Returns 'Bar' - - # Use of abs_ns(). - mm = Metadata() - mm.abs_ns(Namespace(('NewName',)))['bar'] = 'Bar' - mm.abs_ns(Namespace(('NewName',))) # returns 'Bar' - - # Multi-component namespaces. - mm = Metadata() - mm.ns('a').ns('b')['foo'] = 'AB-foo' - mm.ns('a')['foo'] = 'A-foo' - mm['foo'] # Throws a KeyError - mm.ns('a')['foo'] # returns 'A-foo' - mm.ns('a').ns('b')['foo'] # returns 'AB-foo' - mm.abs_ns(Namespace(('a', 'b'))).get('foo') # Returns 'ab-foo' - mm.abs_ns(Namespace.decode('a:b')).get('foo') # Returns 'ab-foo' - - 2. Values can be protobufs. If `metadata['foo']` is an instance of `MyProto` - proto message or `Any` proto that packs a `MyProto` message, then the proto - can be recovered by calling: - my_proto = metadata.get_proto('foo', cls=MyProto) - isinstance(my_proto, MyProto) # Returns `True` - - 3. An iteration over a Metadata object only shows you the data in the current - namespace. So, - - mm = Metadata({'foo': 'foofoo'}) - for k, v in mm.ns('gleep'): - ... - - will not yield anything because there are no keys in the 'gleep' namespace. - WARNING: Because of this behavior, Metadata(mm) will quietly drop metadata - from all but mm's current namespace. - - Be aware that type(v) is MetadataValue, which includes protos in addition to - strings. 
- - To iterate over all the keys in all the namespaces use the namespaces() - method. - - mm : Metadata - for ns in mm.namespaces(): - for k, v in mm.abs_ns(ns).items(): - ... - """ - - def __init__( - self, - *args: Union[Dict[str, MetadataValue], Iterable[Tuple[str, MetadataValue]]], - **kwargs: MetadataValue, - ): - """Construct; this follows dict(), and puts data in the root namespace. - - You can pass it a dict, or an object that yields (key, value) - pairs, and those pairs will be put in the root namespace. - - Args: - *args: A dict or an iterable the yields key-value pairs. - **kwargs: key=value pairs to be added to the specified namespace. - """ - self._stores: DefaultDict[ - Namespace, _MetadataSingleNameSpace - ] = collections.defaultdict(_MetadataSingleNameSpace) - self._namespace = Namespace() - self._store = self._stores[self._namespace] - self._store.update(*args, **kwargs) - - def abs_ns(self, namespace: Iterable[str] = ()) -> "Metadata": - """Switches to a specified absolute namespace. - - All the Metadata object's data is shared between $self and the returned - object, but the new Metadata object will have a different default - namespace. - - Args: - namespace: a list of Namespace components. (Defaults to the root, empty - Namespace.) - - Returns: - A new Metadata object in the specified namespace; the new object shares - data (except the namespace) with $self. - """ - return self._copy_core(Namespace(namespace)) - - def ns(self, component: str) -> "Metadata": - r"""Switches to a deeper namespace by appending a component. - - All the metadata is shared between $self and the returned value, but they - have a different current namespace. - - Args: - component: one component to be added to the current namespace. - - Returns: - A new Metadata object in the specified namespace; the new object shares - metadata (except the choice of namespace) with $self. 
- """ - new_ns: Namespace = self._namespace + (component,) - return self._copy_core(new_ns) - - def __repr__(self) -> str: - itemlist: List[str] = [] - for namespace, store in self._stores.items(): - item_string = f"(namespace:{namespace}, items: {store})" - itemlist.append(item_string) - return "Metadata({}, current_namespace={})".format( - ", ".join(itemlist), self._namespace.encode() - ) - - def __str__(self) -> str: - return "namespace: {} items: {}".format(str(self._namespace), self._store) - - def get_proto(self, key: str, *, cls: Type[M]) -> Optional[M]: - """Deprecated. - - Use get() instead. - - Gets the metadata as type `cls`, or None if not possible. - - Args: - key: - cls: Pass in a proto ***class***, not a proto object. - - Returns: - Proto message, if the value associated with the key exists and - can be parsed into cls; None otherwise. - """ - value = self._store.get(key, None) - if value is None: - return None - - if isinstance(value, cls): - # Starting from 3.10, pytype supports typeguard, which obsoletes - # the need for the `pytype:disable` clause. - return value # pytype: disable=bad-return-type - - if isinstance(value, any_pb2.Any): - # `value` is an Any proto potentially packing `cls`. - message = cls() - success = value.Unpack(message) - return message if success else None - - return None - - def get( - self, key: str, default: Optional[T] = None, *, cls: Type[T] = str - ) -> Optional[T]: - """Gets the metadata as type `cls`, or None if not possible. - - Given regular string values, this function behaves exactly like a - regular string-to-string dict (within its namespace). 
- metadata = common.Metadata({'key': 'value'}) - assert metadata.get('key') == 'value' - assert metadata.get('badkey', 'badvalue') == 'badvalue' - - Example with numeric string values: - metadata = common.Metadata({'float': '1.2', 'int': '60'}) - assert metadata.get('float', cls=float) == 1.2 - assert metadata.get('badkey', 0.2, cls=float) == 0.2 - assert metadata.get('int', cls=int) == 60 - assert metadata.get('badkey', 1, cls=int) == 1 - - Example with `Duration` and `Any` proto values: - duration = Duration(seconds=60) - anyproto = Any() - anyproto.Pack(duration) - metadata = common.Metadata({'duration': duration, 'any': anyproto}) - assert metadata.get('duration', cls=Duration) == duration - assert metadata.get('any', cls=Duration) == duration - - Args: - key: - default: Default value. - cls: Desired type of the value. - - Returns: - Default if the key does not exist. Otherwise, the matching value is - parsed into type `cls`. For proto messages, it involves unpacking - Any proto. - """ - try: - value = self._store[key] - except KeyError: - return default - if isinstance(value, cls): - # Starting from 3.10, pytype supports typeguard, which obsoletes - # the need for the `pytype:disable` clause. - return value # pytype: disable=bad-return-type - if isinstance(value, any_pb2.Any): - # `value` is an Any proto potentially packing `cls`. - message = cls() - success = value.Unpack(message) - return message if success else None - return cls(value) - - # TODO: Rename to `abs_namespaces` - def namespaces(self) -> Tuple[Namespace, ...]: - """Get all namespaces for which there is at least one key. - - Returns: - For all `ns` in `md.namespaces()`, `md.abs_ns(ns)` is not empty. - """ - return tuple([ns for ns, store in self._stores.items() if store]) - - # TODO: Rename to `namespaces` - def subnamespaces(self) -> Tuple[Namespace, ...]: - """Returns relative namespaces that are at or below the current namespace. 
- - For all `ns` in `md.subnamespaces()`, `md.abs_ns(md.current_ns() + ns)` is - not empty. E.g. if namespace 'foo:bar' is non-empty, and you're in - namespace 'foo', then the result will contain namespace 'bar'. - - Returns: - For namespaces that begin with the current namespace and are - non-empty, this returns a namespace object that contains the relative - path from the current namespace. - """ - return tuple( - [ - Namespace(ns[len(self._namespace) :]) - for ns, store in self._stores.items() - if store and ns.startswith(self._namespace) - ] - ) - - def current_ns(self) -> Namespace: - """Displays the object's current Namespace.""" - return self._namespace - - # START OF abstract methods inherited from `MutableMapping` base class. - def __getitem__(self, key: str) -> MetadataValue: - return self._store.__getitem__(key) - - def __setitem__(self, key: str, value: MetadataValue): - self._store[key] = value - - def __delitem__(self, key: str): - del self._store[key] - - def __iter__(self): - return iter(self._store) - - def __len__(self): - return len(self._store) - - def __copy__(self) -> "Metadata": - """Shallow copy -- metadata continues to be shared. - - Returns: - A copy of the object. - """ - return self._copy_core(self._namespace) - - # END OF Abstract methods inherited from `MutableMapping` base class. - - def _copy_core(self, ns: Namespace) -> "Metadata": - """Shallow copy: metadata is shared, default namespace changes. - - Args: - ns: the namespace to use for the new object. - - Returns: - A copy of the object. 
- """ - md = Metadata() - md._namespace = ns # pylint: disable='protected-access' - md._stores = self._stores # pylint: disable='protected-access' - md._store = md._stores[md._namespace] # pylint: disable='protected-access' - return md - - def update( - self, - *args: Union[Dict[str, MetadataValue], Iterable[Tuple[str, MetadataValue]]], - **kwargs: MetadataValue, - ) -> None: - self._store.update(*args, **kwargs) - - def attach(self, other: "Metadata") -> None: - """Attach the $other metadata as a descendent of this metadata. - - More precisely, it takes the part of `other`'s namespace that is at or - below `other`'s current namespace, and attaches it to `self`'s current - namespace. - * Tree structure is preserved and nothing is flattened. - * Attached data overwrites existing data, item-by-item, not - namepace-by-namespace. - - So, if we have - other = Metadata() - other.abs_ns(Namespace.(('x', 'y', 'z'))['foo'] = 'bar' - m = Metadata() - m.ns('w').attach(other.ns('x')) - then - m.abs_ns(('w', 'y', 'z'))['foo'] will contain 'bar'. - - Args: - other: a Metadata object to copy from. 
- """ - for ns in other.subnamespaces(): - self._stores[self._namespace + ns].update( - other.abs_ns(other.current_ns() + ns) - ) diff --git a/google/cloud/aiplatform/vizier/pyvizier/common_test.py b/google/cloud/aiplatform/vizier/pyvizier/common_test.py deleted file mode 100644 index 718696c4ed..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/common_test.py +++ /dev/null @@ -1,341 +0,0 @@ -"""Tests for vizier.pyvizier.shared.common.""" - -import copy -from vizier._src.pyvizier.shared import common -from google.protobuf import any_pb2 -from google.protobuf import duration_pb2 -from absl.testing import absltest - - -class MetadataGetClsTest(absltest.TestCase): - def test_get_proto(self): - duration = duration_pb2.Duration(seconds=60) - anyproto = any_pb2.Any() - anyproto.Pack(duration) - metadata = common.Metadata(duration=duration, any=anyproto) - - self.assertEqual( - metadata.get_proto("duration", cls=duration_pb2.Duration), duration - ) - self.assertEqual(metadata.get_proto("any", cls=duration_pb2.Duration), duration) - self.assertEqual(metadata.get("duration", cls=duration_pb2.Duration), duration) - self.assertEqual(metadata.get("any", cls=duration_pb2.Duration), duration) - - def test_get_int(self): - metadata = common.Metadata({"string": "30", "int": "60"}) - self.assertEqual(metadata.get("string", cls=int), 30) - self.assertEqual(metadata.get("int", cls=int), 60) - self.assertEqual(metadata.get("badkey", 1, cls=int), 1) - - -class MetadataNamespaceTest(absltest.TestCase): - def test_basic(self): - ns0 = common.Namespace() - self.assertEmpty(ns0) - self.assertEqual(str(ns0), "") - self.assertEqual(ns0.encode(), "") - self.assertEqual(ns0, common.Namespace.decode("")) - n1t = common.Namespace(("aerer",)) - self.assertLen(n1t, 1) - n1 = common.Namespace.decode("a78") - self.assertLen(n1, 1) - self.assertEqual(str(n1), ":a78") - n2 = common.Namespace(("a78", "bfe")) - self.assertLen(n2, 2) - n2s1 = common.Namespace.decode("a78:bfe") - 
self.assertLen(n2s1, 2) - self.assertEqual(n2.encode(), n2s1.encode()) - n2s2 = common.Namespace.decode(":a78:bfe") - self.assertLen(n2s2, 2) - self.assertEqual(n2.encode(), n2s2.encode()) - self.assertEqual(n2, n2s2) - self.assertEqual(n2s1, n2s2) - ns = common.Namespace(("a", "b")) - self.assertLen(ns, 2) - self.assertEqual(tuple(ns), ("a", "b")) - self.assertEqual(str(ns), ":a:b") - self.assertEqual(ns.encode(), ":a:b") - - def test_escape(self): - s1 = "a\\:A" - ns1 = common.Namespace.decode(s1) - self.assertLen(ns1, 1) - self.assertEqual(str(ns1), ":a\\:A") - self.assertEqual(ns1.encode(), ":" + s1) - self.assertEqual(common.Namespace.decode(ns1.encode()), ns1) - # - s2 = "b:B" - ns2 = common.Namespace.decode(s2) - self.assertLen(ns2, 2) - self.assertEqual(str(ns2), ":" + s2) - self.assertEqual(ns2.encode(), ":" + s2) - self.assertEqual(common.Namespace.decode(ns2.encode()), ns2) - # - s1e1 = ":b\\B" - ns1e1 = common.Namespace.decode(s1e1) - self.assertLen(ns1e1, 1) - self.assertEqual(ns1e1.encode(), s1e1) - self.assertEqual(common.Namespace.decode(ns1e1.encode()), ns1e1) - ns1e2 = common.Namespace((s1e1.lstrip(":"),)) - self.assertLen(ns1e2, 1) - self.assertEqual(ns1e2.encode(), s1e1) - self.assertEqual(ns1e2, ns1e1) - self.assertEqual(common.Namespace.decode(ns1e2.encode()), ns1e2) - # - s1c = r":b\:B" - ns1c = common.Namespace.decode(s1c) - self.assertLen(ns1c, 1) - # Initial colon is harmlessly removed. 
- self.assertEqual(ns1c.encode(), s1c) - self.assertEqual(common.Namespace.decode(ns1c.encode()), ns1c) - self.assertEqual(common.Namespace(("b:B",)), ns1c) - - -class MetadataTest(absltest.TestCase): - def create_test_metadata(self): - md = common.Metadata({"bar": "bar_v"}, foo="foo_v") - md.ns("Name").update(foo="Name_foo_v", baz="Name_baz_v") - return md - - def test_empty_namespaces(self): - md = common.Metadata() - self.assertEmpty(list(md.namespaces())) - md = common.Metadata().ns("ns") - self.assertEmpty(list(md.namespaces())) - - def test_nonempty_namespaces(self): - mm = self.create_test_metadata() - self.assertLen(mm.namespaces(), 2) - - def test_getters_are_consistent_when_item_is_in_dict(self): - mm = self.create_test_metadata() - self.assertEqual(mm["foo"], "foo_v") - self.assertEqual(mm.get("foo"), "foo_v") - - def test_getters_are_consistent_when_item_is_not_in_dict(self): - mm = self.create_test_metadata() - self.assertIsNone(mm.get("baz")) - with self.assertRaises(KeyError): - _ = mm["baz"] - - def test_separator_is_not_allowed_as_keys_after_init(self): - mm = self.create_test_metadata() - with self.assertRaises(KeyError): - _ = mm["Name_foo"] - - def test_namespace_works_as_intended(self): - mm = self.create_test_metadata() - self.assertEqual(mm.ns("Name")["foo"], "Name_foo_v") - self.assertIsNone(mm.ns("Name").get("bar")) - - mm_name = mm.ns("Name") - self.assertEqual(mm_name["foo"], "Name_foo_v") - self.assertIsNone(mm_name.get("bar")) - self.assertEqual(mm.ns("Name")["foo"], "Name_foo_v") - - def test_create_new_namespace(self): - # Calling ns() with an unexisting namespace should work fine. 
- mm = self.create_test_metadata() - mm.ns("NewName")["foo"] = "NewName_foo_v" - self.assertEqual(mm.ns("NewName")["foo"], "NewName_foo_v") - self.assertIsNone(mm.ns("NewName").get("bar")) - - def test_changing_namespace_copies_reference(self): - mm = self.create_test_metadata() - # Calling ns() copies by reference so any changes to the returned Metadata - # object is reflected in the original object. - mm_in_namespace = mm.ns("Name") - mm_in_namespace["foofoo"] = "Name_foofoo_v" - self.assertEqual(mm.ns("Name")["foofoo"], "Name_foofoo_v") - - def test_iterators(self): - mm = self.create_test_metadata() - self.assertSequenceEqual(list(mm.keys()), ["bar", "foo"]) - self.assertSequenceEqual( - list(mm.ns("Name").values()), ["Name_foo_v", "Name_baz_v"] - ) - self.assertLen(list(mm.items()), 2) - - def test_repr_str(self): - mm = self.create_test_metadata() - self.assertNotEmpty(str(mm), "") - self.assertNotEmpty(repr(mm), repr("")) - - def test_update(self): - md = common.Metadata(foo="foo_v") - md.ns("Name").update(foo="Name_foo_v", baz="Name_baz_v") - - md2 = common.Metadata() - md2.ns("Name").update(foo="Name_foo_v2", bar="Name_bar_v2") - - md.ns("Name").update(md2.ns("Name")) - - self.assertLen(md.ns("Name"), 3) - self.assertIn("bar", md.ns("Name")) - - def test_copy(self): - # There's no useful distinction to be made between copy.copy() and - # copy.deepcopy(). - mm = common.Metadata().ns("ns1") - mm.update(foo="bar") - mm_copy = copy.copy(mm) - mm_deepcopy = copy.deepcopy(mm) - # Check that copies match. - self.assertEqual(mm["foo"], "bar") - self.assertEqual(mm_copy["foo"], "bar") - self.assertEqual(mm_deepcopy["foo"], "bar") - self.assertEqual(mm_deepcopy.namespaces(), mm.namespaces()) - self.assertEqual(mm_copy.namespaces(), mm.namespaces()) - # Check that the deep copy is disconnected. 
- mm_deepcopy["nerf"] = "gleep" - with self.assertRaises(KeyError): - mm["nerf"] # pylint: disable=pointless-statement - with self.assertRaises(KeyError): - mm_copy["nerf"] # pylint: disable=pointless-statement - # Check that the shallow copy shares the metadata store with the original. - mm_copy["blip"] = "tonk" - self.assertEqual(mm["blip"], mm_copy["blip"]) - # ... but no sharing with the deep copy. - with self.assertRaises(KeyError): - mm_deepcopy["blip"] # pylint: disable=pointless-statement - # Here's a test for a specific bug, where Metadata._store is improperly - # disconnected from Metadata._stores. - mx = common.Metadata() - copy.copy(mx).ns("A")["a"] = "Aa" - self.assertEqual(mx.ns("A")["a"], "Aa") - - def test_construction(self): - # Test with iterables. - m0i = common.Namespace([]) - self.assertEmpty(m0i) - m0d = common.Namespace.decode("") - self.assertEmpty(m0d) - self.assertEqual(m0d, m0i) - m1i = common.Namespace(["abc"]) - self.assertLen(m1i, 1) - self.assertEqual(m1i, common.Namespace(tuple(m1i))) - self.assertEqual(m1i, common.Namespace.decode(m1i.encode())) - m2i = common.Namespace(["abc", "def"]) - self.assertLen(m2i, 2) - self.assertEqual(m2i, common.Namespace(tuple(m2i))) - self.assertEqual(m2i, common.Namespace.decode(m2i.encode())) - m3i = common.Namespace(["abc", "de:f"]) - self.assertLen(m3i, 2) - self.assertEqual(m3i, common.Namespace(tuple(m3i))) - self.assertEqual(m3i, common.Namespace.decode(m3i.encode())) - # Test with strings. 
- m1sc = common.Namespace.decode(":abc") - self.assertLen(m1sc, 1) - self.assertEqual(m1sc, common.Namespace(tuple(m1sc))) - self.assertEqual(m1sc, common.Namespace.decode(m1sc.encode())) - m1s = common.Namespace.decode("abc") - self.assertLen(m1s, 1) - self.assertEqual(m1s, common.Namespace(tuple(m1s))) - self.assertEqual(m1s, common.Namespace.decode(m1s.encode())) - m2s = common.Namespace.decode("abc:def") - self.assertLen(m2s, 2) - self.assertEqual(m2s, common.Namespace(tuple(m2s))) - self.assertEqual(m2s, common.Namespace.decode(m2s.encode())) - m3s = common.Namespace.decode("abc:de\\f") - self.assertLen(m3s, 2) - self.assertEqual(m3s, common.Namespace(tuple(m3s))) - self.assertEqual(m3s, common.Namespace.decode(m3s.encode())) - - def test_startswith(self): - m1 = common.Namespace(["aa", "bb"]) - self.assertTrue(m1.startswith(common.Namespace(["aa"]))) - self.assertTrue(m1.startswith(common.Namespace(["aa", "bb"]))) - self.assertTrue(m1.startswith(m1)) - self.assertTrue(m1.startswith(common.Namespace(tuple(m1)))) - self.assertFalse(m1.startswith(common.Namespace(["bb"]))) - self.assertFalse(m1.startswith(common.Namespace(["aa", "bb", "cc"]))) - self.assertFalse(m1.startswith(common.Namespace(["bb", "bb"]))) - self.assertFalse(m1.startswith(common.Namespace(["aa", "aa"]))) - - def test_subnamespace(self): - mm = common.Metadata() - mm.ns("ns1")["foo"] = "bar" - mm.ns("ns2")["foo"] = "bar" - mm.ns("ns1").ns("ns11")["foo"] = "bar" - mm.ns("ns1").ns("ns:11")["gleep"] = "nerf" - - self.assertSequenceEqual( - mm.subnamespaces(), - [ - common.Namespace(["ns1"]), - common.Namespace(["ns2"]), - common.Namespace(["ns1", "ns11"]), - common.Namespace(["ns1", "ns:11"]), - ], - ) - self.assertSequenceEqual( - mm.ns("ns1").subnamespaces(), - [ - common.Namespace([]), - common.Namespace(["ns11"]), - common.Namespace(["ns:11"]), - ], - ) - self.assertSequenceEqual(mm.ns("ns2").subnamespaces(), [common.Namespace()]) - self.assertSequenceEqual(mm.ns("ns3").subnamespaces(), []) - 
- def test_namespace_add(self): - n0 = common.Namespace() - self.assertEmpty(n0) - self.assertEqual(n0 + (), common.Namespace([])) - self.assertEqual( - n0 + ("ab",), - common.Namespace( - [ - "ab", - ] - ), - ) - self.assertEqual(n0 + ("a:b",), common.Namespace(["a:b"])) - self.assertEqual(n0 + ("a:b",), common.Namespace(["a:b"])) - self.assertEqual(n0 + ("ab", "cd"), common.Namespace(["ab", "cd"])) - n1 = common.Namespace(["xy"]) - self.assertLen(n1, 1) - self.assertEqual(n1 + ("ab",), common.Namespace(["xy", "ab"])) - self.assertEqual(n1 + ("a:b",), common.Namespace(["xy", "a:b"])) - self.assertEqual(n1 + ("a:b",), common.Namespace(["xy", "a:b"])) - n2 = common.Namespace(["xy", "zw"]) - self.assertLen(n2, 2) - self.assertLen(n2 + ("ab",), 3) - self.assertEqual(n2 + ("ab",), common.Namespace(["xy", "zw", "ab"])) - self.assertLen(n2 + ("ab", "cd"), 4) - self.assertEqual(n2 + ("ab", "cd"), common.Namespace.decode("xy:zw:ab:cd")) - - def test_metadata_attach(self): - # Set up a metadata tree. - mm = common.Metadata() - mm.ns("ns1").ns("ns:11").update(foo="bar") - mm.ns("ns1").ns("ns12").update(foo="gleep") - mm.ns("ns1").update(foo="nerf") - mm.ns("ns|").update(foo="pag") - # Attach that metadata tree to a branch of an empty tree. - m1 = common.Metadata() - m1.ns("ns0").ns("ns00").attach(mm) - self.assertEmpty(m1.abs_ns()) - self.assertEqual(m1.ns("ns0").ns("ns00"), mm) - self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns1", "ns:11"])["foo"], "bar") - self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns1", "ns12"])["foo"], "gleep") - self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns1"])["foo"], "nerf") - self.assertEqual(m1.abs_ns(["ns0", "ns00", "ns|"])["foo"], "pag") - # Attach just part of $mm to a branch of a new, empty tree. 
- m2 = common.Metadata() - m2.ns("nsX").attach(mm.ns("ns1")) - self.assertEqual(m2.abs_ns(["nsX", "ns:11"])["foo"], "bar") - self.assertEqual(m2.abs_ns(["nsX", "ns12"])["foo"], "gleep") - self.assertEqual(m2.abs_ns(["nsX"])["foo"], "nerf") - # Check that attach() overwrites key collisions, but preserves other data. - m3 = common.Metadata() - m3["foo"] = "Y" # This will be overwritten. - m3["z"] = "Z" # This will not be overwritten. - m3.attach(mm.ns("ns1").ns("ns:11")) - self.assertEqual(m3["z"], "Z") - self.assertEqual(m3["foo"], "bar") - - -if __name__ == "__main__": - absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/context.py b/google/cloud/aiplatform/vizier/pyvizier/context.py deleted file mode 100644 index 230982671f..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/context.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Wrapper classes for Context protos and other messages in them.""" -from typing import Dict, Optional - -import attr -from google.cloud.aiplatform.vizier.pyvizier.shared import common -from google.cloud.aiplatform.vizier.pyvizier.shared import trial - -Metadata = common.Metadata -ParameterValue = trial.ParameterValue - - -@attr.s(auto_attribs=True, frozen=False, init=True, slots=True) -class Context: - """Wrapper for Context proto.""" - - description: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - on_setattr=attr.setters.validate, - ) - - parameters: Dict[str, ParameterValue] = attr.ib( - init=True, - kw_only=True, - factory=dict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(ParameterValue), - mapping_validator=attr.validators.instance_of(dict), - ), - on_setattr=attr.setters.validate, - ) # pytype: disable=wrong-arg-types - - metadata: Metadata = attr.ib( - init=True, - kw_only=True, - default=Metadata(), - 
validator=attr.validators.instance_of(Metadata), - on_setattr=attr.setters.validate, - ) - - related_links: Dict[str, str] = attr.ib( - init=True, - kw_only=True, - factory=dict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(str), - mapping_validator=attr.validators.instance_of(dict), - ), - on_setattr=attr.setters.validate, - ) # pytype: disable=wrong-arg-types diff --git a/google/cloud/aiplatform/vizier/pyvizier/context_test.py b/google/cloud/aiplatform/vizier/pyvizier/context_test.py deleted file mode 100644 index 65a1648238..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/context_test.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Tests for vizier.pyvizier.shared.context.""" - -from vizier._src.pyvizier.shared import context -from absl.testing import absltest - - -class ContextTest(absltest.TestCase): - def testDefaultsNotShared(self): - """Make sure default parameters are not shared between instances.""" - context1 = context.Context() - context2 = context.Context() - context1.parameters["x1"] = context.ParameterValue(5) - self.assertEmpty(context2.parameters) - - -if __name__ == "__main__": - absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py b/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py deleted file mode 100644 index 733d6d5ddb..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/metadata_util.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Utility functions for handling vizier metadata.""" - -from typing import Union, Optional, TypeVar, Type - -from google.cloud.aiplatform.compat.types import study as study_pb2 -from google.protobuf import any_pb2 -from google.protobuf.message import Message - -T = TypeVar("T") - - -def assign( - container: Union[study_pb2.StudySpec, study_pb2.Trial], - *, - key: str, - ns: str, - value: Union[str, any_pb2.Any, Message] -): # -> Tuple[key_value_pb2.KeyValue, bool]: - """Insert or assign 
(key, value) to container.metadata. - - Args: - container: container.metadata must be repeated KeyValue (protobuf) field. - key: - ns: A namespace for the key (defaults to '', which is the user's namespace). - value: Behavior depends on the type. `str` is copied to KeyValue.value - `any_pb2.Any` is copied to KeyValue.proto Other types are packed to - any_pb2.Any proto, which is then copied to KeyValue.proto. - - Returns: - (proto, inserted) where - proto is the protobuf that was just inserted into the $container, and - inserted is True if the proto was newly inserted, False if it was replaced. - """ - - for kv in container.metadata: - if kv.key == key and kv.ns == ns: - if isinstance(value, str): - kv.ClearField("proto") - kv.value = value - elif isinstance(value, any_pb2.Any): - kv.ClearField("value") - kv.proto.CopyFrom(value) - else: - kv.ClearField("value") - kv.proto.Pack(value) - return kv, False - - # The key does not exist in the metadata. - if isinstance(value, str): - metadata = container.metadata.add(key=key, ns=ns, value=value) - elif isinstance(value, any_pb2.Any): - metadata = container.metadata.add(key=key, ns=ns, proto=value) - else: - metadata = container.metadata.add(key=key, ns=ns) - metadata.proto.Pack(value) - return metadata, True - - -def get( - container: Union[study_pb2.StudySpec, study_pb2.Trial], *, key: str, ns: str -) -> Optional[str]: - """Returns the metadata value associated with key, or None. - - Args: - container: A Trial of a StudySpec in protobuf form. - key: The key of a KeyValue protobuf. - ns: A namespace for the key (defaults to '', which is the user's namespace). - """ - - for kv in container.metadata: - if kv.key == key and kv.ns == ns: - if not kv.HasField("proto"): - return kv.value - return None - - -def get_proto( - container: Union[study_pb2.StudySpec, study_pb2.Trial], - *, - key: str, - ns: str, - cls: Type[T] -) -> Optional[T]: - """Unpacks the proto metadata into message. 
- - Args: - container: (const) StudySpec or Trial to search the metadata from. - key: (const) Lookup key of the metadata. - ns: A namespace for the key (defaults to '', which is the user's namespace). - cls: Pass in a proto ***class***, not a proto object. - - Returns: - Proto message, if the value associated with the key exists and - can be parsed into proto; None otherwise. - """ - for kv in container.metadata: - if kv.key == key and kv.ns == ns: - if kv.HasField("proto"): - message = cls() - success = kv.proto.Unpack(message) - return message if success else None - return None diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py deleted file mode 100644 index 729b7a8882..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/parameter_config.py +++ /dev/null @@ -1,590 +0,0 @@ -"""ParameterConfig wraps ParameterConfig and ParameterSpec protos.""" - -import collections -import copy -import enum -import math -from typing import Generator, List, Optional, Sequence, Tuple, Union - -import attr - -from google.cloud.aiplatform.vizier.pyvizier import trial - -ExternalType = trial.ExternalType - - -class ParameterType(enum.Enum): - """Valid Values for ParameterConfig.type.""" - - DOUBLE = "DOUBLE" - INTEGER = "INTEGER" - CATEGORICAL = "CATEGORICAL" - DISCRETE = "DISCRETE" - - def is_numeric(self) -> bool: - return self in [self.DOUBLE, self.INTEGER, self.DISCRETE] - - -class ScaleType(enum.Enum): - """Valid Values for ParameterConfig.scale_type.""" - - LINEAR = "LINEAR" - LOG = "LOG" - REVERSE_LOG = "REVERSE_LOG" - UNIFORM_DISCRETE = "UNIFORM_DISCRETE" - - -# A sequence of possible internal parameter values. 
-MonotypeParameterSequence = Union[Sequence[Union[int, float]], Sequence[str]] -MonotypeParameterList = Union[List[Union[int, float]], List[str]] - - -def _validate_bounds(bounds: Union[Tuple[int, int], Tuple[float, float]]): - """Validates the bounds.""" - if len(bounds) != 2: - raise ValueError("Bounds must have length 2. Given: {}".format(bounds)) - lower = bounds[0] - upper = bounds[1] - if not all([math.isfinite(v) for v in (lower, upper)]): - raise ValueError( - 'Both "lower" and "upper" must be finite. Given: (%f, %f)' % (lower, upper) - ) - if lower > upper: - raise ValueError( - "Lower cannot be greater than upper: given lower={} upper={}".format( - lower, upper - ) - ) - - -def _get_feasible_points_and_bounds( - feasible_values: Sequence[float], -) -> Tuple[List[float], Union[Tuple[int, int], Tuple[float, float]]]: - """Validates and converts feasible values to floats.""" - if not all([math.isfinite(p) for p in feasible_values]): - raise ValueError( - "Feasible values must all be finite. Given: {}" % feasible_values - ) - - feasible_points = list(sorted(feasible_values)) - bounds = (feasible_points[0], feasible_points[-1]) - return feasible_points, bounds - - -def _get_categories(categories: Sequence[str]) -> List[str]: - """Returns the categories.""" - return sorted(list(categories)) - - -def _get_default_value( - param_type: ParameterType, default_value: Union[float, int, str] -) -> Union[float, int, str]: - """Validates and converts the default_value to the right type.""" - if param_type in (ParameterType.DOUBLE, ParameterType.DISCRETE) and ( - isinstance(default_value, float) or isinstance(default_value, int) - ): - return float(default_value) - elif param_type == ParameterType.INTEGER and ( - isinstance(default_value, float) or isinstance(default_value, int) - ): - if isinstance(default_value, int): - return default_value - else: - # Check if the float rounds nicely. 
- default_int_value = round(default_value) - if not math.isclose(default_value, default_int_value): - raise ValueError( - "default_value for an INTEGER parameter should be an " - "integer, got float: [{}]".format(default_value) - ) - return default_int_value - elif param_type == ParameterType.CATEGORICAL and isinstance(default_value, str): - return default_value - raise ValueError( - "default_value has an incorrect type. ParameterType has type {}, " - "but default_value has type {}".format(param_type.name, type(default_value)) - ) - - -@attr.s(auto_attribs=True, frozen=True, init=True, slots=True) -class ParameterConfig: - """A Vizier ParameterConfig. - - Use ParameterConfig.factory to create a valid instance. - """ - - _name: str = attr.ib( - init=True, validator=attr.validators.instance_of(str), kw_only=True - ) - _type: ParameterType = attr.ib( - init=True, - validator=attr.validators.instance_of(ParameterType), - repr=lambda v: v.name if v is not None else "None", - kw_only=True, - ) - # Only one of _feasible_values, _bounds will be set at any given time. 
- _bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float)), - iterable_validator=attr.validators.instance_of(tuple), - ) - ), - kw_only=True, - ) - _feasible_values: Optional[MonotypeParameterList] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float, str)), - iterable_validator=attr.validators.instance_of((list, tuple)), - ) - ), - kw_only=True, - ) - _scale_type: Optional[ScaleType] = attr.ib( - init=True, - validator=attr.validators.optional(attr.validators.instance_of(ScaleType)), - repr=lambda v: v.name if v is not None else "None", - kw_only=True, - ) - _default_value: Optional[Union[float, int, str]] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.instance_of((float, int, str)) - ), - kw_only=True, - ) - _external_type: Optional[ExternalType] = attr.ib( - init=True, - validator=attr.validators.optional(attr.validators.instance_of(ExternalType)), - repr=lambda v: v.name if v is not None else "None", - kw_only=True, - ) - # Parent values for this ParameterConfig. If set, then this is a child - # ParameterConfig. - _matching_parent_values: Optional[MonotypeParameterList] = attr.ib( - init=True, - validator=attr.validators.optional( - attr.validators.deep_iterable( - member_validator=attr.validators.instance_of((int, float, str)), - iterable_validator=attr.validators.instance_of((list, tuple)), - ) - ), - kw_only=True, - ) - # Children ParameterConfig. If set, then this is a parent ParameterConfig. - _child_parameter_configs: Optional[List["ParameterConfig"]] = attr.ib( - init=True, kw_only=True - ) - - # Pytype treats instances of EnumTypeWrapper as types, but they can't be - # evaluated at runtime, so a Union[] of proto enums has to be a forward - # reference below. 
- @classmethod - def factory( - cls, - name: str, - *, - bounds: Optional[Union[Tuple[int, int], Tuple[float, float]]] = None, - feasible_values: Optional[MonotypeParameterSequence] = None, - children: Optional[ - Sequence[Tuple[MonotypeParameterSequence, "ParameterConfig"]] - ] = None, - scale_type: Optional[ScaleType] = None, - default_value: Optional[Union[float, int, str]] = None, - external_type: Optional[ExternalType] = ExternalType.INTERNAL, - ) -> "ParameterConfig": - """Factory method. - - Args: - name: The parameter's name. Cannot be empty. - bounds: REQUIRED for INTEGER or DOUBLE type. Specifies (min, max). The - type of (min, max) determines the created ParameterConfig's type. - feasible_values: REQUIRED for DISCRETE or CATEGORICAL type. The elements' - type determines the created ParameterConfig's type. - children: sequence of tuples formatted as: (matching_parent_values, - ParameterConfig). See - cs/learning_vizier.service.ParameterConfig.child_parameter_configs for - details. ONLY THE TYPES ARE VALIDATED. If the child ParameterConfig - protos already have parent values set, they will be overridden by the - provided matching_parent_values. - scale_type: Scaling to be applied. NOT VALIDATED. - default_value: A default value for the Parameter. - external_type: An annotation indicating the type this parameter should be - cast to. - - Returns: - A ParameterConfig object which wraps a partially validated proto. - - Raises: - ValueError: Exactly one of feasible_values and bounds must be convertible - to Boolean true. Bounds and numeric feasible_values must be finite. - Bounds and feasible_values, if provided, must consist of - elements of the same type. - TypeError: If children's matching_parent_values are not compatible with - the ParameterConfig being created. 
- """ - if not name: - raise ValueError("Parameter name cannot be empty.") - - if bool(feasible_values) == bool(bounds): - raise ValueError( - "While creating Parameter with name={}: exactly one of " - '"feasible_values" or "bounds" must be provided, but given ' - "feasible_values={} and bounds={}.".format( - name, feasible_values, bounds - ) - ) - if feasible_values: - if len(set(feasible_values)) != len(feasible_values): - counter = collections.Counter(feasible_values) - duplicate_dict = {k: v for k, v in counter.items() if v > 1} - raise ValueError( - "Feasible values cannot have duplicates: {}".format(duplicate_dict) - ) - if all(isinstance(v, (float, int)) for v in feasible_values): - inferred_type = ParameterType.DISCRETE - feasible_values, bounds = _get_feasible_points_and_bounds( - feasible_values - ) - elif all(isinstance(v, str) for v in feasible_values): - inferred_type = ParameterType.CATEGORICAL - feasible_values = _get_categories(feasible_values) - else: - raise ValueError( - "Feasible values must all be numeric or strings. Given {}".format( - feasible_values - ) - ) - else: # bounds were specified. - if isinstance(bounds[0], int) and isinstance(bounds[1], int): - inferred_type = ParameterType.INTEGER - _validate_bounds(bounds) - elif isinstance(bounds[0], float) and isinstance(bounds[1], float): - inferred_type = ParameterType.DOUBLE - _validate_bounds(bounds) - else: - raise ValueError( - "Bounds must both be integers or doubles. 
Given: {}".format(bounds) - ) - - if default_value is not None: - default_value = _get_default_value(inferred_type, default_value) - - pc = cls( - name=name, - type=inferred_type, - bounds=bounds, - feasible_values=feasible_values, - scale_type=scale_type, - default_value=default_value, - external_type=external_type, - matching_parent_values=None, - child_parameter_configs=None, - ) - if children: - pc = pc.add_children(children) - return pc - - @property - def name(self) -> str: - return self._name - - @property - def type(self) -> ParameterType: - return self._type - - @property - def external_type(self) -> ExternalType: - return self._external_type - - @property - def scale_type(self) -> Optional[ScaleType]: - return self._scale_type - - @property - def bounds(self) -> Union[Tuple[float, float], Tuple[int, int]]: - """Returns the bounds, if set, or raises a ValueError.""" - if self.type == ParameterType.CATEGORICAL: - raise ValueError( - "Accessing bounds of a categorical parameter: %s" % self.name - ) - return self._bounds - - @property - def matching_parent_values(self) -> MonotypeParameterList: - """Returns the matching parent values, if this is a child parameter.""" - if not self._matching_parent_values: - return [] - return copy.copy(self._matching_parent_values) - - @property - def child_parameter_configs(self) -> List["ParameterConfig"]: - if not self._child_parameter_configs: - return [] - return copy.deepcopy(self._child_parameter_configs) - - def _del_child_parameter_configs(self): - """Deletes the current child ParameterConfigs.""" - object.__setattr__(self, "_child_parameter_configs", None) - - @property - def clone_without_children(self) -> "ParameterConfig": - """Returns the clone of self, without child_parameter_configs.""" - clone = copy.deepcopy(self) - clone._del_child_parameter_configs() # pylint: disable='protected-access' - return clone - - @property - def feasible_values(self) -> Union[List[int], List[float], List[str]]: - if self.type in 
(ParameterType.DISCRETE, ParameterType.CATEGORICAL): - if not self._feasible_values: - return [] - return copy.copy(self._feasible_values) - elif self.type == ParameterType.INTEGER: - return list(range(self.bounds[0], self.bounds[1] + 1)) - raise ValueError("feasible_values is invalid for type: %s" % self.type) - - @property - def default_value(self) -> Optional[Union[int, float, str]]: - """Returns the default value, or None if not set.""" - return self._default_value - - def _set_matching_parent_values(self, parent_values: MonotypeParameterSequence): - """Sets the given matching parent values in this object, without validation. - - Args: - parent_values: Parent values for which this child ParameterConfig is - active. Existing values will be replaced. - """ - object.__setattr__(self, "_matching_parent_values", list(parent_values)) - - def _set_child_parameter_configs(self, children: List["ParameterConfig"]): - """Sets the given child ParameterConfigs in this object, without validation. - - Args: - children: The children to set in this object. Existing children will be - replaced. - """ - object.__setattr__(self, "_child_parameter_configs", children) - - def add_children( - self, - new_children: Sequence[Tuple[MonotypeParameterSequence, "ParameterConfig"]], - ) -> "ParameterConfig": - """Clones the ParameterConfig and adds new children to it. - - Args: - new_children: A sequence of tuples formatted as: (matching_parent_values, - ParameterConfig). If the child ParameterConfig have pre-existing parent - values, they will be overridden. - - Returns: - A parent parameter config, with children set. 
- - Raises: - ValueError: If the child configs are invalid - TypeError: If matching parent values are invalid - """ - parent = copy.deepcopy(self) - if not new_children: - return parent - - for child_pair in new_children: - if len(child_pair) != 2: - raise ValueError( - "Each element in new_children must be a tuple of " - "(Sequence of valid parent values, ParameterConfig)," - " given: {}".format(child_pair) - ) - - child_parameter_configs = parent.child_parameter_configs - for unsorted_parent_values, child in new_children: - parent_values = sorted(unsorted_parent_values) - child_copy = copy.deepcopy(child) - if parent.type == ParameterType.DISCRETE: - if not all(isinstance(v, (float, int)) for v in parent_values): - raise TypeError( - "Parent is DISCRETE-typed, but a child is specifying " - "one or more non float/int parent values: child={} " - ", parent_values={}".format(child, parent_values) - ) - child_copy._set_matching_parent_values( - parent_values - ) # pylint: disable='protected-access' - elif parent.type == ParameterType.CATEGORICAL: - if not all(isinstance(v, str) for v in parent_values): - raise TypeError( - "Parent is CATEGORICAL-typed, but a child is " - "specifying one or more non float/int parent values: " - "child={}, parent_values={}".format(child, parent_values) - ) - child_copy._set_matching_parent_values( - parent_values - ) # pylint: disable='protected-access' - elif parent.type == ParameterType.INTEGER: - # Allow {int, float}->float conversion but block str->float conversion. 
- int_values = [int(v) for v in parent_values] - if int_values != parent_values: - raise TypeError( - "Parent is INTEGER-typed, but a child is specifying one or more " - "non-integral parent values: {}".format(parent_values) - ) - child_copy._set_matching_parent_values( - int_values - ) # pylint: disable='protected-access' - else: - raise ValueError("DOUBLE type cannot have child parameters") - child_parameter_configs.extend([child_copy]) - parent._set_child_parameter_configs( - child_parameter_configs - ) # pylint: disable='protected-access' - return parent - - def continuify(self) -> "ParameterConfig": - """Returns a newly created DOUBLE parameter with the same range.""" - if self.type == ParameterType.DOUBLE: - return copy.deepcopy(self) - elif not ParameterType.is_numeric(self.type): - raise ValueError( - "Cannot convert a non-numeric parameter to DOUBLE: {}".format(self) - ) - elif self._child_parameter_configs: - raise ValueError( - "Cannot convert a parent parameter to DOUBLE: {}".format(self) - ) - - scale_type = self.scale_type - if scale_type == ScaleType.UNIFORM_DISCRETE: - from absl import logging - - logging.log_every_n( - logging.WARNING, - "Converting a UNIFORM_DISCRETE scaled discrete parameter " - "to DOUBLE: %s", - 10, - self, - ) - scale_type = None - - default_value = self.default_value - if default_value is not None: - default_value = float(default_value) - return ParameterConfig.factory( - self.name, - bounds=(float(self.bounds[0]), float(self.bounds[1])), - scale_type=scale_type, - default_value=default_value, - ) - - @classmethod - def merge( - cls, one: "ParameterConfig", other: "ParameterConfig" - ) -> "ParameterConfig": - """Merge two ParameterConfigs. - - Args: - one: ParameterConfig with no child parameters. - other: Must have the same type as one, and may not have child parameters. - - Returns: - For Categorical, Discrete or Integer ParameterConfigs, the resulting - config will be the union of all feasible values. 
- For Double ParameterConfigs, the resulting config will have [min_value, - max_value] set to the smallest and largest bounds. - - Raises: - ValueError: If any of the input configs has child parameters, or if - the two parameters have different types. - """ - if one.child_parameter_configs or other.child_parameter_configs: - raise ValueError( - "Cannot merge parameters with child_parameter_configs: %s and %s" % one, - other, - ) - if one.type != other.type: - raise ValueError( - "Type conflicts between {} and {}".format( - one.type.name, other.type.name - ) - ) - if one.scale_type != other.scale_type: - from absl import logging - - logging.warning("Scale type conflicts while merging %s and %s", one, other) - - if one.type in (ParameterType.CATEGORICAL, ParameterType.DISCRETE): - new_feasible_values = list(set(one.feasible_values + other.feasible_values)) - return ParameterConfig.factory( - name=one.name, - feasible_values=new_feasible_values, - scale_type=one.scale_type, - ) - elif one.type in (ParameterType.INTEGER, ParameterType.DOUBLE): - original_min, original_max = one.bounds - other_min, other_max = other.bounds - new_bounds = (min(original_min, other_min), max(original_max, other_max)) - return ParameterConfig.factory( - name=one.name, bounds=new_bounds, scale_type=one.scale_type - ) - raise ValueError( - "Unknown type {}. This is currently" "an unreachable code.".format(one.type) - ) - - def traverse( - self, show_children: bool = False - ) -> Generator["ParameterConfig", None, None]: - """DFS Generator for parameter configs. - - Args: - show_children: If True, every generated ParameterConfig has - child_parameter_configs. For example, if 'foo' has two child configs - 'bar1' and 'bar2', then traversing 'foo' with show_children=True - generates (foo, with bar1,bar2 as children), (bar1), and (bar2). If - show_children=False, it generates (foo, without children), (bar1), and - (bar2). - - Yields: - DFS on all parameter configs. 
- """ - if show_children: - yield self - else: - yield self.clone_without_children - for child in self.child_parameter_configs: - yield from child.traverse(show_children) - - def contains( - self, value: Union[trial.ParameterValueTypes, trial.ParameterValue] - ) -> bool: - """Check if the `value` is a valid value for this parameter config.""" - if not isinstance(value, trial.ParameterValue): - value = trial.ParameterValue(value) - - if self.type == ParameterType.DOUBLE: - return self.bounds[0] <= value.as_float and value.as_float <= self.bounds[1] - elif self.type == ParameterType.INTEGER: - if value.as_int != value.as_float: - return False - return self.bounds[0] <= value.as_int and value.as_int <= self.bounds[1] - elif self.type == ParameterType.DISCRETE: - return value.as_float in self.feasible_values - elif self.type == ParameterType.CATEGORICAL: - return value.as_str in self.feasible_values - else: - raise NotImplementedError( - f"Cannot determine whether {value} is feasible" - f"for Unknown parameter type {self.type}.\n" - f"Full config: {repr(self)}" - ) - - @property - def num_feasible_values(self) -> Union[float, int]: - if self.type == ParameterType.DOUBLE: - return float("inf") - elif self.type == ParameterType.INTEGER: - return self.bounds[1] - self.bounds[0] + 1 - else: - return len(self.feasible_values) diff --git a/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py b/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py deleted file mode 100644 index 81c3304b32..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/parameter_config_test.py +++ /dev/null @@ -1,360 +0,0 @@ -"""Tests for vizier.pyvizier.shared.parameter_config.""" - -from typing import Any - -from vizier._src.pyvizier.shared import parameter_config as pc -from absl.testing import absltest -from absl.testing import parameterized - - -class ParameterConfigFactoryTest(parameterized.TestCase): - def testCreatesDoubleConfig(self): - parameter_config = 
pc.ParameterConfig.factory( - "name", - bounds=(-1.0, 1.0), - scale_type=pc.ScaleType.LINEAR, - default_value=0.1, - ) - self.assertEqual(parameter_config.name, "name") - self.assertEqual(parameter_config.type, pc.ParameterType.DOUBLE) - self.assertEqual(parameter_config.bounds, (-1, 1)) - self.assertEqual(parameter_config.scale_type, pc.ScaleType.LINEAR) - self.assertEqual(parameter_config.default_value, 0.1) - self.assertIsInstance(parameter_config.default_value, float) - with self.assertRaises(ValueError): - _ = parameter_config.feasible_values - - self.assertEqual(parameter_config.continuify(), parameter_config) - - def testCreatesIntegerConfig(self): - parameter_config = pc.ParameterConfig.factory( - "name", bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=1 - ) - self.assertEqual(parameter_config.name, "name") - self.assertEqual(parameter_config.type, pc.ParameterType.INTEGER) - self.assertEqual(parameter_config.feasible_values, [1, 2, 3]) - self.assertEqual(parameter_config.bounds, (1, 3)) - self.assertEqual(parameter_config.scale_type, pc.ScaleType.LOG) - self.assertEqual(parameter_config.default_value, 1) - self.assertIsInstance(parameter_config.default_value, int) - - self.assertEqual( - parameter_config.continuify(), - pc.ParameterConfig.factory( - "name", - bounds=(1.0, 3.0), - scale_type=pc.ScaleType.LOG, - default_value=1.0, - ), - ) - - def testCreatesDiscreteConfig(self): - feasible_values = (-1, 3, 2) - parameter_config = pc.ParameterConfig.factory( - "name", - feasible_values=feasible_values, - scale_type=pc.ScaleType.UNIFORM_DISCRETE, - default_value=2, - external_type=pc.ExternalType.INTEGER, - ) - self.assertEqual(parameter_config.name, "name") - self.assertEqual(parameter_config.type, pc.ParameterType.DISCRETE) - self.assertEqual(parameter_config.feasible_values, [-1, 2, 3]) - self.assertEqual(parameter_config.bounds, (-1, 3)) - self.assertEqual(parameter_config.scale_type, pc.ScaleType.UNIFORM_DISCRETE) - 
self.assertEqual(parameter_config.default_value, 2) - self.assertIsInstance(parameter_config.default_value, float) - self.assertEqual(parameter_config.external_type, pc.ExternalType.INTEGER) - - self.assertEqual( - parameter_config.continuify(), - pc.ParameterConfig.factory("name", bounds=(-1.0, 3.0), default_value=2.0), - ) - - def testCreatesCategoricalConfig(self): - feasible_values = ("b", "a", "c") - parameter_config = pc.ParameterConfig.factory( - "name", feasible_values=feasible_values, default_value="c" - ) - self.assertEqual(parameter_config.name, "name") - self.assertEqual(parameter_config.feasible_values, ["a", "b", "c"]) - self.assertEqual(parameter_config.default_value, "c") - with self.assertRaises(ValueError): - _ = parameter_config.bounds - - def testCreatesDoubleConfigIntDefault(self): - parameter_config = pc.ParameterConfig.factory( - "name", bounds=(-1.0, 1.0), scale_type=pc.ScaleType.LINEAR, default_value=1 - ) - self.assertEqual(parameter_config.default_value, 1.0) - self.assertIsInstance(parameter_config.default_value, float) - - def testCreatesDiscreteConfigDoubleDefault(self): - feasible_values = (-1, 3, 2) - parameter_config = pc.ParameterConfig.factory( - "name", - feasible_values=feasible_values, - scale_type=pc.ScaleType.UNIFORM_DISCRETE, - default_value=2.0, - ) - self.assertEqual(parameter_config.default_value, 2.0) - self.assertIsInstance(parameter_config.default_value, float) - - def testCreatesIntegerConfigDoubleDefault(self): - parameter_config = pc.ParameterConfig.factory( - "name", bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=2.0 - ) - self.assertEqual(parameter_config.default_value, 2.0) - self.assertIsInstance(parameter_config.default_value, int) - - def testCreatesIntegerConfigInvalidDoubleDefault(self): - with self.assertRaisesRegex(ValueError, "default_value for an.*"): - pc.ParameterConfig.factory( - "name", bounds=(1, 3), scale_type=pc.ScaleType.LOG, default_value=2.0001 - ) - - def 
testCreatesCategoricalConfigNoDefault(self): - feasible_values = ("b", "a", "c") - parameter_config = pc.ParameterConfig.factory( - "name", feasible_values=feasible_values - ) - self.assertIsNone(parameter_config.default_value) - - def testCreatesCategoricalConfigBadDefault(self): - feasible_values = ("b", "a", "c") - with self.assertRaisesRegex( - ValueError, "default_value has an incorrect type.*" - ): - pc.ParameterConfig.factory( - "name", feasible_values=feasible_values, default_value=0.1 - ) - - def testRaisesErrorWhenNameIsEmpty(self): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory("", bounds=(-1.0, 1.0)) - - def testRaisesErrorWhenOverSpecified(self): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory( - "name", bounds=(-1.0, 1.0), feasible_values=["a", "b", "c"] - ) - - @parameterized.named_parameters( - ("HaveInfinity", (-float("inf"), 1)), - ("HaveNan", (1, float("nan"))), - ("HaveMixedTypes", (1, float(1))), - ("AreWronglyOrdered", (1, -1)), - ) - def testRaisesErrorWhenBounds(self, bounds): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory("name", bounds=bounds) - - @parameterized.named_parameters( - ("HaveDuplicateCategories", ["a", "a", "b"]), - ("HaveDuplicateNumbers", [1.0, 2.0, 2.0]), - ("HaveMixedTypes", ["a", 1, 2]), - ) - def testRaisesErrorWhenFeasibleValues(self, feasible_values): - with self.assertRaises(ValueError): - _ = pc.ParameterConfig.factory("name", feasible_values=feasible_values) - - -_child1 = pc.ParameterConfig.factory("double_child", bounds=(0.0, 1.0)) -_child2 = pc.ParameterConfig.factory("integer_child", bounds=(0, 1)) - - -class ParameterConfigFactoryTestWithChildren(parameterized.TestCase): - @parameterized.named_parameters( - ("IntParentValues", [([0], _child1), ([0, 1], _child2)]), - ("FloatParentValues", [([0.0], _child1), ([0.0, 1.0], _child2)]), - ) - def testIntegerWithValid(self, children): - p = pc.ParameterConfig.factory("parent", bounds=(0, 1), 
children=children) - self.assertLen(p.child_parameter_configs, 2) - self.assertEmpty(p.matching_parent_values) - self.assertSameElements( - p.child_parameter_configs[0].matching_parent_values, children[0][0] - ) - self.assertSameElements( - p.child_parameter_configs[1].matching_parent_values, children[1][0] - ) - - @parameterized.named_parameters( - ("FloatParentValues", [([0.5], _child1)]), - ("StringParentValues", [(["0"], _child1), (["0.0", "1.0"], _child2)]), - ) - def testIntegerWithInvalid(self, children): - with self.assertRaises(TypeError): - _ = pc.ParameterConfig.factory("parent", bounds=(0, 1), children=children) - - @parameterized.named_parameters( - ("IntParentValues", [([0], _child1), ([1], _child2)]), - ("FloatParentValues", [([0.0], _child1), ([0.0, 1.0], _child2)]), - ) - def testDiscreteWithValid(self, children): - p = pc.ParameterConfig.factory( - "parent", feasible_values=[0.0, 1.0], children=children - ) - self.assertLen(p.child_parameter_configs, 2) - self.assertEmpty(p.matching_parent_values) - self.assertSameElements( - p.child_parameter_configs[0].matching_parent_values, children[0][0] - ) - self.assertSameElements( - p.child_parameter_configs[1].matching_parent_values, children[1][0] - ) - - @parameterized.named_parameters( - ("StringParentValues", [(["0.0"], _child1), (["0.0", "1.0"], _child2)]) - ) - def testDiscreteWithInvalid(self, children): - with self.assertRaises(TypeError): - _ = pc.ParameterConfig.factory( - "parent", feasible_values=[0.0, 1.0], children=children - ) - - @parameterized.named_parameters( # pyformat: disable - ("StringParentValues", [(["a"], _child1), (["a", "b"], _child2)]) - ) - def testCategoricalWithValid(self, children): - p = pc.ParameterConfig.factory( - "parent", feasible_values=["a", "b"], children=children - ) - self.assertLen(p.child_parameter_configs, 2) - self.assertEmpty(p.matching_parent_values) - self.assertSameElements( - p.child_parameter_configs[0].matching_parent_values, children[0][0] - ) - 
self.assertSameElements( - p.child_parameter_configs[1].matching_parent_values, children[1][0] - ) - - @parameterized.named_parameters( - ("StringParentValues", [(["0.0"], _child1), (["1.0"], _child2)]) - ) - def testCategoricalWithInvalid(self, children): - with self.assertRaises(TypeError): - _ = pc.ParameterConfig.factory( - "parent", feasible_values=[0.0, 1.0], children=children - ) - - def testAddChildren(self): - children = [(["a"], _child1), (["a", "b"], _child2)] - p = pc.ParameterConfig.factory( - "parent", feasible_values=["a", "b"], children=children - ) - new_children = [ - (["a"], pc.ParameterConfig.factory("double_child2", bounds=(1.0, 2.0))), - ( - ["b"], - pc.ParameterConfig.factory( - "categorical_child", feasible_values=["c", "d"] - ), - ), - ] - p2 = p.add_children(new_children) - self.assertLen(p.child_parameter_configs, 2) - self.assertSameElements( - [c.name for c in p.child_parameter_configs], [c[1].name for c in children] - ) - - self.assertLen(p2.child_parameter_configs, 4) - expected_names = [c[1].name for c in children] - expected_names += [c[1].name for c in new_children] - got_names = [c.name for c in p2.child_parameter_configs] - self.assertSameElements(got_names, expected_names) - - -class MergeTest(parameterized.TestCase): - def test_merge_bounds(self): - pc1 = pc.ParameterConfig.factory("pc1", bounds=(0.0, 2.0)) - pc2 = pc.ParameterConfig.factory("pc2", bounds=(-1.0, 1.0)) - self.assertEqual( - pc.ParameterConfig.merge(pc1, pc2), - pc.ParameterConfig.factory("pc1", bounds=(-1.0, 2.0)), - ) - - def test_merge_discrete(self): - pc1 = pc.ParameterConfig.factory( - "pc1", feasible_values=[0.0, 2.0], scale_type=pc.ScaleType.LINEAR - ) - pc2 = pc.ParameterConfig.factory("pc2", feasible_values=[-1.0, 0.0]) - self.assertEqual( - pc.ParameterConfig.merge(pc1, pc2), - pc.ParameterConfig.factory( - "pc1", feasible_values=[-1.0, 0.0, 2.0], scale_type=pc.ScaleType.LINEAR - ), - ) - - def test_merge_categorical(self): - pc1 = 
pc.ParameterConfig.factory("pc1", feasible_values=["a", "b"]) - pc2 = pc.ParameterConfig.factory("pc2", feasible_values=["a", "c"]) - self.assertEqual( - pc.ParameterConfig.merge(pc1, pc2), - pc.ParameterConfig.factory("pc1", feasible_values=["a", "b", "c"]), - ) - - -class ParameterConfigContainsTest(parameterized.TestCase): - @parameterized.parameters((1.0, True), (-2.0, False), (3.0, False)) - def testFloat(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory("pc1", bounds=(-1.0, 2.0)) - self.assertEqual(config.contains(value), expected) - - @parameterized.parameters((1, True), (-2, False), (3, False), (1.5, False)) - def testInt(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory("pc1", bounds=(-1, 2)) - self.assertEqual(config.contains(value), expected) - - @parameterized.parameters((1.0, False), (2, True), (-1, True)) - def testDiscrete(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory("pc1", feasible_values=[-1.0, 0.0, 2.0]) - self.assertEqual(config.contains(value), expected) - - @parameterized.parameters(("a", True), ("b", False), ("c", False)) - def testCategorical(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory("pc1", feasible_values=["a", "aa", "aaa"]) - self.assertEqual(config.contains(value), expected) - - @parameterized.parameters((True, True), ("a", False), (0, False)) - def testBoolean(self, value: Any, expected: bool): - config = pc.ParameterConfig.factory("pc1", feasible_values=["true", "false"]) - self.assertEqual(config.contains(value), expected) - - -class TraverseTest(parameterized.TestCase): - @parameterized.named_parameters( - ("ShowChildrenTrue", True), ("ShowChildrenFalse", False) - ) - def testTraverse(self, show_children): - grandchild1 = pc.ParameterConfig.factory("grandchild1", bounds=(-1.0, 1.0)) - grandchildren = [(["a"], grandchild1), (["b"], grandchild1)] - child1 = pc.ParameterConfig.factory( - "child1", feasible_values=["a", "b"], 
children=grandchildren - ) - - child2 = pc.ParameterConfig.factory("child2", bounds=(0.0, 1.0)) - children = [([0], child1), ([1], child1), ([0, 1], child2)] - parent = pc.ParameterConfig.factory("parent", bounds=(0, 1), children=children) - traversed_names = [ - pc.name for pc in parent.traverse(show_children=show_children) - ] - # Some parameter names are reused for separate child nodes, so they - # will appear multiple times, but they are indeed separate parameters. - self.assertEqual( - traversed_names, - [ - "parent", - "child1", - "grandchild1", - "grandchild1", - "child1", - "grandchild1", - "grandchild1", - "child2", - ], - ) - - -if __name__ == "__main__": - absltest.main() diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial.py b/google/cloud/aiplatform/vizier/pyvizier/trial.py deleted file mode 100644 index 581a8ab1dc..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/trial.py +++ /dev/null @@ -1,548 +0,0 @@ -"""Wrapper classes for Trial protos and other messages in them. - -Example usage: - trial = Trial.from_proto(trial_proto) - print('This trial's auc is: ', trial.final_measurement.metrics['auc'].value) - print('This trial had parameter "n_hidden_layers": ', - trial.parameters['n_hidden_layers'].value) -""" - -import collections -from collections import abc as cabc -import copy -import datetime -import enum -from typing import Any, Dict, List, MutableMapping, Optional, Union, FrozenSet - -import attr - -ParameterValueTypes = Union[str, int, float, bool] -OrderedDict = collections.OrderedDict - - -class ExternalType(enum.Enum): - """Valid Values for ParameterConfig.external_type.""" - - INTERNAL = "INTERNAL" - BOOLEAN = "BOOLEAN" - INTEGER = "INTEGER" - FLOAT = "FLOAT" - - -# Values should NEVER be removed from the enums below, only added. 
-class TrialStatus(enum.Enum): - """Values for Trial.Status.""" - - UNKNOWN = "UNKNOWN" - REQUESTED = "REQUESTED" - ACTIVE = "ACTIVE" - COMPLETED = "COMPLETED" - STOPPING = "STOPPING" - - -@attr.s(frozen=True, init=True, slots=True, kw_only=False) -class Metric: - """Enhanced immutable wrapper for vizier_pb2.Metric proto. - - It has an additional field "std" for internal usage. This field gets lost - when the object is converted to proto. - """ - - def _std_not_negative(self, _, stddev): - if stddev < 0: - raise ValueError("Standard deviation must be a non-negative finite number.") - - value: float = attr.ib( - converter=float, - init=True, - validator=[attr.validators.instance_of(float)], - kw_only=False, - ) - std: float = attr.ib( - converter=float, - validator=[attr.validators.instance_of(float), _std_not_negative], - init=True, - default=0.0, - kw_only=True, - ) - - -@attr.s(auto_attribs=True, frozen=True, init=True, slots=True, repr=False) -class ParameterValue: - """Immutable wrapper for vizier_pb2.Parameter.value, which is a oneof field. - - Has accessors (properties) that cast the value into the type according - to StudyConfiguration class behavior. In particular, 'true' and 'false' are - treated as special strings that are cast to a numeric value of 1 and 0, - respectively, and boolean value of True and False, repectively. - """ - - value: ParameterValueTypes = attr.ib( - init=True, - validator=[ - attr.validators.instance_of((str, int, float, bool)), - ], - ) - - def cast( - self, - external_type: ExternalType, - ) -> ParameterValueTypes: - """Returns ParameterValue cast to external_type. - - Args: - external_type: - - Returns: - self.value if external_type is INTERNAL. - self.as_bool if external_type is BOOLEAN. - self.as_int if external_type is INTEGER. - self.as_float if external_type is FLOAT. - - Raises: - ValueError: If external_type is not valid. 
- """ - if external_type == ExternalType.INTERNAL: - return self.value - elif external_type == ExternalType.BOOLEAN: - return self.as_bool - elif external_type == ExternalType.INTEGER: - return self.as_int - elif external_type == ExternalType.FLOAT: - return self.as_float - else: - raise ValueError( - "Unknown external type enum value: {}.".format(external_type) - ) - - @property - def as_float(self) -> Optional[float]: - """Returns the value cast to float.""" - if self.value == "true": - return 1.0 - elif self.value == "false": - return 0.0 - elif isinstance(self.value, str): - return None - return float(self.value) - - @property - def as_int(self) -> Optional[int]: - """Returns the value cast to int.""" - if self.value == "true": - return 1 - elif self.value == "false": - return 0 - elif isinstance(self.value, str): - return None - return int(self.value) - - @property - def as_str(self) -> Optional[str]: - """Returns str-typed value or lowercase 'true'/'false' if value is bool.""" - if isinstance(self.value, bool): - return str(self.value).lower() - elif isinstance(self.value, str): - return self.value - return None - - @property - def as_bool(self) -> Optional[bool]: - """Returns the value as bool following StudyConfiguration's behavior. - - Returns: True if value is 'true' or 1. False if value is - 'false' or 0. For all other cases, returns None. - For string type, this behavior is consistent with how - StudyConfiguration.AddBooleanParameter's. 
For other types, this - guarantees that self.value == self.as_bool - """ - if isinstance(self.value, str): - if self.value.lower() == "true": - return True - elif self.value.lower() == "false": - return False - else: - if self.value == 1.0: - return True - elif self.value == 0.0: - return False - return None - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return str(self.value) - - -class _MetricDict(collections.UserDict): - def __setitem__(self, key: str, value: Union[float, Metric]): - if isinstance(value, Metric): - self.data.__setitem__(key, value) - else: - self.data.__setitem__(key, Metric(value=value)) - - -@attr.s(auto_attribs=True, frozen=False, init=True, slots=True) -class Measurement: - """Collection of metrics with a timestamp.""" - - def _value_is_finite(self, _, value): - import numpy as np - - if not (np.isfinite(value) and value >= 0): - raise ValueError("Must be finite and non-negative.") - - # Should be used as a regular Dict. - metrics: MutableMapping[str, Metric] = attr.ib( - init=True, - converter=lambda d: _MetricDict(**d), - default=_MetricDict(), - validator=attr.validators.instance_of(_MetricDict), - on_setattr=[attr.setters.convert, attr.setters.validate], - ) - - elapsed_secs: float = attr.ib( - converter=float, - init=True, - default=0, - validator=[attr.validators.instance_of(float), _value_is_finite], - on_setattr=[attr.setters.convert, attr.setters.validate], - kw_only=True, - ) - - steps: float = attr.ib( - converter=int, - init=True, - default=0, - validator=[attr.validators.instance_of(int), _value_is_finite], - on_setattr=[attr.setters.convert, attr.setters.validate], - kw_only=True, - ) - - -def _to_local_time(dt: Optional[datetime.datetime]) -> Optional[datetime.datetime]: - """Converter for initializing timestamps in Trial class.""" - return dt.astimezone() if dt else None - - -@attr.define(init=False, frozen=True, eq=True) -class ParameterDict(cabc.MutableMapping): - """Parameter 
dictionary. - - Maps the parameter names to their values. Works like a regular - dict[str, ParameterValue] for the most part, except one can directly assign - values of type `ParameterValueType`. So, - ParameterDict(a=3) and - ParameterDict(a=ParameterValue(3)) are equivalent. - - To access the raw value directly, use get_value() method. - d.get('a').value == d.get_value('a') - """ - - _items: MutableMapping[str, ParameterValue] = attr.field(init=False, factory=dict) - - def __init__(self, iterable: Any = tuple(), **kwargs): - self.__attrs_init__() - self.update(iterable, **kwargs) - - def __setitem__(self, key: str, value: Union[ParameterValue, ParameterValueTypes]): - if isinstance(value, ParameterValue): - self._items[key] = value - else: - self._items[key] = ParameterValue(value) - - def __delitem__(self, key: str): - del self._items[key] - - def __getitem__(self, key: str) -> ParameterValue: - return self._items[key] - - def __len__(self) -> int: - return len(self._items) - - def __iter__(self): - return iter(self._items) - - def get_value( - self, key: str, default: Optional[ParameterValueTypes] = None - ) -> Optional[ParameterValueTypes]: - pv = self.get(key, default) - if isinstance(pv, ParameterValue): - return pv.value - else: - return pv - - -@attr.define(auto_attribs=True, frozen=False, init=True, slots=True) -class Trial: - """Wrapper for learning_vizier.service.Trial proto.""" - - id: int = attr.ib( - init=True, - kw_only=True, - default=0, - validator=attr.validators.instance_of(int), - ) - - _is_requested: bool = attr.ib( - init=True, - kw_only=True, - default=False, - validator=attr.validators.instance_of(bool), - ) - - assigned_worker: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - stopping_reason: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - 
_infeasibility_reason: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - description: Optional[str] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(str)), - ) - - parameters: ParameterDict = attr.field( - init=True, - kw_only=True, - factory=ParameterDict, - converter=ParameterDict, - validator=attr.validators.instance_of(ParameterDict), - ) - - related_links: Dict[str, str] = attr.ib( - init=True, - kw_only=True, - factory=dict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(str), - mapping_validator=attr.validators.instance_of(dict), - ), - ) # pytype: disable=wrong-arg-types - - final_measurement: Optional[Measurement] = attr.ib( - init=True, - kw_only=True, - default=None, - validator=attr.validators.optional(attr.validators.instance_of(Measurement)), - ) - - measurements: List[Measurement] = attr.ib( - init=True, - kw_only=True, - default=list(), - validator=attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(Measurement), - iterable_validator=attr.validators.instance_of(list), - ), - ) - - creation_time: Optional[datetime.datetime] = attr.ib( - init=True, - default=datetime.datetime.now(), - converter=_to_local_time, - kw_only=True, - repr=lambda v: v.strftime("%x %X") if v is not None else "None", - validator=attr.validators.optional( - attr.validators.instance_of(datetime.datetime) - ), - ) - - completion_time: Optional[datetime.datetime] = attr.ib( - init=True, - kw_only=True, - default=None, - repr=lambda v: v.strftime("%x %X") if v is not None else "None", - converter=_to_local_time, - validator=attr.validators.optional( - attr.validators.instance_of(datetime.datetime) - ), - ) - - @property - def duration(self) -> Optional[datetime.timedelta]: - """Returns the duration 
of this Trial if it is completed, or None.""" - if self.completion_time: - return self.completion_time - self.creation_time - else: - return None - - @property - def status(self) -> TrialStatus: - """Status. - - COMPLETED: Trial has final measurement or is declared infeasible. - ACTIVE: Trial is being evaluated. - STOPPING: Trial is being evaluated, but was decided to be not worth further - evaluating. - REQUESTED: Trial is queued for future suggestions. - """ - if self.final_measurement is not None or self.infeasible: - return TrialStatus.COMPLETED - elif self.stopping_reason is not None: - return TrialStatus.STOPPING - elif self._is_requested: - return TrialStatus.REQUESTED - else: - return TrialStatus.ACTIVE - - @property - def is_completed(self) -> bool: - """Returns True if this Trial is completed.""" - from absl import logging - - if self.status == TrialStatus.COMPLETED: - if self.completion_time is None: - logging.warning( - "Invalid Trial state: status is COMPLETED, but a " - " completion_time was not set" - ) - return True - elif self.completion_time is not None: - if self.status is None: - logging.warning( - "Invalid Trial state: status is not set to COMPLETED, " - "but a completion_time is set" - ) - return True - return False - - @property - def infeasible(self) -> bool: - """Returns True if this Trial is infeasible.""" - return self._infeasibility_reason is not None - - @property - def infeasibility_reason(self) -> Optional[str]: - """Returns this Trial's infeasibility reason, if set.""" - return self._infeasibility_reason - - def complete(self, measurement: Measurement, *, inplace: bool = True) -> "Trial": - """Completes the trial and returns it. - - Args: - measurement: Measurement to complete the trial with. - inplace: If True, Trial is modified in place. If False, which is the - default, then the operation is performed and it returns a copy of the - object - - Returns: - Completed Trial. - """ - if inplace: - # Use setattr. 
If we assign to self.final_measurement, then hyperref - # mechanisms think this line is where `final_measurement` property - # is defined, instead of where we declare attr.ib. - self.__setattr__("final_measurement", copy.deepcopy(measurement)) - self.completion_time = _to_local_time(datetime.datetime.now()) - return self - else: - clone = copy.deepcopy(self) - return clone.complete(measurement, inplace=True) - - -# Define aliases. -CompletedTrial = Trial -PendingTrial = Trial -CompletedTrialWithMeasurements = Trial -PendingTrialWithMeasurements = Trial - - -@attr.frozen -class TrialSuggestion: - """Freshly suggested trial. - - Suggestion can be converted to Trial object which has more functionalities. - """ - - parameters: ParameterDict = attr.field( - init=True, - factory=ParameterDict, - converter=ParameterDict, - validator=attr.validators.instance_of(ParameterDict), - ) # pytype: disable=wrong-arg-types - - def to_trial(self, uid: int) -> Trial: - """Assign an id and make it a Trial object. - - Usually SuggetedTrial objects are shorted-lived and not exposed to end - users. This method is for non-service usage of trial suggestions in - benchmarks, tests, colabs, etc. - - Args: - uid: Trial id. - - Returns: - Trial object. - """ - return Trial(id=uid, parameters=self.parameters) - - -@attr.define -class TrialFilter: - """Trial filter. - - All filters are by default 'AND' conditions. - - Attributes: - ids: If set, requires the trial's id to be in the set. - min_id: If set, requires the trial's id to be at least this number. - max_id: If set, requires the trial's id to be at most this number. - status: If set, requires the trial's status to be in the set. 
- """ - - ids: Optional[FrozenSet[int]] = attr.field( - default=None, - converter=lambda x: frozenset(x) if x is not None else None, - validator=attr.validators.optional( - attr.validators.deep_iterable( - attr.validators.instance_of(int), attr.validators.instance_of(frozenset) - ) - ), - ) - min_id: Optional[int] = attr.field(default=None) - max_id: Optional[int] = attr.field(default=None) - status: Optional[FrozenSet[TrialStatus]] = attr.field( - default=None, - converter=lambda x: frozenset(x) if x is not None else None, - validator=attr.validators.optional( - attr.validators.deep_iterable( - attr.validators.instance_of(TrialStatus), - attr.validators.instance_of(frozenset), - ) - ), - ) - - def __call__(self, trial: Trial) -> bool: - if self.ids is not None: - if trial.id not in self.ids: - return False - if self.min_id is not None: - if trial.id < self.min_id: - return False - if self.max_id is not None: - if trial.id > self.max_id: - return False - if self.status is not None: - if trial.status not in self.status: - return False - return True diff --git a/google/cloud/aiplatform/vizier/pyvizier/trial_test.py b/google/cloud/aiplatform/vizier/pyvizier/trial_test.py deleted file mode 100644 index 1eee089706..0000000000 --- a/google/cloud/aiplatform/vizier/pyvizier/trial_test.py +++ /dev/null @@ -1,211 +0,0 @@ -"""Tests for vizier.pyvizier.shared.trial.""" -import copy -import datetime - -import numpy as np - -from vizier._src.pyvizier.shared import trial -from absl.testing import absltest -from absl.testing import parameterized - -Metric = trial.Metric -Measurement = trial.Measurement - - -class MetricTest(absltest.TestCase): - def testMetricCreation(self): - _ = Metric(value=0, std=0.5) - - def testMetricCanHaveNaN(self): - _ = Metric(value=np.nan, std=-np.nan) - - def testMetricCannotHaveNegativeStd(self): - with self.assertRaises(ValueError): - _ = Metric(value=0, std=-0.5) - - -class MeasurementTest(absltest.TestCase): - def 
testMetricsInitializedFromFloats(self): - m = Measurement() - m.metrics = dict(a=0.3) - self.assertEqual(m.metrics["a"], Metric(0.3)) - m.metrics["b"] = 0.5 - self.assertEqual(m.metrics, {"a": Metric(0.3), "b": Metric(0.5)}) - - def testMetrics(self): - m = Measurement() - m.metrics = dict(a=Metric(0.3)) - self.assertEqual(m.metrics["a"], Metric(0.3)) - - def testTimeStampsAreNotFrozen(self): - m = Measurement() - m.elapsed_secs = 1.0 - m.steps = 5 - - -ParameterValue = trial.ParameterValue - - -class ParameterValueTest(parameterized.TestCase): - @parameterized.named_parameters(("True", True), ("False", False)) - def testBool(self, bool_value): - value = ParameterValue(bool_value) - self.assertEqual(value.as_float, float(bool_value)) - self.assertEqual(value.as_int, int(bool_value)) - self.assertEqual(value.as_str, str(bool_value).lower()) - - def testIntegralFloat0(self): - value = ParameterValue(0.0) - self.assertEqual(value.as_float, 0.0) - self.assertEqual(value.as_int, 0) - self.assertEqual(value.as_bool, False) - self.assertIsNone(value.as_str) - - def testIntegralFloat1(self): - value = ParameterValue(1.0) - self.assertEqual(value.as_float, 1.0) - self.assertEqual(value.as_int, 1) - self.assertEqual(value.as_bool, True) - self.assertIsNone(value.as_str) - - def testIntegralFloat2(self): - value = ParameterValue(2.0) - self.assertEqual(value.as_float, 2.0) - self.assertEqual(value.as_int, 2) - self.assertIsNone(value.as_bool) - self.assertIsNone(value.as_str) - - def testInteger0(self): - value = ParameterValue(0) - self.assertEqual(value.as_float, 0) - self.assertEqual(value.as_int, 0) - self.assertEqual(value.as_bool, False) - self.assertIsNone(value.as_str) - - def testInteger1(self): - value = ParameterValue(1) - self.assertEqual(value.as_float, 1) - self.assertEqual(value.as_int, 1) - self.assertEqual(value.as_bool, True) - self.assertIsNone(value.as_str) - - def testInteger2(self): - value = ParameterValue(2) - self.assertEqual(value.as_float, 2) - 
self.assertEqual(value.as_int, 2) - self.assertIsNone(value.as_bool) - self.assertIsNone(value.as_str) - - def testStringTrue(self): - value = ParameterValue("true") - self.assertEqual(value.as_bool, True) - self.assertEqual(value.as_str, "true") - - def testStringFalse(self): - value = ParameterValue("false") - self.assertEqual(value.as_bool, False) - self.assertEqual(value.as_str, "false") - - def testCastAsExternalNone(self): - value = ParameterValue(1.0) - # pytype: disable=wrong-arg-types - with self.assertRaisesRegex(ValueError, "Unknown external type"): - value.cast(None) - # pytype: enable=wrong-arg-types - - def testParameterCanHaveNonFiniteValues(self): - ParameterValue(float("nan")) - ParameterValue(value=float("inf")) - ParameterValue(value=float("inf")) - - -class TrialTest(absltest.TestCase): - def testCompleteInplace(self): - test = trial.Trial() - measurement = Measurement( - metrics={"pr-auc": Metric(value=0.8), "latency": Metric(value=32)} - ) - completed = test.complete(measurement, inplace=True) - - # The trial was completed in place. - self.assertEqual(test.final_measurement, measurement) - self.assertLessEqual(test.completion_time, datetime.datetime.now().astimezone()) - self.assertGreaterEqual(test.completion_time, test.creation_time) - self.assertGreaterEqual(test.duration.total_seconds(), 0) - - # completed is the same reference as test. - self.assertEqual(test, completed) - - def testCompleteNotInplace(self): - """Complete with inplace=False.""" - test = trial.Trial() - measurement = Measurement( - metrics={"pr-auc": Metric(value=0.8), "latency": Metric(value=32)} - ) - - test_copy = copy.deepcopy(test) - - completed = test.complete(measurement, inplace=False) - - # The returned Trial is completed. 
- self.assertEqual(completed.final_measurement, measurement) - self.assertGreaterEqual(completed.completion_time, completed.creation_time) - self.assertLessEqual( - completed.completion_time, datetime.datetime.now().astimezone() - ) - self.assertGreaterEqual(completed.duration.total_seconds(), 0) - self.assertEqual(completed.status, trial.TrialStatus.COMPLETED) - self.assertTrue(completed.is_completed) - - # The original Trial is unchanged. - self.assertEqual(test_copy, test) - self.assertIsNone(test.final_measurement) - self.assertIsNone(test.completion_time) - self.assertIsNone(test.duration) - self.assertEqual(test.status, trial.TrialStatus.ACTIVE) - self.assertFalse(test.is_completed) - - def testDefaultsNotShared(self): - """Make sure default parameters are not shared between instances.""" - trial1 = trial.Trial() - trial2 = trial.Trial() - trial1.parameters["x1"] = trial.ParameterValue(5) - self.assertEmpty(trial2.parameters) - - -class ParameterDictTest(parameterized.TestCase): - @parameterized.parameters((True,), (3,), (1.0,), ("aa",)) - def testAssignRawValue(self, v): - d = trial.ParameterDict() - d["p1"] = v - self.assertEqual(d.get("p1"), trial.ParameterValue(v)) - self.assertEqual(d.get_value("p1"), v) - self.assertEqual(d.get_value("p2", "default"), "default") - self.assertLen(d, 1) - self.assertLen(d.items(), 1) - - @parameterized.parameters((True,), (3,), (1.0,), ("aa",)) - def testAssignWrappedValue(self, v): - d = trial.ParameterDict() - v = trial.ParameterValue(v) - d["p1"] = v - self.assertEqual(d.get("p1"), v) - self.assertEqual(d.get_value("p1"), v.value) - self.assertEqual(d.get_value("p2", "default"), "default") - self.assertLen(d, 1) - self.assertLen(d.items(), 1) - - -class SuggestionTestI(absltest.TestCase): - def testToTrial(self): - suggestion = trial.TrialSuggestion({"a": 3, "b": True}) - suggestion.metadata["key"] = "value" - - t = suggestion.to_trial(1) - self.assertEqual(t.id, 1) - self.assertEqual(t.parameters, 
suggestion.parameters) - self.assertEqual(t.metadata, suggestion.metadata) - - -if __name__ == "__main__": - absltest.main() From 50aacade92694ddef659e38f894b81c0d349346e Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 15 Jul 2022 15:44:03 -0700 Subject: [PATCH 27/36] Import the google-vizier and fix the dependencies for Vertex Vizier. --- setup.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/setup.py b/setup.py index a435a37e82..5d862ce66d 100644 --- a/setup.py +++ b/setup.py @@ -64,13 +64,11 @@ "absl-py>=0.7", "numpy>=1.19.0", "protobuf>=3.6,<4.0", - #"pytype==2022.1.5", "keras-tuner>=1.0,<2.0", "portpicker==1.3.1", "googleapis-common-protos==1.56.0", "google-api-python-client==1.12.8", -# "sqlalchemy==1.4", - "google-vizier @ git+https://github.com/halio-g/vizier.git", + "google-vizier==0.0.3a0", ] private_endpoints_extra_require = [ @@ -156,8 +154,5 @@ "Topic :: Internet", "Topic :: Software Development :: Libraries :: Python Modules", ], -# dependency_links=[ -# os.path.join(os.getcwd(), 'google-vizier', 'google_vizier-0.0.3a0-py3.7.egg') -# ], zip_safe=False, ) From 5fbc41dd3071001802d6f972d8b38cb3fb6d30ba Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 19 Jul 2022 12:08:41 -0700 Subject: [PATCH 28/36] Configured the dependency of the google-vizier. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5d862ce66d..dc79a2f4fd 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ "portpicker==1.3.1", "googleapis-common-protos==1.56.0", "google-api-python-client==1.12.8", - "google-vizier==0.0.3a0", + "google-vizier==0.0.3a", ] private_endpoints_extra_require = [ From 9be41cc1cb8309c0b39a131877f39160e5c7fea9 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 19 Jul 2022 12:18:25 -0700 Subject: [PATCH 29/36] Ran the nox -s blacken. 
--- .../vizier/pyvizier/proto_converters.py | 13 ++++------- .../vizier/pyvizier/study_config.py | 23 ++++++------------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py index 9099e0ac8d..86ce9df770 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py +++ b/google/cloud/aiplatform/vizier/pyvizier/proto_converters.py @@ -17,6 +17,7 @@ _ScaleTypePb2 = study_pb2.StudySpec.ParameterSpec.ScaleType + class _ScaleTypeMap: """Proto converter for scale type.""" @@ -192,9 +193,7 @@ def _set_child_parameter_configs( Raises: ValueError: If the child configs are invalid """ - children: List[ - Tuple[MonotypeParameterSequence, ParameterConfig] - ] = [] + children: List[Tuple[MonotypeParameterSequence, ParameterConfig]] = [] for child in pc.child_parameter_configs: children.append((child.matching_parent_values, child)) if not children: @@ -240,9 +239,7 @@ def _set_child_parameter_configs( ) @classmethod - def to_proto( - cls, pc: ParameterConfig - ) -> study_pb2.StudySpec.ParameterSpec: + def to_proto(cls, pc: ParameterConfig) -> study_pb2.StudySpec.ParameterSpec: """Returns a ParameterConfig Proto.""" proto = study_pb2.StudySpec.ParameterSpec(parameter_id=pc.name) if pc.type == ParameterType.DISCRETE: @@ -266,9 +263,7 @@ class ParameterValueConverter: """Converter for ParameterValue.""" @classmethod - def from_proto( - cls, proto: study_pb2.Trial.Parameter - ) -> Optional[ParameterValue]: + def from_proto(cls, proto: study_pb2.Trial.Parameter) -> Optional[ParameterValue]: """Returns whichever value that is populated, or None.""" potential_value = proto.value if ( diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index eaad85e9a8..7e47d67d33 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ 
b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -44,6 +44,7 @@ ################### Enums ################### + class Algorithm(enum.Enum): """Valid Values for StudyConfig.Algorithm.""" @@ -71,9 +72,7 @@ class MetricInformationConverter: """A wrapper for vizier_pb2.MetricInformation.""" @classmethod - def from_proto( - cls, proto: study_pb2.StudySpec.MetricSpec - ) -> MetricInformation: + def from_proto(cls, proto: study_pb2.StudySpec.MetricSpec) -> MetricInformation: """Converts a MetricInformation proto to a MetricInformation object.""" if proto.goal not in list(ObjectiveMetricGoal): raise ValueError("Unknown MetricInformation.goal: {}".format(proto.goal)) @@ -88,9 +87,7 @@ def from_proto( ) @classmethod - def to_proto( - cls, obj: MetricInformation - ) -> study_pb2.StudySpec.MetricSpec: + def to_proto(cls, obj: MetricInformation) -> study_pb2.StudySpec.MetricSpec: """Returns this object as a proto.""" return study_pb2.StudySpec.MetricSpec(metric_id=obj.name, goal=obj.goal.value) @@ -196,9 +193,7 @@ class StudyConfig(ProblemStatement): kw_only=True, ) - automated_stopping_config: Optional[ - AutomatedStoppingConfig - ] = attr.field( + automated_stopping_config: Optional[AutomatedStoppingConfig] = attr.field( init=True, default=None, validator=attr.validators.optional( @@ -239,10 +234,8 @@ def from_proto(cls, proto: study_pb2.StudySpec) -> "StudyConfig": if not oneof_name: automated_stopping_config = None else: - automated_stopping_config = ( - AutomatedStoppingConfig.from_proto( - getattr(proto, oneof_name) - ) + automated_stopping_config = AutomatedStoppingConfig.from_proto( + getattr(proto, oneof_name) ) return cls( @@ -362,9 +355,7 @@ def trial_parameters( pytrial = proto_converters.TrialConverter.from_proto(proto) return self._pytrial_parameters(pytrial) - def _pytrial_parameters( - self, pytrial: Trial - ) -> Dict[str, ParameterValueSequence]: + def _pytrial_parameters(self, pytrial: Trial) -> Dict[str, ParameterValueSequence]: """Returns the 
trial values, cast to external types, if they exist. Args: From ed0d98d4ec3b7a5a2d01132f226525473bfedbc7 Mon Sep 17 00:00:00 2001 From: halio-g Date: Tue, 19 Jul 2022 14:16:54 -0700 Subject: [PATCH 30/36] Fixed the lint issue. --- google/cloud/aiplatform/vizier/pyvizier/__init__.py | 2 ++ google/cloud/aiplatform/vizier/pyvizier/study_config.py | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index 590673778b..e388ae69c8 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -55,7 +55,9 @@ "MetadataValue", "Namespace", "ParameterConfigConverter", + "ParameterValueTypes", "MeasurementConverter", + "MonotypeParameterSequence", "TrialConverter", "StudyConfig", "Algorithm", diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index 7e47d67d33..86c6130cc1 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -24,11 +24,13 @@ from google.cloud.aiplatform.vizier.pyvizier import proto_converters from google.cloud.aiplatform.vizier.pyvizier import SearchSpace from google.cloud.aiplatform.vizier.pyvizier import ProblemStatement +from google.cloud.aiplatform.vizier.pyvizier import ObjectiveMetricGoal from google.cloud.aiplatform.vizier.pyvizier import SearchSpaceSelector from google.cloud.aiplatform.vizier.pyvizier import MetricsConfig from google.cloud.aiplatform.vizier.pyvizier import MetricInformation from google.cloud.aiplatform.vizier.pyvizier import Trial from google.cloud.aiplatform.vizier.pyvizier import ParameterValueTypes +from google.cloud.aiplatform.vizier.pyvizier import ParameterConfig from google.cloud.aiplatform.compat.types import study as study_pb2 ################### PyTypes ################### @@ -304,9 
+306,9 @@ def _trial_to_external_values( parameter_values: Dict[str, Union[float, int, str]] = {} external_values: Dict[str, Union[float, int, str, bool]] = {} # parameter_configs is a list of Tuple[parent_name, ParameterConfig]. - parameter_configs: List[ - Tuple[Optional[str], parameter_config.ParameterConfig] - ] = [(None, p) for p in self.search_space.parameters] + parameter_configs: List[Tuple[Optional[str], ParameterConfig]] = [ + (None, p) for p in self.search_space.parameters + ] remaining_parameters = copy.deepcopy(pytrial.parameters) # Traverse the conditional tree using a BFS. while parameter_configs and remaining_parameters: From 4016de23852ec1febbe9cb5d8128af992bbb2b5e Mon Sep 17 00:00:00 2001 From: halio-g Date: Thu, 21 Jul 2022 16:53:09 -0700 Subject: [PATCH 31/36] Clean the debugging logs. --- google/cloud/aiplatform/vizier/pyvizier/study_config.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/study_config.py b/google/cloud/aiplatform/vizier/pyvizier/study_config.py index 86c6130cc1..0314e1442f 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/study_config.py +++ b/google/cloud/aiplatform/vizier/pyvizier/study_config.py @@ -266,9 +266,6 @@ def to_proto(self) -> study_pb2.StudySpec: if isinstance( auto_stop_proto, study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec ): - for method_name in dir(proto.decay_curve_stopping_spec): - if callable(getattr(proto.decay_curve_stopping_spec, method_name)): - print(method_name) proto.decay_curve_stopping_spec = copy.deepcopy(auto_stop_proto) elif isinstance( auto_stop_proto, study_pb2.StudySpec.DecayCurveAutomatedStoppingSpec From 04108e901dbd6a0145892433edb3d4ffb6661dd3 Mon Sep 17 00:00:00 2001 From: halio-g Date: Thu, 21 Jul 2022 16:55:08 -0700 Subject: [PATCH 32/36] Decouple the Study, Trial and the aiplatform to make the sample test pass. 
--- google/cloud/aiplatform/__init__.py | 4 --- tests/system/aiplatform/test_vizier.py | 12 ++++--- tests/unit/aiplatform/test_vizier.py | 50 +++++++++++++------------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 26dc68f5bd..88b450460f 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -52,8 +52,6 @@ CustomJob, HyperparameterTuningJob, ) -from google.cloud.aiplatform.vizier import Study -from google.cloud.aiplatform.vizier import Trial from google.cloud.aiplatform.pipeline_jobs import PipelineJob from google.cloud.aiplatform.tensorboard import ( Tensorboard, @@ -141,7 +139,6 @@ "PipelineJob", "PrivateEndpoint", "SequenceToSequencePlusForecastingTrainingJob", - "Study", "TabularDataset", "Tensorboard", "TensorboardExperiment", @@ -149,6 +146,5 @@ "TensorboardTimeSeries", "TextDataset", "TimeSeriesDataset", - "Trial", "VideoDataset", ) diff --git a/tests/system/aiplatform/test_vizier.py b/tests/system/aiplatform/test_vizier.py index 64a20f4d6c..7a43ea6997 100644 --- a/tests/system/aiplatform/test_vizier.py +++ b/tests/system/aiplatform/test_vizier.py @@ -2,6 +2,8 @@ from google.api_core import exceptions from google.cloud import aiplatform +from google.cloud.aiplatform.vizier import Study +from google.cloud.aiplatform.vizier import Trial from tests.system.aiplatform import e2e_base from google.cloud.aiplatform.vizier import pyvizier @@ -33,7 +35,7 @@ def test_vizier_lifecycle(self, shared_state): pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) ) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=self._temp_prefix, problem=sc ) shared_state["resources"] = [study] @@ -71,13 +73,13 @@ def test_vizier_study_deletion(self, shared_state): pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) ) - study = aiplatform.Study.create_or_load( + study = 
Study.create_or_load( display_name=self._temp_prefix, problem=sc ) study.delete() with pytest.raises(exceptions.NotFound): - study = aiplatform.Study(study_id=study.name) + study = Study(study_id=study.name) def test_vizier_trial_deletion(self, shared_state): aiplatform.init( @@ -100,11 +102,11 @@ def test_vizier_trial_deletion(self, shared_state): pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) ) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=self._temp_prefix, problem=sc ) trials = study.suggest(count=1, worker="halio_test_worker") trials[0].delete() with pytest.raises(exceptions.NotFound): - study = aiplatform.Trial(study_id=study.name, trial_name=trials[0].name) + study = Trial(study_id=study.name, trial_name=trials[0].name) diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index cacb4d6c1a..062c994364 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -26,6 +26,8 @@ from google.api_core import operation from google.cloud import aiplatform +from google.cloud.aiplatform.vizier import Study +from google.cloud.aiplatform.vizier import Trial from google.cloud.aiplatform import initializer from google.cloud.aiplatform.vizier import pyvizier @@ -283,14 +285,14 @@ def test_create_study(self, create_study_mock): ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) create_study_mock.assert_called_once_with( parent=_TEST_PARENT, study=_TEST_STUDY, credentials=ANY ) - assert type(study) == aiplatform.Study + assert type(study) == Study @pytest.mark.usefixtures("get_study_mock") def test_create_study_already_exists( @@ -313,7 +315,7 @@ def test_create_study_already_exists( ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = 
aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) @@ -321,7 +323,7 @@ def test_create_study_already_exists( request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME}, credentials=ANY, ) - assert type(study) == aiplatform.Study + assert type(study) == Study @pytest.mark.usefixtures("get_study_mock") def test_materialize_study_config(self, create_study_mock): @@ -341,7 +343,7 @@ def test_materialize_study_config(self, create_study_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) @@ -370,7 +372,7 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) @@ -384,15 +386,15 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): }, credentials=ANY, ) - assert type(trials[0]) == aiplatform.Trial + assert type(trials[0]) == Trial @pytest.mark.usefixtures("get_study_mock") def test_from_uid(self): aiplatform.init(project=_TEST_PROJECT) - study = aiplatform.Study.from_uid(uid=_TEST_STUDY_ID) + study = Study.from_uid(uid=_TEST_STUDY_ID) - assert type(study) == aiplatform.Study + assert type(study) == Study assert study.name == _TEST_STUDY_ID @pytest.mark.usefixtures("get_study_mock") @@ -413,7 +415,7 @@ def test_delete(self, create_study_mock, delete_study_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) @@ -441,7 +443,7 @@ def test_optimal_trials(self, 
list_optimal_trials_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) @@ -450,7 +452,7 @@ def test_optimal_trials(self, list_optimal_trials_mock): list_optimal_trials_mock.assert_called_once_with( request={"parent": _TEST_STUDY_NAME}, credentials=ANY ) - assert type(trials[0]) == aiplatform.Trial + assert type(trials[0]) == Trial @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock") def test_list_trials(self, list_trials_mock): @@ -470,7 +472,7 @@ def test_list_trials(self, list_trials_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) @@ -479,7 +481,7 @@ def test_list_trials(self, list_trials_mock): list_trials_mock.assert_called_once_with( request={"parent": _TEST_STUDY_NAME}, credentials=ANY ) - assert type(trials[0]) == aiplatform.Trial + assert type(trials[0]) == Trial @pytest.mark.usefixtures("get_study_mock", "create_study_mock") def test_get_trial(self, get_trial_mock): @@ -499,14 +501,14 @@ def test_get_trial(self, get_trial_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = aiplatform.Study.create_or_load( + study = Study.create_or_load( display_name=_TEST_DISPLAY_NAME, problem=sc ) trial = study.get_trial(1) get_trial_mock.assert_called_once_with(name=_TEST_TRIAL_NAME, retry=ANY) - assert type(trial) == aiplatform.Trial + assert type(trial) == Trial @pytest.mark.usefixtures("google_auth_mock") @@ -521,17 +523,17 @@ def teardown_method(self): @pytest.mark.usefixtures("get_trial_mock") def test_delete(self, delete_trial_mock): aiplatform.init(project=_TEST_PROJECT) - trial = 
aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) trial.delete() delete_trial_mock.assert_called_once_with(name=_TEST_TRIAL_NAME) - assert type(trial) == aiplatform.Trial + assert type(trial) == Trial @pytest.mark.usefixtures("get_trial_mock") def test_complete(self, complete_trial_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() measurement.metrics["y"] = 4 @@ -555,7 +557,7 @@ def test_complete(self, complete_trial_mock): @pytest.mark.usefixtures("get_trial_mock") def test_complete_empty_measurement(self, complete_trial_empty_measurement_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() measurement.metrics["y"] = 4 @@ -579,7 +581,7 @@ def test_complete_empty_measurement(self, complete_trial_empty_measurement_mock) @pytest.mark.usefixtures("get_trial_mock") def test_should_stop(self, should_stop_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) should_stop = trial.should_stop() @@ -591,7 +593,7 @@ def test_should_stop(self, should_stop_mock): @pytest.mark.usefixtures("get_trial_mock") def test_add_measurement(self, add_measurement_mock): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() measurement.metrics["y"] = 4 @@ -611,7 +613,7 @@ def test_add_measurement(self, add_measurement_mock): @pytest.mark.usefixtures("get_trial_mock") def test_properties(self): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() 
measurement.metrics["y"] = 4 @@ -628,7 +630,7 @@ def test_properties(self): @pytest.mark.usefixtures("get_trial_mock") def test_materialize(self): aiplatform.init(project=_TEST_PROJECT) - trial = aiplatform.Trial(trial_name=_TEST_TRIAL_NAME) + trial = Trial(trial_name=_TEST_TRIAL_NAME) measurement = pyvizier.Measurement() measurement.metrics["y"] = 4 From 49efcd74b74787a6d28c575313a04bfbd90ed6cf Mon Sep 17 00:00:00 2001 From: halio-g Date: Thu, 21 Jul 2022 16:55:39 -0700 Subject: [PATCH 33/36] Fixed the issue in the system test got an unexpected keyword argument 'credentials' --- google/cloud/aiplatform/vizier/study.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/google/cloud/aiplatform/vizier/study.py b/google/cloud/aiplatform/vizier/study.py index 023e1e5a1c..ca9ba72d4c 100644 --- a/google/cloud/aiplatform/vizier/study.py +++ b/google/cloud/aiplatform/vizier/study.py @@ -146,7 +146,6 @@ def create_or_load( location, ), study=study, - credentials=credentials, ) except exceptions.AlreadyExists: _LOGGER.info("The study is already created. 
Using existing study.") @@ -158,7 +157,6 @@ def create_or_load( ), "display_name": display_name, }, - credentials=credentials, ) return Study(study.name) @@ -195,7 +193,7 @@ def trials( """ list_trials_request = {"parent": self.resource_name} trials_response = self.api_client.list_trials( - request=list_trials_request, credentials=self.credentials + request=list_trials_request ) return [ Trial._construct_sdk_resource_from_gapic( @@ -215,7 +213,7 @@ def optimal_trials(self) -> Collection[client_abc.TrialInterface]: """ list_optimal_trials_request = {"parent": self.resource_name} optimal_trials_response = self.api_client.list_optimal_trials( - request=list_optimal_trials_request, credentials=self.credentials + request=list_optimal_trials_request ) return [ Trial._construct_sdk_resource_from_gapic( @@ -282,7 +280,6 @@ def suggest( "suggestion_count": count, "client_id": worker, }, - credentials=self.credentials, ) _LOGGER.log_action_started_against_resource_with_lro( "Suggest", "study", self.__class__, suggest_trials_lro @@ -303,5 +300,5 @@ def suggest( def delete(self) -> None: """Deletes the study.""" self.api_client.delete_study( - name=self.resource_name, credentials=self.credentials + name=self.resource_name ) From f296d9a417629f037950bc20608a6883706d95b8 Mon Sep 17 00:00:00 2001 From: halio-g Date: Thu, 21 Jul 2022 17:02:39 -0700 Subject: [PATCH 34/36] Ran the nox -s blacken to format the python file. --- google/cloud/aiplatform/vizier/study.py | 8 ++----- tests/system/aiplatform/test_vizier.py | 12 +++------- tests/unit/aiplatform/test_vizier.py | 32 +++++++------------------ 3 files changed, 13 insertions(+), 39 deletions(-) diff --git a/google/cloud/aiplatform/vizier/study.py b/google/cloud/aiplatform/vizier/study.py index ca9ba72d4c..79ad3c2961 100644 --- a/google/cloud/aiplatform/vizier/study.py +++ b/google/cloud/aiplatform/vizier/study.py @@ -192,9 +192,7 @@ def trials( to the study. 
""" list_trials_request = {"parent": self.resource_name} - trials_response = self.api_client.list_trials( - request=list_trials_request - ) + trials_response = self.api_client.list_trials(request=list_trials_request) return [ Trial._construct_sdk_resource_from_gapic( trial, @@ -299,6 +297,4 @@ def suggest( def delete(self) -> None: """Deletes the study.""" - self.api_client.delete_study( - name=self.resource_name - ) + self.api_client.delete_study(name=self.resource_name) diff --git a/tests/system/aiplatform/test_vizier.py b/tests/system/aiplatform/test_vizier.py index 7a43ea6997..0f592cc7ce 100644 --- a/tests/system/aiplatform/test_vizier.py +++ b/tests/system/aiplatform/test_vizier.py @@ -35,9 +35,7 @@ def test_vizier_lifecycle(self, shared_state): pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) ) - study = Study.create_or_load( - display_name=self._temp_prefix, problem=sc - ) + study = Study.create_or_load(display_name=self._temp_prefix, problem=sc) shared_state["resources"] = [study] trials = study.suggest(count=3, worker="halio_test_worker") for trial in trials: @@ -73,9 +71,7 @@ def test_vizier_study_deletion(self, shared_state): pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) ) - study = Study.create_or_load( - display_name=self._temp_prefix, problem=sc - ) + study = Study.create_or_load(display_name=self._temp_prefix, problem=sc) study.delete() with pytest.raises(exceptions.NotFound): @@ -102,9 +98,7 @@ def test_vizier_trial_deletion(self, shared_state): pyvizier.AutomatedStoppingConfig.decay_curve_stopping_config(use_steps=True) ) - study = Study.create_or_load( - display_name=self._temp_prefix, problem=sc - ) + study = Study.create_or_load(display_name=self._temp_prefix, problem=sc) trials = study.suggest(count=1, worker="halio_test_worker") trials[0].delete() diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index 062c994364..76a0e0c852 100644 --- 
a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -285,9 +285,7 @@ def test_create_study(self, create_study_mock): ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) create_study_mock.assert_called_once_with( parent=_TEST_PARENT, study=_TEST_STUDY, credentials=ANY @@ -315,9 +313,7 @@ def test_create_study_already_exists( ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) lookup_study_mock.assert_called_once_with( request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME}, @@ -343,9 +339,7 @@ def test_materialize_study_config(self, create_study_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) study_config = study.materialize_study_config() @@ -372,9 +366,7 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) trials = study.suggest(count=5, worker="test_worker") @@ -415,9 +407,7 @@ def test_delete(self, create_study_mock, delete_study_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = 
Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) study.delete() @@ -443,9 +433,7 @@ def test_optimal_trials(self, list_optimal_trials_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) trials = study.optimal_trials() @@ -472,9 +460,7 @@ def test_list_trials(self, list_trials_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) trials = study.trials() @@ -501,9 +487,7 @@ def test_get_trial(self, get_trial_mock): scale_type=pyvizier.ScaleType.LINEAR, ) root.add_categorical_param(_TEST_PARAMETER_ID_2, _TEST_PARAMETER_VALUE_2) - study = Study.create_or_load( - display_name=_TEST_DISPLAY_NAME, problem=sc - ) + study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) trial = study.get_trial(1) From 116f4dede0d552b8ae91ebbd57a82b5ccdea0dd6 Mon Sep 17 00:00:00 2001 From: halio-g Date: Fri, 22 Jul 2022 11:56:00 -0700 Subject: [PATCH 35/36] Fixed the unit test failure. 
--- tests/unit/aiplatform/test_vizier.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/unit/aiplatform/test_vizier.py b/tests/unit/aiplatform/test_vizier.py index 76a0e0c852..9b47761368 100644 --- a/tests/unit/aiplatform/test_vizier.py +++ b/tests/unit/aiplatform/test_vizier.py @@ -288,7 +288,7 @@ def test_create_study(self, create_study_mock): study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) create_study_mock.assert_called_once_with( - parent=_TEST_PARENT, study=_TEST_STUDY, credentials=ANY + parent=_TEST_PARENT, study=_TEST_STUDY ) assert type(study) == Study @@ -316,8 +316,7 @@ def test_create_study_already_exists( study = Study.create_or_load(display_name=_TEST_DISPLAY_NAME, problem=sc) lookup_study_mock.assert_called_once_with( - request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME}, - credentials=ANY, + request={"parent": _TEST_PARENT, "display_name": _TEST_DISPLAY_NAME} ) assert type(study) == Study @@ -344,7 +343,7 @@ def test_materialize_study_config(self, create_study_mock): study_config = study.materialize_study_config() create_study_mock.assert_called_once_with( - parent=_TEST_PARENT, study=_TEST_STUDY, credentials=ANY + parent=_TEST_PARENT, study=_TEST_STUDY ) assert type(study_config) == pyvizier.StudyConfig @@ -375,8 +374,7 @@ def test_suggest(self, create_study_mock, suggest_trials_mock): "parent": _TEST_STUDY_NAME, "suggestion_count": 5, "client_id": "test_worker", - }, - credentials=ANY, + } ) assert type(trials[0]) == Trial @@ -411,9 +409,7 @@ def test_delete(self, create_study_mock, delete_study_mock): study.delete() - delete_study_mock.assert_called_once_with( - name=_TEST_STUDY_NAME, credentials=ANY - ) + delete_study_mock.assert_called_once_with(name=_TEST_STUDY_NAME) @pytest.mark.usefixtures("get_study_mock", "create_study_mock", "get_trial_mock") def test_optimal_trials(self, list_optimal_trials_mock): @@ -438,7 +434,7 @@ def test_optimal_trials(self, 
list_optimal_trials_mock): trials = study.optimal_trials() list_optimal_trials_mock.assert_called_once_with( - request={"parent": _TEST_STUDY_NAME}, credentials=ANY + request={"parent": _TEST_STUDY_NAME} ) assert type(trials[0]) == Trial @@ -464,9 +460,7 @@ def test_list_trials(self, list_trials_mock): trials = study.trials() - list_trials_mock.assert_called_once_with( - request={"parent": _TEST_STUDY_NAME}, credentials=ANY - ) + list_trials_mock.assert_called_once_with(request={"parent": _TEST_STUDY_NAME}) assert type(trials[0]) == Trial @pytest.mark.usefixtures("get_study_mock", "create_study_mock") From b2b2428e6b8268f2b741d8a2cb7d65e9511661bd Mon Sep 17 00:00:00 2001 From: halio-g Date: Thu, 28 Jul 2022 10:08:17 -0700 Subject: [PATCH 36/36] Add the wrapper to give more error information about the vizier import error. --- .../aiplatform/vizier/pyvizier/__init__.py | 64 ++++++++++--------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/google/cloud/aiplatform/vizier/pyvizier/__init__.py b/google/cloud/aiplatform/vizier/pyvizier/__init__.py index e388ae69c8..6c5dfc412b 100644 --- a/google/cloud/aiplatform/vizier/pyvizier/__init__.py +++ b/google/cloud/aiplatform/vizier/pyvizier/__init__.py @@ -1,34 +1,40 @@ """PyVizier classes for Pythia policies.""" -from vizier.pyvizier import MetricInformation -from vizier.pyvizier import MetricsConfig -from vizier.pyvizier import MetricType -from vizier.pyvizier import ( - ObjectiveMetricGoal, -) -from vizier.pyvizier import ProblemStatement -from vizier.pyvizier import SearchSpace -from vizier.pyvizier import ( - SearchSpaceSelector, -) -from vizier.pyvizier import Metadata -from vizier.pyvizier import MetadataValue -from vizier.pyvizier import Namespace -from vizier.pyvizier import ExternalType -from vizier.pyvizier import ParameterConfig -from vizier.pyvizier import ParameterType -from vizier.pyvizier import ScaleType -from vizier.pyvizier import CompletedTrial -from vizier.pyvizier import Measurement -from 
vizier.pyvizier import MonotypeParameterSequence -from vizier.pyvizier import Metric -from vizier.pyvizier import ParameterDict -from vizier.pyvizier import ParameterValue -from vizier.pyvizier import Trial -from vizier.pyvizier import ParameterValueTypes -from vizier.pyvizier import TrialFilter -from vizier.pyvizier import TrialStatus -from vizier.pyvizier import TrialSuggestion +try: + from vizier.pyvizier import MetricInformation + from vizier.pyvizier import MetricsConfig + from vizier.pyvizier import MetricType + from vizier.pyvizier import ( + ObjectiveMetricGoal, + ) + from vizier.pyvizier import ProblemStatement + from vizier.pyvizier import SearchSpace + from vizier.pyvizier import ( + SearchSpaceSelector, + ) + from vizier.pyvizier import Metadata + from vizier.pyvizier import MetadataValue + from vizier.pyvizier import Namespace + from vizier.pyvizier import ExternalType + from vizier.pyvizier import ParameterConfig + from vizier.pyvizier import ParameterType + from vizier.pyvizier import ScaleType + from vizier.pyvizier import CompletedTrial + from vizier.pyvizier import Measurement + from vizier.pyvizier import MonotypeParameterSequence + from vizier.pyvizier import Metric + from vizier.pyvizier import ParameterDict + from vizier.pyvizier import ParameterValue + from vizier.pyvizier import Trial + from vizier.pyvizier import ParameterValueTypes + from vizier.pyvizier import TrialFilter + from vizier.pyvizier import TrialStatus + from vizier.pyvizier import TrialSuggestion +except ImportError: + raise ImportError( + "Google-vizier is not installed, and is required to use Vizier client." + 'Please install the SDK using "pip install google-vizier==0.0.3a"' + ) from google.cloud.aiplatform.vizier.pyvizier.proto_converters import TrialConverter from google.cloud.aiplatform.vizier.pyvizier.proto_converters import (