Encoded DeepGPs. #873

Merged: 13 commits, Sep 18, 2024
19 changes: 15 additions & 4 deletions tests/unit/models/gpflux/test_interface.py
@@ -20,7 +20,6 @@
import numpy.testing as npt
import pytest
import tensorflow as tf
from check_shapes import inherit_check_shapes
from gpflow.conditionals.util import sample_mvn
from gpflow.keras import tf_keras
from gpflux.helpers import construct_basic_inducing_variables, construct_basic_kernel
@@ -30,6 +29,7 @@
from tests.util.misc import random_seed
from trieste.data import Dataset
from trieste.models.gpflux import GPfluxPredictor
from trieste.space import CategoricalSearchSpace, EncoderFunction, one_hot_encoder
from trieste.types import TensorType


@@ -38,8 +38,9 @@ def __init__(
self,
optimizer: tf_keras.optimizers.Optimizer | None = None,
likelihood: gpflow.likelihoods.Likelihood = gpflow.likelihoods.Gaussian(0.01),
encoder: EncoderFunction | None = None,
):
super().__init__(optimizer=optimizer)
super().__init__(optimizer=optimizer, encoder=encoder)

if optimizer is None:
self._optimizer = tf_keras.optimizers.Adam()
@@ -61,8 +62,7 @@ def model_keras(self) -> tf_keras.Model:
def optimizer(self) -> tf_keras.optimizers.Optimizer:
return self._optimizer

@inherit_check_shapes
def sample(self, query_points: TensorType, num_samples: int) -> TensorType:
def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType:
# Taken from GPflow implementation of `GPModel.predict_f_samples` in gpflow.models.model
mean, cov = self._model_gpflux.predict_f(query_points, full_cov=True)
mean_for_sample = tf.linalg.adjoint(mean)
@@ -152,3 +152,14 @@ def test_gpflux_predictor_get_observation_noise_raises_for_non_gaussian_likelihood

with pytest.raises(NotImplementedError):
model.get_observation_noise()


def test_gpflux_categorical_predict() -> None:
search_space = CategoricalSearchSpace(["Red", "Green", "Blue"])
query_points = search_space.sample(10)
model = _QuadraticPredictor(encoder=one_hot_encoder(search_space))
mean, variance = model.predict(query_points)
assert mean.shape == [10, 1]
assert variance.shape == [10, 1]
npt.assert_allclose(mean, [[1.0]] * 10, rtol=0.01)
npt.assert_allclose(variance, [[1.0]] * 10, rtol=0.01)
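
For context, a minimal sketch of the encoding behaviour this new test relies on: `one_hot_encoder` is assumed (as its use above suggests) to build an `EncoderFunction` that maps the space's ordinal category representation to one-hot columns. The sampled values shown in the comments are illustrative, not fixed.

    import tensorflow as tf
    from trieste.space import CategoricalSearchSpace, one_hot_encoder

    # A single categorical feature with three categories; samples use an
    # ordinal representation, one float index per point, e.g. [[2.], [0.], [1.]].
    search_space = CategoricalSearchSpace(["Red", "Green", "Blue"])
    encoder = one_hot_encoder(search_space)

    points = search_space.sample(3)  # shape [3, 1]
    encoded = encoder(points)        # shape [3, 3]: one column per category
    tf.debugging.assert_shapes([(encoded, [3, 3])])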
29 changes: 20 additions & 9 deletions trieste/models/gpflux/interface.py
@@ -16,33 +16,46 @@

from abc import ABC, abstractmethod

from check_shapes import inherit_check_shapes
from gpflow.base import Module
from gpflow.keras import tf_keras

from ...space import EncoderFunction
from ...types import TensorType
from ..interfaces import SupportsGetObservationNoise, SupportsPredictY
from ..interfaces import EncodedSupportsPredictY, SupportsGetObservationNoise
from ..optimizer import KerasOptimizer


class GPfluxPredictor(SupportsGetObservationNoise, SupportsPredictY, ABC):
class GPfluxPredictor(SupportsGetObservationNoise, EncodedSupportsPredictY, ABC):
"""
A trainable wrapper for a GPflux deep Gaussian process model. The code assumes subclasses
will use the Keras `fit` method for training, and so they should provide access to both a
`model_keras` and `model_gpflux`.
"""

def __init__(self, optimizer: KerasOptimizer | None = None):
def __init__(
self, optimizer: KerasOptimizer | None = None, encoder: EncoderFunction | None = None
):
"""
:param optimizer: The optimizer wrapper containing the optimizer with which to train the
model and arguments for the wrapper and the optimizer. The optimizer must
be an instance of a :class:`~tf.optimizers.Optimizer`. Defaults to
:class:`~tf.optimizers.Adam` optimizer with 0.01 learning rate.
:param encoder: Optional encoder with which to transform query points before
generating predictions.
"""
if optimizer is None:
optimizer = KerasOptimizer(tf_keras.optimizers.Adam(0.01))

self._optimizer = optimizer
self._encoder = encoder

@property
def encoder(self) -> EncoderFunction | None:
return self._encoder

@encoder.setter
def encoder(self, encoder: EncoderFunction | None) -> None:
self._encoder = encoder

@property
@abstractmethod
@@ -59,18 +72,16 @@ def optimizer(self) -> KerasOptimizer:
"""The optimizer wrapper for training the model."""
return self._optimizer

@inherit_check_shapes
def predict(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
def predict_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
"""Note: unless otherwise noted, this returns the mean and variance of the last layer
conditioned on one sample from the previous layers."""
return self.model_gpflux.predict_f(query_points)

@abstractmethod
def sample(self, query_points: TensorType, num_samples: int) -> TensorType:
def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType:
raise NotImplementedError

@inherit_check_shapes
def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]:
"""Note: unless otherwise noted, this will return the prediction conditioned on one sample
from the lower layers."""
f_mean, f_var = self.model_gpflux.predict_f(query_points)
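
The renames above (`predict` to `predict_encoded`, and so on) follow the encode-then-delegate pattern of the `EncodedSupportsPredictY` mixin: the public methods encode query points first, then call the `*_encoded` variants that subclasses implement. A rough sketch of that pattern, not the actual trieste implementation (which lives in `trieste.models.interfaces`):

    class EncodedPredictSketch:
        """Illustrative only: public methods encode, then delegate."""

        def __init__(self, encoder=None):
            self._encoder = encoder  # an EncoderFunction, or None

        def encode(self, points):
            # Identity when no encoder is configured.
            return points if self._encoder is None else self._encoder(points)

        def predict(self, query_points):
            # Public entry point: encode first, then delegate.
            return self.predict_encoded(self.encode(query_points))

        def predict_encoded(self, query_points):
            # Subclasses override this instead of predict.
            raise NotImplementedError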
18 changes: 10 additions & 8 deletions trieste/models/gpflux/models.py
@@ -19,7 +19,6 @@
import dill
import gpflow
import tensorflow as tf
from check_shapes import inherit_check_shapes
from gpflow.inducing_variables import InducingPoints
from gpflow.keras import tf_keras
from gpflux.layers import GPLayer, LatentVariableLayer
@@ -28,12 +27,13 @@

from ... import logging
from ...data import Dataset
from ...space import EncoderFunction
from ...types import TensorType
from ..interfaces import (
EncodedTrainableProbabilisticModel,
HasReparamSampler,
HasTrajectorySampler,
ReparametrizationSampler,
TrainableProbabilisticModel,
TrajectorySampler,
)
from ..optimizer import KerasOptimizer
@@ -50,7 +50,7 @@


class DeepGaussianProcess(
GPfluxPredictor, TrainableProbabilisticModel, HasReparamSampler, HasTrajectorySampler
GPfluxPredictor, EncodedTrainableProbabilisticModel, HasReparamSampler, HasTrajectorySampler
):
"""
A :class:`TrainableProbabilisticModel` wrapper for a GPflux :class:`~gpflux.models.DeepGP` with
@@ -65,6 +65,7 @@ def __init__(
num_rff_features: int = 1000,
continuous_optimisation: bool = True,
compile_args: Optional[Mapping[str, Any]] = None,
encoder: EncoderFunction | None = None,
):
"""
:param model: The underlying GPflux deep Gaussian process model. Passing in a named closure
@@ -88,6 +89,8 @@
See https://keras.io/api/models/model_training_apis/#compile-method for a
list of possible arguments. The ``optimizer`` and ``metrics`` arguments
must not be included.
:param encoder: Optional encoder with which to transform query points before
generating predictions.
:raise ValueError: If ``model`` has unsupported layers, ``num_rff_features`` is less than 0,
if the ``optimizer`` is not of a supported type, or `compile_args` contains
disallowed arguments.
@@ -113,7 +116,7 @@
f"`LatentVariableLayer`, received {type(layer)} instead."
)

super().__init__(optimizer)
super().__init__(optimizer, encoder)

if num_rff_features <= 0:
raise ValueError(
@@ -304,8 +307,7 @@ def model_gpflux(self) -> DeepGP:
def model_keras(self) -> tf_keras.Model:
return self._model_keras

@inherit_check_shapes
def sample(self, query_points: TensorType, num_samples: int) -> TensorType:
def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType:
trajectory = self.trajectory_sampler().get_trajectory()
expanded_query_points = tf.expand_dims(query_points, -2) # [N, 1, D]
tiled_query_points = tf.tile(expanded_query_points, [1, num_samples, 1]) # [N, S, D]
@@ -329,7 +331,7 @@ def trajectory_sampler(self) -> TrajectorySampler[GPfluxPredictor]:
"""
return DeepGaussianProcessDecoupledTrajectorySampler(self, self._num_rff_features)

def update(self, dataset: Dataset) -> None:
def update_encoded(self, dataset: Dataset) -> None:
inputs = dataset.query_points
new_num_data = inputs.shape[0]
self.model_gpflux.num_data = new_num_data
@@ -366,7 +368,7 @@ def update(self, dataset: Dataset) -> None:

inputs = layer(inputs)

def optimize(self, dataset: Dataset) -> tf_keras.callbacks.History:
def optimize_encoded(self, dataset: Dataset) -> tf_keras.callbacks.History:
"""
Optimize the model with the specified `dataset`.
:param dataset: The data with which to optimize the `model`.
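
With models.py updated, caller-facing usage is unchanged apart from the new constructor argument. A hypothetical end-to-end sketch, assuming `dgp` is a pre-built `gpflux.models.DeepGP` whose input dimension matches the encoded width (3 for this space):

    from trieste.models.gpflux import DeepGaussianProcess
    from trieste.space import CategoricalSearchSpace, one_hot_encoder

    search_space = CategoricalSearchSpace(["Red", "Green", "Blue"])

    # dgp: a gpflux.models.DeepGP built elsewhere, with 3 input dimensions.
    model = DeepGaussianProcess(dgp, encoder=one_hot_encoder(search_space))

    # Callers keep passing raw categorical points; encoding happens inside
    # predict/sample/update/optimize before the *_encoded methods run.
    mean, var = model.predict(search_space.sample(10))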
8 changes: 7 additions & 1 deletion trieste/models/gpflux/sampler.py
@@ -72,6 +72,7 @@ def __init__(self, sample_size: int, model: GPfluxPredictor):
)
for _ in range(len(self._model_gpflux.f_layers))
]
self._encode = lambda x: model.encode(x)
Review comment (collaborator): out of curiosity, does the following work?

Suggested change:
-        self._encode = lambda x: model.encode(x)
+        self._encode = model.encode

Reply (author): Good point. I think it does.


@property
def _model_gpflux(self) -> tf.Module:
@@ -96,7 +97,9 @@ def sample(self, at: TensorType, *, jitter: float = DEFAULTS.JITTER) -> TensorType:
tf.debugging.assert_shapes([(at, [..., 1, None])])
tf.debugging.assert_greater_equal(jitter, 0.0)

samples = tf.repeat(at[..., None, :, :], self._sample_size, axis=-3) # [..., S, 1, D]
samples = tf.repeat(
    self._encode(at[..., None, :, :]), self._sample_size, axis=-3
)  # [..., S, 1, D]

Review comment (collaborator): any reason not to encode first thing?

    at = self._encode(at)

    samples = tf.repeat(at[..., None, :, :], self._sample_size, axis=-3)  # [..., S, 1, D]

Reply (author): Mainly compactness.
for i, layer in enumerate(self._model_gpflux.f_layers):
if isinstance(layer, LatentVariableLayer):
if not self._initialized:
@@ -477,6 +480,8 @@ def __init__(self, model: GPfluxPredictor, num_features: int):
for i in range(len(model.model_gpflux.f_layers))
]

self._encode = lambda x: model.encode(x)

@tf.function
def __call__(self, x: TensorType) -> TensorType:
"""
@@ -486,6 +491,7 @@ def __call__(self, x: TensorType) -> TensorType:
the batch dimension, and `D` is the input dimensionality.
:return: Trajectory samples with shape `[N, B, L]`, where `L` is the number of outputs.
"""
x = self._encode(x)
for layer in self._sampling_layers:
x = layer(x)
return x
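
Because the encoding now happens inside the traced `__call__`, trajectory consumers can likewise stay in the raw representation. A minimal sketch, reusing the hypothetical `model` and `search_space` from the previous example; the [N, B, D] batch shape follows the trajectory convention used in `sample_encoded` above:

    # Draw one decoupled trajectory and evaluate it on raw categorical points.
    trajectory = model.trajectory_sampler().get_trajectory()

    raw_points = search_space.sample(5)          # [5, 1], ordinal representation
    values = trajectory(raw_points[:, None, :])  # encoded internally; [5, 1, L]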