Add gradient calculation for the covariance between points in GPyModelWrapper #347
@@ -118,6 +118,40 @@ def get_covariance_between_points(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """
        return self.model.posterior_covariance_between_points(X1, X2, include_likelihood=False)

    def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """
        Compute the derivative of the posterior covariance matrix between predictions at inputs X1 and X2
        with respect to X1.

        :param X1: Prediction inputs of shape (q1, d)
        :param X2: Prediction inputs of shape (q2, d)
        :return: ndarray of shape (q1, q2, d) representing the gradient of the posterior covariance between
            X1 and X2 with respect to X1. res[i, j, k] is the gradient of Cov(y1[i], y2[j]) with respect to X1[i, k]
        """
        # Get the relevant shapes
        q1, q2, input_dim, n_train = X1.shape[0], X2.shape[0], X1.shape[1], self.model.X.shape[0]
        # Instantiate an array to hold gradients of the prior covariance between outputs at X1 and X_train
        cov_X1_Xtrain_grad = np.zeros((input_dim, q1, n_train))
        # Instantiate an array to hold gradients of the prior covariance between outputs at X1 and X2
        cov_X1_X2_grad = np.zeros((input_dim, q1, q2))
        # Calculate the gradient wrt. X1 of these prior covariances. The GPy API only allows doing so
        # one dimension at a time, hence the iteration over all input dimensions
        for i in range(input_dim):
            # Gradient wrt. X1 of the prior covariance between X1 and X_train
            cov_X1_Xtrain_grad[i, :, :] = self.model.kern.dK_dX(X1, self.model.X, i)
            # Gradient wrt. X1 of the prior covariance between X1 and X2
            cov_X1_X2_grad[i, :, :] = self.model.kern.dK_dX(X1, X2, i)

        # Get the prior covariance between outputs at X_train and X2
        cov_Xtrain_X2 = self.model.kern.K(self.model.X, X2)
        # Calculate the gradient of the posterior covariance between outputs at X1 and X2
        cov_grad = cov_X1_X2_grad - cov_X1_Xtrain_grad @ self.model.posterior.woodbury_inv @ cov_Xtrain_X2
        return cov_grad.transpose((1, 2, 0))
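A minimal usage sketch of the new method follows. The kernel choice, data sizes, and variable names below are illustrative assumptions and not part of the PR; only the wrapper construction and the call itself come from the code above.

import GPy
import numpy as np

from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper

# Illustrative toy data: 20 training points in 2 dimensions (assumed, not from the PR)
X_train = np.random.randn(20, 2)
Y_train = np.random.randn(20, 1)
model = GPyModelWrapper(GPy.models.GPRegression(X_train, Y_train, GPy.kern.RBF(input_dim=2)))

X1 = np.random.randn(5, 2)  # q1 = 5 prediction inputs
X2 = np.random.randn(4, 2)  # q2 = 4 prediction inputs

# Gradient of Cov(y1[i], y2[j]) with respect to X1[i, :]
grad = model.get_covariance_between_points_gradients(X1, X2)
print(grad.shape)  # expected (5, 4, 2), i.e. (q1, q2, d)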
    @property
    def X(self) -> np.ndarray:
        """
@@ -182,24 +216,23 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray:
     :return: Gradient of the posterior covariance of shape (q, q, q, d)
     """
     q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0]
-    dkxX_dx = np.empty((q, n, d))
-    dkxx_dx = np.empty((q, q, d))
+    # Tensor for the gradients of the (q, n) covariance matrix between x_predict and x_train with respect to
+    # x_predict (of shape (q, d))
+    dkxX_dx = np.zeros((d, q*q, n))
+    # Tensor for the gradients of the full covariance matrix at points x_predict (of shape (q, q)) with respect to
+    # x_predict (of shape (q, d))
+    dkxx_dx = np.zeros((d, q*q, q))
[Inline review thread on the lines above]
Review comment: Naming here is a bit unfortunate. I know it was already there, but do you have any ideas how to improve it? The only way to tell the two variables apart is one upper/lower case X, and that's poor to read.
Reply: I renamed it to something more verbose, let me know if you prefer it!
Reply: I tried to improve it, let me know what you think!
     for i in range(d):
-        dkxX_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i)
-        dkxx_dx[:, :, i] = kern.dK_dX(x_predict, x_predict, i)
+        dkxX_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i)
+        dkxx_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i)
+    dkxX_dx = dkxX_dx.reshape((d, q, q, n))
+    dkxx_dx = dkxx_dx.reshape((d, q, q, q))
+    dkxx_dx += dkxx_dx.transpose((0, 1, 3, 2))
+    dkxx_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0.

     K = kern.K(x_predict, x_train)

-    dsigma = np.zeros((q, q, q, d))
-    for i in range(q):
-        for j in range(d):
-            Ks = np.zeros((q, n))
-            Ks[i, :] = dkxX_dx[i, :, j]
-            dKss_dxi = np.zeros((q, q))
-            dKss_dxi[i, :] = dkxx_dx[i, :, j]
-            dKss_dxi[:, i] = dkxx_dx[i, :, j].T
-            dKss_dxi[i, i] = 0
-            dsigma[:, :, i, j] = dKss_dxi - Ks @ w_inv @ K.T - K @ w_inv @ Ks.T
-    return dsigma
+    dsigma = dkxx_dx - K @ w_inv @ dkxX_dx.transpose((0, 1, 3, 2)) - dkxX_dx @ w_inv @ K.T
+    return dsigma.transpose((2, 3, 1, 0))
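For reference, the vectorised expression above computes the standard derivative of the GP posterior covariance. As a sketch in our own notation (not from the PR), with K_xx = kern.K(x_predict, x_predict), K_xX = kern.K(x_predict, x_train), and W^{-1} the Woodbury inverse w_inv:

\[
\frac{\partial \Sigma}{\partial x_{\text{predict}}}
= \frac{\partial K_{xx}}{\partial x_{\text{predict}}}
- \frac{\partial K_{xX}}{\partial x_{\text{predict}}}\, W^{-1} K_{xX}^{\top}
- K_{xX}\, W^{-1} \frac{\partial K_{xX}^{\top}}{\partial x_{\text{predict}}}
\]

The reshape and ::q + 1 slicing handle the bookkeeping of which prediction point each derivative belongs to, since k(x_i, ·) depends only on x_i.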
def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray:

@@ -221,6 +254,7 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray:
            dmu[j, j, i] = (dkxX_dx[j, :, i][None, :] @ w_vec[:, None]).flatten()
    return dmu
class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel):
    """
    A wrapper around GPy multi-output models.
@@ -0,0 +1,53 @@
import GPy
import numpy as np
import pytest

from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper


@pytest.fixture
def test_data(gpy_model):
    np.random.seed(42)
    return np.random.randn(5, gpy_model.X.shape[1])


@pytest.fixture
def test_data2(gpy_model):
    np.random.seed(42)
    return np.random.randn(4, gpy_model.X.shape[1])


def test_joint_prediction_gradients(gpy_model, test_data):
    epsilon = 1e-5
    mean, cov = gpy_model.predict_with_full_covariance(test_data)
    # Get the gradients
    mean_dx, cov_dx = gpy_model.get_joint_prediction_gradients(test_data)

    for i in range(test_data.shape[0]):  # Iterate over each test point
        for j in range(test_data.shape[1]):  # Iterate over each dimension
            # Approximate the gradient numerically
            perturbed_input = test_data.copy()
            perturbed_input[i, j] += epsilon
            mean_perturbed, cov_perturbed = gpy_model.predict_with_full_covariance(perturbed_input)
            mean_dx_numerical = (mean_perturbed - mean) / epsilon
            cov_dx_numerical = (cov_perturbed - cov) / epsilon
            # Check that numerical approx. similar to true gradient
            assert pytest.approx(mean_dx_numerical.ravel(), abs=1e-8, rel=1e-2) == mean_dx[:, i, j]
            assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j]


def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2):
    epsilon = 1e-5
    cov = gpy_model.get_covariance_between_points(test_data, test_data2)
    # Get the gradients
    cov_dx = gpy_model.get_covariance_between_points_gradients(test_data, test_data2)

    for i in range(test_data.shape[0]):  # Iterate over each test point
        for j in range(test_data.shape[1]):  # Iterate over each dimension
            # Approximate the gradient numerically
            perturbed_input = test_data.copy()
            perturbed_input[i, j] += epsilon
            cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2)
            cov_dx_numerical = (cov_perturbed[i] - cov[i]) / epsilon
            # Check that numerical approx. similar to true gradient
            assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[i, :, j]
[Inline review thread on the test file]
Review comment: I suppose this is a bit out of date now?
Reply: Removed redundant flags.
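The tests rely on a gpy_model fixture that is not defined in this file; presumably it is provided by the test suite's conftest.py. Purely as an illustrative assumption (the kernel choice, data sizes, and fixture body are not part of the PR), such a fixture could look like the sketch below, reusing the imports already present in the test file:

@pytest.fixture
def gpy_model():
    # Hypothetical stand-in for the conftest-provided fixture; any GPy regression model would do
    np.random.seed(0)
    X = np.random.randn(20, 2)
    Y = np.random.randn(20, 1)
    gpy_regression = GPy.models.GPRegression(X, Y, GPy.kern.RBF(input_dim=2))
    return GPyModelWrapper(gpy_regression)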