Add gradient calculation for the covariance between points in GPyModelWrapper #347
@@ -118,6 +118,40 @@ def get_covariance_between_points(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """
        return self.model.posterior_covariance_between_points(X1, X2, include_likelihood=False)

    def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """
        Compute the derivative of the posterior covariance matrix between predictions at inputs X1 and X2
        with respect to X1.

        :param X1: Prediction inputs of shape (q1, d)
        :param X2: Prediction inputs of shape (q2, d)
        :return: ndarray of shape (q1, q2, d) representing the gradient of the posterior covariance between
            X1 and X2 with respect to X1. res[i, j, k] is the gradient of Cov(y1[i], y2[j]) with respect to X1[i, k]
        """
        # Get the relevant shapes
        q1, q2, input_dim, n_train = X1.shape[0], X2.shape[0], X1.shape[1], self.model.X.shape[0]
        # Instantiate an array to hold gradients of the prior covariance between outputs at X1 and X_train
        cov_X1_Xtrain_grad = np.zeros((input_dim, q1, n_train))
        # Instantiate an array to hold gradients of the prior covariance between outputs at X1 and X2
        cov_X1_X2_grad = np.zeros((input_dim, q1, q2))
        # Calculate the gradient wrt. X1 of these prior covariances. The GPy API only allows doing so
        # one dimension at a time, hence the iteration over all input dimensions
        for i in range(input_dim):
            # Gradient wrt. X1 of the prior covariance between X1 and X_train
            cov_X1_Xtrain_grad[i, :, :] = self.model.kern.dK_dX(X1, self.model.X, i)
            # Gradient wrt. X1 of the prior covariance between X1 and X2
            cov_X1_X2_grad[i, :, :] = self.model.kern.dK_dX(X1, X2, i)

        # Get the prior covariance between outputs at X_train and X2
        cov_Xtrain_X2 = self.model.kern.K(self.model.X, X2)
        # Calculate the gradient of the posterior covariance between outputs at X1 and X2
        cov_grad = cov_X1_X2_grad - cov_X1_Xtrain_grad @ self.model.posterior.woodbury_inv @ cov_Xtrain_X2
        return cov_grad.transpose((1, 2, 0))
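A minimal usage sketch of the new method follows. The kernel choice, data sizes, and variable names below are illustrative assumptions and not part of the PR; only the wrapper construction and the call itself come from the code above.

import GPy
import numpy as np

from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper

# Illustrative toy data: 20 training points in 2 dimensions (assumed, not from the PR)
X_train = np.random.randn(20, 2)
Y_train = np.random.randn(20, 1)
model = GPyModelWrapper(GPy.models.GPRegression(X_train, Y_train, GPy.kern.RBF(input_dim=2)))

X1 = np.random.randn(5, 2)  # q1 = 5 prediction inputs
X2 = np.random.randn(4, 2)  # q2 = 4 prediction inputs

# Gradient of Cov(y1[i], y2[j]) with respect to X1[i, :]
grad = model.get_covariance_between_points_gradients(X1, X2)
print(grad.shape)  # expected (5, 4, 2), i.e. (q1, q2, d)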
    @property
    def X(self) -> np.ndarray:
        """
@@ -182,24 +216,23 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray:
     :return: Gradient of the posterior covariance of shape (q, q, q, d)
     """
     q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0]
-    dkxX_dx = np.empty((q, n, d))
-    dkxx_dx = np.empty((q, q, d))
+    # Tensor for the gradients of the (q, n) covariance matrix between x_predict and x_train with respect to
+    # x_predict (of shape (q, d))
+    dkxX_dx = np.zeros((d, q*q, n))
+    # Tensor for the gradients of the full covariance matrix at points x_predict (of shape (q, q)) with respect to
+    # x_predict (of shape (q, d))
+    dkxx_dx = np.zeros((d, q*q, q))
[Inline review thread on the lines above]
Review comment: Naming here is a bit unfortunate. I know it was already there, but do you have any ideas how to improve it? The only way to tell the two variables apart is one upper/lower case X, and that's poor to read.
Reply: I renamed it to something more verbose, let me know if you prefer it!
Reply: I tried to improve it, let me know what you think!
     for i in range(d):
-        dkxX_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i)
-        dkxx_dx[:, :, i] = kern.dK_dX(x_predict, x_predict, i)
+        dkxX_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i)
+        dkxx_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i)
+    dkxX_dx = dkxX_dx.reshape((d, q, q, n))
+    dkxx_dx = dkxx_dx.reshape((d, q, q, q))
+    dkxx_dx += dkxx_dx.transpose((0, 1, 3, 2))
+    dkxx_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0.

     K = kern.K(x_predict, x_train)

-    dsigma = np.zeros((q, q, q, d))
-    for i in range(q):
-        for j in range(d):
-            Ks = np.zeros((q, n))
-            Ks[i, :] = dkxX_dx[i, :, j]
-            dKss_dxi = np.zeros((q, q))
-            dKss_dxi[i, :] = dkxx_dx[i, :, j]
-            dKss_dxi[:, i] = dkxx_dx[i, :, j].T
-            dKss_dxi[i, i] = 0
-            dsigma[:, :, i, j] = dKss_dxi - Ks @ w_inv @ K.T - K @ w_inv @ Ks.T
-    return dsigma
+    dsigma = dkxx_dx - K @ w_inv @ dkxX_dx.transpose((0, 1, 3, 2)) - dkxX_dx @ w_inv @ K.T
+    return dsigma.transpose((2, 3, 1, 0))
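For reference, the vectorised expression above computes the standard derivative of the GP posterior covariance. As a sketch in our own notation (not from the PR), with K_xx = kern.K(x_predict, x_predict), K_xX = kern.K(x_predict, x_train), and W^{-1} the Woodbury inverse w_inv:

\[
\frac{\partial \Sigma}{\partial x_{\text{predict}}}
= \frac{\partial K_{xx}}{\partial x_{\text{predict}}}
- \frac{\partial K_{xX}}{\partial x_{\text{predict}}}\, W^{-1} K_{xX}^{\top}
- K_{xX}\, W^{-1} \frac{\partial K_{xX}^{\top}}{\partial x_{\text{predict}}}
\]

The reshape and ::q + 1 slicing handle the bookkeeping of which prediction point each derivative belongs to, since k(x_i, ·) depends only on x_i.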
def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray:

@@ -221,6 +254,7 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray:
            dmu[j, j, i] = (dkxX_dx[j, :, i][None, :] @ w_vec[:, None]).flatten()
    return dmu
class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel):
    """
    A wrapper around GPy multi-output models.
@@ -0,0 +1,53 @@
import GPy
import numpy as np
import pytest

from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper


@pytest.fixture
def test_data(gpy_model):
    np.random.seed(42)
    return np.random.randn(5, gpy_model.X.shape[1])


@pytest.fixture
def test_data2(gpy_model):
    np.random.seed(42)
    return np.random.randn(4, gpy_model.X.shape[1])


def test_joint_prediction_gradients(gpy_model, test_data):
    epsilon = 1e-5
    mean, cov = gpy_model.predict_with_full_covariance(test_data)
    # Get the gradients
    mean_dx, cov_dx = gpy_model.get_joint_prediction_gradients(test_data)

    for i in range(test_data.shape[0]):  # Iterate over each test point
        for j in range(test_data.shape[1]):  # Iterate over each dimension
            # Approximate the gradient numerically
            perturbed_input = test_data.copy()
            perturbed_input[i, j] += epsilon
            mean_perturbed, cov_perturbed = gpy_model.predict_with_full_covariance(perturbed_input)
            mean_dx_numerical = (mean_perturbed - mean) / epsilon
            cov_dx_numerical = (cov_perturbed - cov) / epsilon
            # Check that numerical approx. similar to true gradient
            assert pytest.approx(mean_dx_numerical.ravel(), abs=1e-8, rel=1e-2) == mean_dx[:, i, j]
            assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j]


def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2):
    epsilon = 1e-5
    cov = gpy_model.get_covariance_between_points(test_data, test_data2)
    # Get the gradients
    cov_dx = gpy_model.get_covariance_between_points_gradients(test_data, test_data2)

    for i in range(test_data.shape[0]):  # Iterate over each test point
        for j in range(test_data.shape[1]):  # Iterate over each dimension
            # Approximate the gradient numerically
            perturbed_input = test_data.copy()
            perturbed_input[i, j] += epsilon
            cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2)
            cov_dx_numerical = (cov_perturbed[i] - cov[i]) / epsilon
            # Check that numerical approx. similar to true gradient
            assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[i, :, j]
[Inline review thread on the test file]
Review comment: I suppose this is a bit out of date now?
Reply: Removed redundant flags.
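The tests rely on a gpy_model fixture that is not defined in this file; presumably it is provided by the test suite's conftest.py. Purely as an illustrative assumption (the kernel choice, data sizes, and fixture body are not part of the PR), such a fixture could look like the sketch below, reusing the imports already present in the test file:

@pytest.fixture
def gpy_model():
    # Hypothetical stand-in for the conftest-provided fixture; any GPy regression model would do
    np.random.seed(0)
    X = np.random.randn(20, 2)
    Y = np.random.randn(20, 1)
    gpy_regression = GPy.models.GPRegression(X, Y, GPy.kern.RBF(input_dim=2))
    return GPyModelWrapper(gpy_regression)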