Add gradient calculation for the covariance between points in GPyModelWrapper #347
emukit/model_wrappers/gpy_model_wrappers.py
@@ -61,6 +61,19 @@ def get_joint_prediction_gradients(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         dvariance_dx = dSigma(X, self.model.X, self.model.kern, self.model.posterior.woodbury_inv)
         return dmean_dx, dvariance_dx
 
+    def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
+        """
+        Computes and returns model gradients of the covariance between outputs at points X1 and X2 with respect
+        to X1.
+
+        :param X1: points to compute gradients at, nd array of shape (q1, d)
+        :param X2: points for the covariance of which to compute the gradient, nd array of shape (q2, d)
+        :return: gradient of the covariance matrix of shape (q1, q2) between outputs at X1 and X2
+                 (return shape is (q1, q2, q1, d))
+        """
+        dcov_dx1 = dCov(X1, X2, self.model.X, self.model.kern, self.model.posterior.woodbury_inv)
+        return dcov_dx1
+
     def set_data(self, X: np.ndarray, Y: np.ndarray) -> None:
         """
         Sets training data in model
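For reviewers trying the branch locally, a minimal usage sketch of the new method. The GPy regression model, RBF kernel, and array shapes below are illustrative assumptions, not part of the PR:

```python
import GPy
import numpy as np

from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper

# Illustrative toy model; any (n, d) training set works.
X_train, Y_train = np.random.randn(20, 2), np.random.randn(20, 1)
model = GPyModelWrapper(GPy.models.GPRegression(X_train, Y_train, GPy.kern.RBF(2)))

X1, X2 = np.random.randn(5, 2), np.random.randn(4, 2)
# dcov[:, :, i, j] is the gradient of the (5, 4) posterior covariance
# between outputs at X1 and X2 with respect to X1[i, j].
dcov = model.get_covariance_between_points_gradients(X1, X2)
assert dcov.shape == (5, 4, 5, 2)
```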
@@ -171,24 +184,23 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray:
     :return: Gradient of the posterior covariance of shape (q, q, q, d)
     """
     q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0]
-    dkxX_dx = np.empty((q, n, d))
-    dkxx_dx = np.empty((q, q, d))
+    # Tensor for the gradients of the (q, n) covariance matrix between x_predict and x_train with respect to
+    # x_predict (of shape (q, d))
+    dkxX_dx = np.zeros((d, q*q, n))
+    # Tensor for the gradients of the full covariance matrix at points x_predict (of shape (q, q)) with respect to
+    # x_predict (of shape (q, d))
+    dkxx_dx = np.zeros((d, q*q, q))
Review comment: Naming here is a bit unfortunate. I know it was already there, but do you have any ideas for improving it? The only way to tell the two variables apart is the upper/lower-case X, and that is very hard to read.

Reply: I renamed it to something more verbose, let me know if you prefer it!

Reply: I tried to improve it, let me know what you think!
     for i in range(d):
-        dkxX_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i)
-        dkxx_dx[:, :, i] = kern.dK_dX(x_predict, x_predict, i)
+        dkxX_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i)
+        dkxx_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i)
+    dkxX_dx = dkxX_dx.reshape((d, q, q, n))
+    dkxx_dx = dkxx_dx.reshape((d, q, q, q))
+    dkxx_dx += dkxx_dx.transpose((0, 1, 3, 2))
+    dkxx_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0.
 
     K = kern.K(x_predict, x_train)
 
-    dsigma = np.zeros((q, q, q, d))
-    for i in range(q):
-        for j in range(d):
-            Ks = np.zeros((q, n))
-            Ks[i, :] = dkxX_dx[i, :, j]
-            dKss_dxi = np.zeros((q, q))
-            dKss_dxi[i, :] = dkxx_dx[i, :, j]
-            dKss_dxi[:, i] = dkxx_dx[i, :, j].T
-            dKss_dxi[i, i] = 0
-            dsigma[:, :, i, j] = dKss_dxi - Ks @ w_inv @ K.T - K @ w_inv @ Ks.T
-    return dsigma
+    dsigma = dkxx_dx - K @ w_inv @ dkxX_dx.transpose((0, 1, 3, 2)) - dkxX_dx @ w_inv @ K.T
+    return dsigma.transpose((2, 3, 1, 0))
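A note on the indexing trick used in the rewritten loops: writing to rows `::q + 1` of a `(q*q, m)` buffer places row k of `kern.dK_dX(...)` at block position `(k, k)` once the buffer is reshaped to `(q, q, m)`, which is what lets the final result be computed with batched matrix products instead of the removed nested loops. A tiny self-contained sketch of just that mechanic (the values are arbitrary, for illustration only):

```python
import numpy as np

q, m = 3, 4
buf = np.zeros((q * q, m))

# Rows 0, q+1, 2(q+1), ... of the flat buffer become the diagonal
# blocks [k, k, :] once it is reshaped to (q, q, m).
buf[::q + 1, :] = np.arange(1, q + 1)[:, None] * np.ones((q, m))
cube = buf.reshape((q, q, m))
assert all((cube[k, k] == k + 1).all() for k in range(q))
```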
@@ -210,6 +222,34 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray:
             dmu[j, j, i] = (dkxX_dx[j, :, i][None, :] @ w_vec[:, None]).flatten()
     return dmu
 
+
+def dCov(x1: np.ndarray, x2: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray:
+    """
+    Compute the derivative of the posterior covariance matrix between prediction inputs x1 and x2
+    (of shape (q1, q2)) with respect to x1.
+
+    :param x1: Prediction inputs of shape (q1, d)
+    :param x2: Prediction inputs of shape (q2, d)
+    :param x_train: Training inputs of shape (n, d)
+    :param kern: Covariance of the GP model
+    :param w_inv: Woodbury inverse of the posterior fit of the GP
+    :return: nd array of shape (q1, q2, q1, d) representing the gradient of the posterior covariance between x1 and x2,
+             where res[:, :, i, j] is the gradient of the covariance between outputs at x1 and x2 with respect to x1[i, j]
+    """
+    q1, q2, d, n = x1.shape[0], x2.shape[0], x1.shape[1], x_train.shape[0]
+    dkx1X_dx = np.zeros((d, q1*q1, n))
+    dkx1x2_dx = np.zeros((d, q1*q1, q2))
+    for i in range(d):
+        dkx1X_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x_train, i)
+        dkx1x2_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x2, i)
+    dkx1X_dx = dkx1X_dx.reshape((d, q1, q1, n))
+    dkx1x2_dx = dkx1x2_dx.reshape((d, q1, q1, q2))
Review comment (on the variable names above): Naming here is hard to get through.
+
+    K_Xx2 = kern.K(x_train, x2)
+    dcov = dkx1x2_dx - dkx1X_dx @ w_inv @ K_Xx2
+    return dcov.transpose((2, 3, 1, 0))
+
+
 class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel):
     """
     A wrapper around GPy multi-output models.
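For reference when checking the algebra in dCov: the posterior covariance between prediction inputs is

$$\mathrm{Cov}(x_1, x_2) = k(x_1, x_2) - k(x_1, X)\, W^{-1}\, k(X, x_2),$$

where $X$ are the training inputs and $W^{-1}$ is the Woodbury inverse. Since only $x_1$ is differentiated, the gradient is

$$\frac{\partial\, \mathrm{Cov}(x_1, x_2)}{\partial x_1} = \frac{\partial k(x_1, x_2)}{\partial x_1} - \frac{\partial k(x_1, X)}{\partial x_1}\, W^{-1}\, k(X, x_2),$$

which is exactly the two-term expression assigned to dcov. Unlike dSigma, no symmetrization terms appear, because $x_2$ is held fixed.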
New test file:
@@ -0,0 +1,53 @@
+import GPy
+import numpy as np
+import pytest
+
+from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper
+
+
+@pytest.fixture
+def test_data(gpy_model):
+    np.random.seed(42)
+    return np.random.randn(5, gpy_model.X.shape[1])
+
+
+@pytest.fixture
+def test_data2(gpy_model):
+    np.random.seed(42)
+    return np.random.randn(4, gpy_model.X.shape[1])
+
+
+def test_joint_prediction_gradients(gpy_model, test_data):
+    epsilon = 1e-5
+    mean, cov = gpy_model.predict_with_full_covariance(test_data)
+    # Get the gradients
+    mean_dx, cov_dx = gpy_model.get_joint_prediction_gradients(test_data)
+
+    for i in range(test_data.shape[0]):  # Iterate over each test point
+        for j in range(test_data.shape[1]):  # Iterate over each dimension
+            # Approximate the gradient numerically
+            perturbed_input = test_data.copy()
+            perturbed_input[i, j] += epsilon
+            mean_perturbed, cov_perturbed = gpy_model.predict_with_full_covariance(perturbed_input)
+            mean_dx_numerical = (mean_perturbed - mean) / epsilon
+            cov_dx_numerical = (cov_perturbed - cov) / epsilon
+            # Check that the numerical approximation is similar to the true gradient
+            assert pytest.approx(mean_dx_numerical.ravel(), abs=1e-8, rel=1e-2) == mean_dx[:, i, j]
+            assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j]
+
+
+def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2):
+    epsilon = 1e-5
+    cov = gpy_model.get_covariance_between_points(test_data, test_data2)
+    # Get the gradients
+    cov_dx = gpy_model.get_covariance_between_points_gradients(test_data, test_data2)
+
+    for i in range(test_data.shape[0]):  # Iterate over each test point
+        for j in range(test_data.shape[1]):  # Iterate over each dimension
+            # Approximate the gradient numerically
+            perturbed_input = test_data.copy()
+            perturbed_input[i, j] += epsilon
+            cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2)
+            cov_dx_numerical = (cov_perturbed - cov) / epsilon
+            # Check that the numerical approximation is similar to the true gradient
+            assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j]
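Note for anyone running these tests standalone: the `gpy_model` fixture is not part of this diff (it presumably comes from emukit's shared test fixtures). A minimal stand-in might look like the following sketch; the RBF kernel and data sizes are illustrative assumptions:

```python
import GPy
import numpy as np
import pytest

from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper


@pytest.fixture
def gpy_model():
    # Illustrative stand-in fixture: a small wrapped GP regression model.
    np.random.seed(0)
    X = np.random.randn(10, 2)
    Y = np.random.randn(10, 1)
    return GPyModelWrapper(GPy.models.GPRegression(X, Y, GPy.kern.RBF(2)))
```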
Review comment (on get_covariance_between_points_gradients): Three inputs come directly from the model. I wonder if it makes sense to just pass the model in, or to not extract dCov as a stateless function at all. Consider this: all this method does is call dCov, literally nothing else, and so far dCov is only used here. It seems too specific to get reused. Is that impression correct?

Reply: I think that's fair to say; I was primarily just trying to mirror the implementation of dSigma that was already in the module. Happy to not separate it out.