Refactors Gaussian process kernels to abstract from hyperparameters

dimtsap committed Mar 6, 2023
1 parent 43a5804 commit 5564ab8

Showing 5 changed files with 110 additions and 84 deletions.
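
In short: kernels no longer receive the full hyperparameter vector through a per-call params argument. Each kernel now stores only its length scales in kernel_parameter, calculate_kernel_matrix(x, s) returns a unit-variance correlation matrix, and the caller applies the process variance sigma**2 (plus any noise term). A minimal before/after sketch, assuming RBF is importable from the package shown in the file paths below; the arrays and hyperparameter values are illustrative:

import numpy as np
from UQpy.surrogates.gaussian_process.kernels import RBF

s = np.random.rand(20, 2)  # training inputs (illustrative)
x = np.random.rand(5, 2)   # prediction inputs (illustrative)

# Old API (before this commit): length scale and process std travel together.
# k = kernel.c(x=x, s=s, params=[l, sigma])

# New API (after this commit): the kernel holds only its length scale;
# the caller applies the process variance sigma**2.
kernel = RBF(kernel_parameter=0.5)
sigma = 1.3
k = sigma ** 2 * kernel.calculate_kernel_matrix(x=x, s=s)
print(k.shape)  # (5, 20)
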
38 changes: 26 additions & 12 deletions src/UQpy/surrogates/gaussian_process/GaussianProcessRegression.py
@@ -4,7 +4,6 @@
 
 from beartype import beartype
 
-
 from UQpy.utilities.Utilities import process_random_state
 from UQpy.surrogates.baseclass.Surrogate import Surrogate
 from UQpy.utilities.ValidationTypes import RandomStateType
@@ -204,14 +203,18 @@ def fit(
             raise NotImplementedError("Maximum likelihood estimator failed: Choose different starting point or "
                                       "increase nopt")
         t = np.argmin(fun_value)
-        self.hyperparameters = 10**minimizer[t, :]
+        self.hyperparameters = 10 ** minimizer[t, :]
 
         # Updated Correlation matrix corresponding to MLE estimates of hyperparameters
         if self.noise:
-            self.K = self.kernel.c(x=s_, s=s_, params=self.hyperparameters[:-1]) + \
-                     np.eye(nsamples)*(self.hyperparameters[-1])**2
+            self.kernel.kernel_parameter = self.hyperparameters[:-2]
+            sigma = self.hyperparameters[-2]
+            self.K = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=s_, s=s_) + \
+                     np.eye(nsamples) * (self.hyperparameters[-1]) ** 2
         else:
-            self.K = self.kernel.c(x=s_, s=s_, params=self.hyperparameters)
+            self.kernel.kernel_parameter = self.hyperparameters[:-1]
+            sigma = self.hyperparameters[-1]
+            self.K = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=s_, s=s_)
 
         self.cc = cholesky(self.K + 1e-10 * np.eye(nsamples), lower=True)
         self.alpha_ = cho_solve((self.cc, True), y_)
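
For reference, the MLE vector unpacked above appears to be laid out as [length scales..., sigma, noise_std] with noise and [length scales..., sigma] without. A standalone sketch of the covariance assembly and Cholesky solve, using a hypothetical stand-in rbf_like for the kernel object:

import numpy as np
from scipy.linalg import cholesky, cho_solve

def rbf_like(x, s, length_scales):
    # Stand-in for kernel.calculate_kernel_matrix: unit-variance RBF correlations.
    d2 = (((x[:, None, :] - s[None, :, :]) / length_scales) ** 2).sum(axis=2)
    return np.exp(-0.5 * d2)

n = 10
s_ = np.random.rand(n, 2)
y_ = np.random.rand(n, 1)
hyperparameters = np.array([0.5, 2.0, 1.3, 0.05])  # [l1, l2, sigma, noise_std]

sigma, noise_std = hyperparameters[-2], hyperparameters[-1]
K = sigma ** 2 * rbf_like(s_, s_, hyperparameters[:-2]) + np.eye(n) * noise_std ** 2
cc = cholesky(K + 1e-10 * np.eye(n), lower=True)  # jitter keeps the factorization stable
alpha_ = cho_solve((cc, True), y_)                # solves K @ alpha_ = y_
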
@@ -264,7 +267,9 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
 
         if hyperparameters is not None:
             # This is used for MLE constraints, if constraints call 'predict' method.
-            K = self.kernel.c(x=s_, s=s_, params=kernelparameters) + \
+            self.kernel.kernel_parameter = kernelparameters[:-1]
+            sigma = kernelparameters[-1]
+            K = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=s_, s=s_) + \
                 np.eye(self.samples.shape[0]) * (noise_std ** 2)
             cc = np.linalg.cholesky(K + 1e-10 * np.eye(self.samples.shape[0]))
             mu = 0
@@ -278,7 +283,7 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
                 # Design parameters (beta: regression coefficient)
                 beta = np.linalg.solve(g_, np.matmul(np.transpose(q_), y_dash))
                 mu = np.einsum("ij,jk->ik", self.F, beta)
-                alpha_ = cho_solve((cc, True), y_-mu)
+                alpha_ = cho_solve((cc, True), y_ - mu)
         else:
             cc, alpha_ = self.cc, self.alpha_

@@ -290,15 +295,20 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
         else:
             mu1 = np.einsum("ij,jk->ik", fx, self.beta)
 
-        k = self.kernel.c(x=x_, s=s_, params=kernelparameters)
+        self.kernel.kernel_parameter = kernelparameters[:-1]
+        sigma = kernelparameters[-1]
+
+        k = sigma**2*self.kernel.calculate_kernel_matrix(x=x_, s=s_)
         y = mu1 + k @ alpha_
         if self.normalize:
             y = self.value_mean + y * self.value_std
         if x_.shape[1] == 1:
             y = y.flatten()
 
         if return_std:
-            k1 = self.kernel.c(x=x_, s=x_, params=kernelparameters)
+            self.kernel.kernel_parameter = kernelparameters[:-1]
+            sigma = kernelparameters[-1]
+            k1 = sigma**2*self.kernel.calculate_kernel_matrix(x=x_, s=x_)
             var = (k1 - k @ cho_solve((cc, True), k.T)).diagonal()
             mse = np.sqrt(var)
             if self.normalize:
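
The predict() hunks above are the standard GP posterior: mean k(x, S) K^{-1} y and variance diag(k(x, x) - k(x, S) K^{-1} k(S, x)). A self-contained sketch with an illustrative covariance function k_rbf (not UQpy code):

import numpy as np
from scipy.linalg import cholesky, cho_solve

def k_rbf(a, b, l=0.7, sigma=1.2):
    # Illustrative covariance function (not UQpy code).
    d2 = (((a[:, None, :] - b[None, :, :]) / l) ** 2).sum(axis=2)
    return sigma ** 2 * np.exp(-0.5 * d2)

s_ = np.random.rand(15, 1)  # training inputs
y_ = np.sin(3 * s_)         # training targets
x_ = np.random.rand(4, 1)   # prediction inputs

K = k_rbf(s_, s_) + 1e-10 * np.eye(15)
cc = cholesky(K, lower=True)
alpha_ = cho_solve((cc, True), y_)

k = k_rbf(x_, s_)
y = k @ alpha_                                                     # posterior mean
var = (k_rbf(x_, x_) - k @ cho_solve((cc, True), k.T)).diagonal()
mse = np.sqrt(var)                                                 # posterior std, as in the diff
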
@@ -326,9 +336,13 @@ def log_likelihood(p0, k_, s, y, ind_noise, fx_):
         m = s.shape[0]
 
         if ind_noise:
-            k__ = k_.c(x=s, s=s, params=10 ** p0[:-1]) + np.eye(m) * (10 ** p0[-1]) ** 2
+            k_.kernel_parameter = 10 ** p0[:-2]
+            sigma = 10 ** p0[-2]
+            k__ = sigma ** 2 * k_.calculate_kernel_matrix(x=s, s=s) + np.eye(m) * (10 ** p0[-1]) ** 2
         else:
-            k__ = k_.c(x=s, s=s, params=10 ** p0)
+            k_.kernel_parameter = 10 ** p0[:-1]
+            sigma = 10 ** p0[-1]
+            k__ = sigma ** 2 * k_.calculate_kernel_matrix(x=s, s=s)
         cc = cholesky(k__ + 1e-10 * np.eye(m), lower=True)
 
         mu = 0
@@ -343,7 +357,7 @@ def log_likelihood(p0, k_, s, y, ind_noise, fx_):
             beta = np.linalg.solve(g_, np.matmul(np.transpose(q_), y_dash))
             mu = np.einsum("ij,jk->ik", fx_, beta)
 
-        term1 = (y-mu).T @ (cho_solve((cc, True), y-mu))
+        term1 = (y - mu).T @ (cho_solve((cc, True), y - mu))
         term2 = 2 * np.sum(np.log(np.abs(np.diag(cc))))
 
         return 0.5 * (term1 + term2 + m * np.log(2 * np.pi))[0, 0]
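
Note that the optimizer searches in log10 space, which is why log_likelihood exponentiates p0 before splitting it. A sketch of the same zero-mean negative log marginal likelihood for the noiseless, trend-free branch, assuming a kernel with the refactored interface:

import numpy as np
from scipy.linalg import cholesky, cho_solve
from UQpy.surrogates.gaussian_process.kernels import RBF

def neg_log_marginal_likelihood(p0, kernel, s, y):
    kernel.kernel_parameter = 10 ** p0[:-1]  # length scales, searched in log10 space
    sigma = 10 ** p0[-1]                     # process standard deviation
    m = s.shape[0]
    k__ = sigma ** 2 * kernel.calculate_kernel_matrix(x=s, s=s)
    cc = cholesky(k__ + 1e-10 * np.eye(m), lower=True)
    term1 = y.T @ cho_solve((cc, True), y)            # data-fit term
    term2 = 2 * np.sum(np.log(np.abs(np.diag(cc))))   # log-determinant via Cholesky
    return 0.5 * (term1 + term2 + m * np.log(2 * np.pi))[0, 0]

s = np.random.rand(12, 1)
y = np.cos(2 * s)
print(neg_log_marginal_likelihood(np.array([-0.3, 0.1]), RBF(), s, y))
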
23 changes: 12 additions & 11 deletions src/UQpy/surrogates/gaussian_process/kernels/Matern.py
@@ -4,29 +4,30 @@
 
 
 class Matern(Kernel):
-    def __init__(self, nu=1.5):
+    def __init__(self, kernel_parameter: Union[int, float] = 1, nu=1.5):
         """
         Matern Kernel is a generalization of Radial Basis Function kernel.
 
         :params nu: Shape parameter. For nu=0.5, 1.5, 2.5 and infinity, matern coincides with the exponential,
             matern-3/2, matern-5/2 and RBF covariance function, respectively.
         """
+        super().__init__(kernel_parameter)
         self.nu = nu
 
-    def c(self, x, s, params):
-        l, sigma = params[:-1], params[-1]
-        stack = cdist(x/l, s/l, metric='euclidean')
+    def calculate_kernel_matrix(self, x, s):
+        l = self.kernel_parameter
+        stack = cdist(x / l, s / l, metric='euclidean')
         if self.nu == 0.5:
-            return sigma**2 * np.exp(-np.abs(stack))
+            return np.exp(-np.abs(stack))
         elif self.nu == 1.5:
-            return sigma**2 * (1+np.sqrt(3)*stack)*np.exp(-np.sqrt(3)*stack)
+            return (1 + np.sqrt(3) * stack) * np.exp(-np.sqrt(3) * stack)
         elif self.nu == 2.5:
-            return sigma**2 * (1+np.sqrt(5)*stack+5*(stack**2)/3)*np.exp(-np.sqrt(5)*stack)
+            return (1 + np.sqrt(5) * stack + 5 * (stack ** 2) / 3) * np.exp(-np.sqrt(5) * stack)
         elif self.nu == np.inf:
-            return sigma**2 * np.exp(-(stack**2)/2)
+            return np.exp(-(stack ** 2) / 2)
         else:
-            stack *= np.sqrt(2*self.nu)
-            tmp = 1/(gamma(self.nu)*(2**(self.nu-1)))
+            stack *= np.sqrt(2 * self.nu)
+            tmp = 1 / (gamma(self.nu) * (2 ** (self.nu - 1)))
             tmp1 = stack ** self.nu
             tmp2 = kv(self.nu, stack)
-            return sigma**2 * tmp * tmp1 * tmp2
+            return tmp * tmp1 * tmp2
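
With sigma**2 factored out, every branch of Matern now returns a unit-variance correlation matrix. A usage sketch of the closed-form cases, assuming Matern is importable from the package in the path above:

import numpy as np
from UQpy.surrogates.gaussian_process.kernels import Matern

s = np.linspace(0, 1, 6).reshape(-1, 1)

# The closed-form branches; each returns a unit-variance correlation matrix.
for nu in [0.5, 1.5, 2.5, np.inf]:
    kernel = Matern(kernel_parameter=0.4, nu=nu)
    r = kernel.calculate_kernel_matrix(x=s, s=s)
    assert np.allclose(np.diag(r), 1.0)  # sigma**2 is now applied by the caller
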
17 changes: 10 additions & 7 deletions src/UQpy/surrogates/gaussian_process/kernels/RBF.py
@@ -1,15 +1,18 @@
+from typing import Union
+
 from UQpy.surrogates.gaussian_process.kernels.baseclass.Kernel import *
 
 
 class RBF(Kernel):
-    def c(self, x, s, params):
+    def __init__(self, kernel_parameter: Union[int, float] = 1.0):
+        super().__init__(kernel_parameter)
+
+    def calculate_kernel_matrix(self, x, s):
         """
         This method compute the RBF kernel on sample points 'x' and 's'.
 
-        :params x: An array containing input samples.
-        :params s: An array containing input samples.
-        :params params: A list/array of hyperparameters containing length scales and the process variance.
+        :params x: An array containing training points.
+        :params s: An array containing input points.
         """
-        stack = Kernel.check_samples_and_return_stack(x/params[:-1], s/params[:-1])
-        k = params[-1] ** 2 * np.exp(np.sum(-0.5 * (stack ** 2), axis=2))
-        return k
+        stack = Kernel.check_samples_and_return_stack(x / self.kernel_parameter, s / self.kernel_parameter)
+        return np.exp(np.sum(-0.5 * (stack ** 2), axis=2))
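
A quick equivalence check between the old and new RBF forms; the broadcast expression below reproduces what check_samples_and_return_stack computes and is purely illustrative:

import numpy as np
from UQpy.surrogates.gaussian_process.kernels import RBF

x = np.random.rand(4, 3)
s = np.random.rand(7, 3)
l, sigma = 0.8, 2.5

kernel = RBF(kernel_parameter=l)
new_k = sigma ** 2 * kernel.calculate_kernel_matrix(x=x, s=s)

# Old c(x, s, params=[l, sigma]) folded sigma**2 in; the two forms agree.
stack = (x[:, None, :] - s[None, :, :]) / l
old_k = sigma ** 2 * np.exp(np.sum(-0.5 * stack ** 2, axis=2))
assert np.allclose(old_k, new_k)
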
38 changes: 15 additions & 23 deletions src/UQpy/surrogates/gaussian_process/kernels/baseclass/Kernel.py
@@ -1,4 +1,6 @@
 from abc import ABC, abstractmethod
+from typing import Union
+
 import numpy as np
 
 
@@ -8,8 +10,20 @@ class Kernel(ABC):
     functions.
     """
 
+    def __init__(self, kernel_parameter: Union[int, float]):
+        self.__kernel_parameter = kernel_parameter
+
+    @property
+    def kernel_parameter(self):
+        return self.__kernel_parameter
+
+    @kernel_parameter.setter
+    def kernel_parameter(self, value):
+        self.__kernel_parameter = value
+
+
     @abstractmethod
-    def c(self, x, s, params):
+    def calculate_kernel_matrix(self, x, s):
         """
         Abstract method that needs to be implemented by the user when creating a new Covariance function.
         """
@@ -23,25 +37,3 @@ def check_samples_and_return_stack(x, s):
             np.swapaxes(np.atleast_3d(x_), 1, 2), (1, np.size(s_, 0), 1)
         ) - np.tile(s_, (np.size(x_, 0), 1, 1))
         return stack
-
-    # @staticmethod
-    # def derivatives(x_, s_, params):
-    #     stack = Kernel.check_samples_and_return_stack(x_, s_)
-    #     # Taking stack and creating array of all thetaj*dij
-    #     after_parameters = params * abs(stack)
-    #     # Create matrix of all ones to compare
-    #     comp_ones = np.ones((np.size(x_, 0), np.size(s_, 0), np.size(s_, 1)))
-    #     # zeta_matrix has all values min{1,theta*dij}
-    #     zeta_matrix_ = np.minimum(after_parameters, comp_ones)
-    #     # Copy zeta_matrix to another matrix that will used to find where derivative should be zero
-    #     indices = zeta_matrix_.copy()
-    #     # If value of min{1,theta*dij} is 1, the derivative should be 0.
-    #     # So, replace all values of 1 with 0, then perform the .astype(bool).astype(int)
-    #     # operation like in the linear example, so you end up with an array of 1's where
-    #     # the derivative should be caluclated and 0 where it should be zero
-    #     indices[indices == 1] = 0
-    #     # Create matrix of all |dij| (where non zero) to be used in calculation of dR/dtheta
-    #     dtheta_derivs_ = indices.astype(bool).astype(int) * abs(stack)
-    #     # Same as above, but for matrix of all thetaj where non-zero
-    #     dx_derivs_ = indices.astype(bool).astype(int) * params * np.sign(stack)
-    #     return zeta_matrix_, dtheta_derivs_, dx_derivs_
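
Under the refactored base class, a custom covariance subclasses Kernel, forwards its length scale(s) to super().__init__, and implements calculate_kernel_matrix without a variance factor. A hypothetical example (the Exponential class below is illustrative, not part of this commit):

import numpy as np
from UQpy.surrogates.gaussian_process.kernels.baseclass.Kernel import Kernel

class Exponential(Kernel):
    """Hypothetical user-defined covariance; illustrative only."""

    def __init__(self, kernel_parameter: float = 1.0):
        super().__init__(kernel_parameter)  # stores the length scale(s)

    def calculate_kernel_matrix(self, x, s):
        # Return a unit-variance matrix; the GP applies sigma**2 outside,
        # as seen in fit() and predict() above.
        stack = Kernel.check_samples_and_return_stack(x / self.kernel_parameter,
                                                      s / self.kernel_parameter)
        return np.exp(-np.sqrt(np.sum(stack ** 2, axis=2)))

k = Exponential(kernel_parameter=0.5)
print(k.calculate_kernel_matrix(x=np.random.rand(3, 2), s=np.random.rand(4, 2)).shape)  # (3, 4)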