diff --git a/src/UQpy/surrogates/gaussian_process/GaussianProcessRegression.py b/src/UQpy/surrogates/gaussian_process/GaussianProcessRegression.py
index 1d4741e58..caf86aa00 100755
--- a/src/UQpy/surrogates/gaussian_process/GaussianProcessRegression.py
+++ b/src/UQpy/surrogates/gaussian_process/GaussianProcessRegression.py
@@ -4,7 +4,6 @@
 from beartype import beartype
-
 from UQpy.utilities.Utilities import process_random_state
 from UQpy.surrogates.baseclass.Surrogate import Surrogate
 from UQpy.utilities.ValidationTypes import RandomStateType
@@ -204,14 +203,18 @@ def fit(
             raise NotImplementedError("Maximum likelihood estimator failed: Choose different starting point or "
                                       "increase nopt")
         t = np.argmin(fun_value)
-        self.hyperparameters = 10**minimizer[t, :]
+        self.hyperparameters = 10 ** minimizer[t, :]
 
         # Updated Correlation matrix corresponding to MLE estimates of hyperparameters
         if self.noise:
-            self.K = self.kernel.c(x=s_, s=s_, params=self.hyperparameters[:-1]) + \
-                     np.eye(nsamples)*(self.hyperparameters[-1])**2
+            self.kernel.kernel_parameter = self.hyperparameters[:-2]
+            sigma = self.hyperparameters[-2]
+            self.K = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=s_, s=s_) + \
+                np.eye(nsamples) * (self.hyperparameters[-1]) ** 2
         else:
-            self.K = self.kernel.c(x=s_, s=s_, params=self.hyperparameters)
+            self.kernel.kernel_parameter = self.hyperparameters[:-1]
+            sigma = self.hyperparameters[-1]
+            self.K = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=s_, s=s_)
 
         self.cc = cholesky(self.K + 1e-10 * np.eye(nsamples), lower=True)
         self.alpha_ = cho_solve((self.cc, True), y_)
@@ -264,7 +267,9 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
         if hyperparameters is not None:
             # This is used for MLE constraints, if constraints call 'predict' method.
-            K = self.kernel.c(x=s_, s=s_, params=kernelparameters) + \
+            self.kernel.kernel_parameter = kernelparameters[:-1]
+            sigma = kernelparameters[-1]
+            K = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=s_, s=s_) + \
                 np.eye(self.samples.shape[0]) * (noise_std ** 2)
             cc = np.linalg.cholesky(K + 1e-10 * np.eye(self.samples.shape[0]))
             mu = 0
@@ -278,7 +283,7 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
             # Design parameters (beta: regression coefficient)
             beta = np.linalg.solve(g_, np.matmul(np.transpose(q_), y_dash))
             mu = np.einsum("ij,jk->ik", self.F, beta)
-            alpha_ = cho_solve((cc, True), y_-mu)
+            alpha_ = cho_solve((cc, True), y_ - mu)
         else:
             cc, alpha_ = self.cc, self.alpha_
@@ -290,7 +295,10 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
         else:
             mu1 = np.einsum("ij,jk->ik", fx, self.beta)
 
-        k = self.kernel.c(x=x_, s=s_, params=kernelparameters)
+        self.kernel.kernel_parameter = kernelparameters[:-1]
+        sigma = kernelparameters[-1]
+
+        k = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=x_, s=s_)
         y = mu1 + k @ alpha_
         if self.normalize:
             y = self.value_mean + y * self.value_std
@@ -298,7 +306,9 @@ def predict(self, points, return_std: bool = False, hyperparameters: list = None
             y = y.flatten()
 
         if return_std:
-            k1 = self.kernel.c(x=x_, s=x_, params=kernelparameters)
+            self.kernel.kernel_parameter = kernelparameters[:-1]
+            sigma = kernelparameters[-1]
+            k1 = sigma ** 2 * self.kernel.calculate_kernel_matrix(x=x_, s=x_)
             var = (k1 - k @ cho_solve((cc, True), k.T)).diagonal()
             mse = np.sqrt(var)
             if self.normalize:
@@ -326,9 +336,13 @@ def log_likelihood(p0, k_, s, y, ind_noise, fx_):
 
         m = s.shape[0]
         if ind_noise:
-            k__ = k_.c(x=s, s=s, params=10 ** p0[:-1]) + np.eye(m) * (10 ** p0[-1]) ** 2
+            k_.kernel_parameter = 10 ** p0[:-2]
+            sigma = 10 ** p0[-2]
+            k__ = sigma ** 2 * k_.calculate_kernel_matrix(x=s, s=s) + np.eye(m) * (10 ** p0[-1]) ** 2
         else:
-            k__ = k_.c(x=s, s=s, params=10 ** p0)
+            k_.kernel_parameter = 10 ** p0[:-1]
+            sigma = 10 ** p0[-1]
+            k__ = sigma ** 2 * k_.calculate_kernel_matrix(x=s, s=s)
 
         cc = cholesky(k__ + 1e-10 * np.eye(m), lower=True)
         mu = 0
@@ -343,7 +357,7 @@ def log_likelihood(p0, k_, s, y, ind_noise, fx_):
             beta = np.linalg.solve(g_, np.matmul(np.transpose(q_), y_dash))
             mu = np.einsum("ij,jk->ik", fx_, beta)
 
-        term1 = (y-mu).T @ (cho_solve((cc, True), y-mu))
+        term1 = (y - mu).T @ (cho_solve((cc, True), y - mu))
         term2 = 2 * np.sum(np.log(np.abs(np.diag(cc))))
 
         return 0.5 * (term1 + term2 + m * np.log(2 * np.pi))[0, 0]
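The hyperparameter vector is now split when it is consumed: the leading entries (length scales) are pushed onto the kernel via `kernel_parameter`, the next entry is the process standard deviation `sigma`, and with `noise=True` a noise standard deviation is appended as the last entry. A minimal usage sketch of the refactored API, reusing the 1-d setup from the test file below; it assumes `GaussianProcessRegression` is importable from `UQpy.surrogates.gaussian_process` and that `fit` keeps the supplied `hyperparameters` when no optimizer is given:

    import numpy as np
    from UQpy.surrogates.gaussian_process import GaussianProcessRegression
    from UQpy.surrogates.gaussian_process.kernels import RBF

    samples = np.linspace(0, 5, 20).reshape(-1, 1)
    values = np.cos(samples)

    # [length_scale, process_std]; with noise=True it would be
    # [length_scale, process_std, noise_std], e.g. [2.852, 2.959, 0.001].
    gpr = GaussianProcessRegression(kernel=RBF(), hyperparameters=[2.852, 2.959])
    gpr.fit(samples=samples, values=values)

    # With return_std=True, predict returns the mean and the standard deviation.
    y_hat, y_std = gpr.predict([[1.0], [np.pi / 2], [np.pi]], return_std=True)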
diff --git a/src/UQpy/surrogates/gaussian_process/kernels/Matern.py b/src/UQpy/surrogates/gaussian_process/kernels/Matern.py
index 78b2edb97..28aba9c1e 100644
--- a/src/UQpy/surrogates/gaussian_process/kernels/Matern.py
+++ b/src/UQpy/surrogates/gaussian_process/kernels/Matern.py
@@ -4,29 +4,30 @@
 
 
 class Matern(Kernel):
-    def __init__(self, nu=1.5):
+    def __init__(self, kernel_parameter: Union[int, float] = 1, nu=1.5):
         """
         Matern Kernel is a generalization of Radial Basis Function kernel.
 
         :params nu: Shape parameter. For nu=0.5, 1.5, 2.5 and infinity, matern coincides with the exponential,
         matern-3/2, matern-5/2 and RBF covariance function, respectively.
         """
+        super().__init__(kernel_parameter)
         self.nu = nu
 
-    def c(self, x, s, params):
-        l, sigma = params[:-1], params[-1]
-        stack = cdist(x/l, s/l, metric='euclidean')
+    def calculate_kernel_matrix(self, x, s):
+        l = self.kernel_parameter
+        stack = cdist(x / l, s / l, metric='euclidean')
         if self.nu == 0.5:
-            return sigma**2 * np.exp(-np.abs(stack))
+            return np.exp(-np.abs(stack))
         elif self.nu == 1.5:
-            return sigma**2 * (1+np.sqrt(3)*stack)*np.exp(-np.sqrt(3)*stack)
+            return (1 + np.sqrt(3) * stack) * np.exp(-np.sqrt(3) * stack)
         elif self.nu == 2.5:
-            return sigma**2 * (1+np.sqrt(5)*stack+5*(stack**2)/3)*np.exp(-np.sqrt(5)*stack)
+            return (1 + np.sqrt(5) * stack + 5 * (stack ** 2) / 3) * np.exp(-np.sqrt(5) * stack)
         elif self.nu == np.inf:
-            return sigma**2 * np.exp(-(stack**2)/2)
+            return np.exp(-(stack ** 2) / 2)
         else:
-            stack *= np.sqrt(2*self.nu)
-            tmp = 1/(gamma(self.nu)*(2**(self.nu-1)))
+            stack *= np.sqrt(2 * self.nu)
+            tmp = 1 / (gamma(self.nu) * (2 ** (self.nu - 1)))
             tmp1 = stack ** self.nu
             tmp2 = kv(self.nu, stack)
-            return sigma**2 * tmp * tmp1 * tmp2
+            return tmp * tmp1 * tmp2
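With the process-variance factor moved out of the kernels, `calculate_kernel_matrix` returns a plain correlation matrix, so the nu=inf equivalence stated in the docstring can be checked directly. A quick sketch, not part of the patch; `kernel_parameter=1.0` is an arbitrary length scale:

    import numpy as np
    from UQpy.surrogates.gaussian_process.kernels import Matern, RBF

    x = np.linspace(0, 2, 5).reshape(-1, 1)
    rbf = RBF(kernel_parameter=1.0)
    matern = Matern(kernel_parameter=1.0, nu=np.inf)

    # Both return unit-variance correlation matrices, so the nu=inf Matern
    # kernel should reproduce the RBF kernel entry for entry.
    assert np.allclose(rbf.calculate_kernel_matrix(x, x),
                       matern.calculate_kernel_matrix(x, x))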
""" + super().__init__(kernel_parameter) self.nu = nu - def c(self, x, s, params): - l, sigma = params[:-1], params[-1] - stack = cdist(x/l, s/l, metric='euclidean') + def calculate_kernel_matrix(self, x, s): + l = self.kernel_parameter + stack = cdist(x / l, s / l, metric='euclidean') if self.nu == 0.5: - return sigma**2 * np.exp(-np.abs(stack)) + return np.exp(-np.abs(stack)) elif self.nu == 1.5: - return sigma**2 * (1+np.sqrt(3)*stack)*np.exp(-np.sqrt(3)*stack) + return (1 + np.sqrt(3) * stack) * np.exp(-np.sqrt(3) * stack) elif self.nu == 2.5: - return sigma**2 * (1+np.sqrt(5)*stack+5*(stack**2)/3)*np.exp(-np.sqrt(5)*stack) + return (1 + np.sqrt(5) * stack + 5 * (stack ** 2) / 3) * np.exp(-np.sqrt(5) * stack) elif self.nu == np.inf: - return sigma**2 * np.exp(-(stack**2)/2) + return np.exp(-(stack ** 2) / 2) else: - stack *= np.sqrt(2*self.nu) - tmp = 1/(gamma(self.nu)*(2**(self.nu-1))) + stack *= np.sqrt(2 * self.nu) + tmp = 1 / (gamma(self.nu) * (2 ** (self.nu - 1))) tmp1 = stack ** self.nu tmp2 = kv(self.nu, stack) - return sigma**2 * tmp * tmp1 * tmp2 + return tmp * tmp1 * tmp2 diff --git a/src/UQpy/surrogates/gaussian_process/kernels/RBF.py b/src/UQpy/surrogates/gaussian_process/kernels/RBF.py index bd56c2b91..b504249c2 100644 --- a/src/UQpy/surrogates/gaussian_process/kernels/RBF.py +++ b/src/UQpy/surrogates/gaussian_process/kernels/RBF.py @@ -1,15 +1,18 @@ +from typing import Union + from UQpy.surrogates.gaussian_process.kernels.baseclass.Kernel import * class RBF(Kernel): - def c(self, x, s, params): + def __init__(self, kernel_parameter: Union[int, float] = 1.0): + super().__init__(kernel_parameter) + + def calculate_kernel_matrix(self, x, s): """ This method compute the RBF kernel on sample points 'x' and 's'. - :params x: An array containing input samples. - :params s: An array containing input samples. - :params params: A list/array of hyperparameters containing length scales and the process variance. + :params x: An array containing training points. + :params s: An array containing input points. """ - stack = Kernel.check_samples_and_return_stack(x/params[:-1], s/params[:-1]) - k = params[-1] ** 2 * np.exp(np.sum(-0.5 * (stack ** 2), axis=2)) - return k + stack = Kernel.check_samples_and_return_stack(x / self.kernel_parameter, s / self.kernel_parameter) + return np.exp(np.sum(-0.5 * (stack ** 2), axis=2)) diff --git a/src/UQpy/surrogates/gaussian_process/kernels/baseclass/Kernel.py b/src/UQpy/surrogates/gaussian_process/kernels/baseclass/Kernel.py index 2336930bc..5724147cb 100644 --- a/src/UQpy/surrogates/gaussian_process/kernels/baseclass/Kernel.py +++ b/src/UQpy/surrogates/gaussian_process/kernels/baseclass/Kernel.py @@ -1,4 +1,6 @@ from abc import ABC, abstractmethod +from typing import Union + import numpy as np @@ -8,8 +10,20 @@ class Kernel(ABC): functions. """ + def __init__(self, kernel_parameter: Union[int, float]): + self.__kernel_parameter = kernel_parameter + + @property + def kernel_parameter(self): + return self.__kernel_parameter + + @kernel_parameter.setter + def kernel_parameter(self, value): + self.__kernel_parameter = value + + @abstractmethod - def c(self, x, s, params): + def calculate_kernel_matrix(self, x, s): """ Abstract method that needs to be implemented by the user when creating a new Covariance function. 
""" @@ -23,25 +37,3 @@ def check_samples_and_return_stack(x, s): np.swapaxes(np.atleast_3d(x_), 1, 2), (1, np.size(s_, 0), 1) ) - np.tile(s_, (np.size(x_, 0), 1, 1)) return stack - - # @staticmethod - # def derivatives(x_, s_, params): - # stack = Kernel.check_samples_and_return_stack(x_, s_) - # # Taking stack and creating array of all thetaj*dij - # after_parameters = params * abs(stack) - # # Create matrix of all ones to compare - # comp_ones = np.ones((np.size(x_, 0), np.size(s_, 0), np.size(s_, 1))) - # # zeta_matrix has all values min{1,theta*dij} - # zeta_matrix_ = np.minimum(after_parameters, comp_ones) - # # Copy zeta_matrix to another matrix that will used to find where derivative should be zero - # indices = zeta_matrix_.copy() - # # If value of min{1,theta*dij} is 1, the derivative should be 0. - # # So, replace all values of 1 with 0, then perform the .astype(bool).astype(int) - # # operation like in the linear example, so you end up with an array of 1's where - # # the derivative should be caluclated and 0 where it should be zero - # indices[indices == 1] = 0 - # # Create matrix of all |dij| (where non zero) to be used in calculation of dR/dtheta - # dtheta_derivs_ = indices.astype(bool).astype(int) * abs(stack) - # # Same as above, but for matrix of all thetaj where non-zero - # dx_derivs_ = indices.astype(bool).astype(int) * params * np.sign(stack) - # return zeta_matrix_, dtheta_derivs_, dx_derivs_ diff --git a/tests/unit_tests/surrogates/test_gpr.py b/tests/unit_tests/surrogates/test_gpr.py index 504369b97..e5234aa38 100644 --- a/tests/unit_tests/surrogates/test_gpr.py +++ b/tests/unit_tests/surrogates/test_gpr.py @@ -14,7 +14,6 @@ from UQpy.surrogates.gaussian_process.regression_models import LinearRegression, ConstantRegression, QuadraticRegression from UQpy.surrogates.gaussian_process.kernels import RBF, Matern - samples = np.linspace(0, 5, 20).reshape(-1, 1) values = np.cos(samples) optimizer = MinimizeOptimizer(method="L-BFGS-B", bounds=[[0.1, 5], [0.1, 3]]) @@ -49,35 +48,34 @@ gpr3 = GaussianProcessRegression(regression_model=constant_reg, kernel=RBF(), hyperparameters=[2.852, 2.959], random_state=1, optimizer=optimizer3, optimize_constraints=non_negative, bounds=[[0.1, 5], [0.1, 3]]) -gpr3.fit(samples=samples, values=values+1.05) +gpr3.fit(samples=samples, values=values + 1.05) gpr4 = GaussianProcessRegression(kernel=RBF(), hyperparameters=[2.852, 2.959, 0.001], random_state=1, optimizer=optimizer3, bounds=[[0.1, 5], [0.1, 3], [1e-6, 1e-1]], noise=True) gpr4.fit(samples=samples, values=values) - def test_predict1(): - prediction = np.round(gpr.predict([[1], [np.pi/2], [np.pi]], True), 3) - expected_prediction = np.array([[0.54, 0., -1.], [0., 0., 0.]]) + prediction = np.round(gpr.predict([[1], [np.pi / 2], [np.pi]], True), 3) + expected_prediction = np.array([[0.54, 0., -1.], [0., 0., 0.]]) assert (expected_prediction == prediction).all() def test_predict2(): - prediction = np.round(gpr2.predict([[1], [2*np.pi], [np.pi]], True), 3) - expected_prediction = np.array([[0.537, 0.238, -0.998], [0.077, 0.39, 0.046]]) + prediction = np.round(gpr2.predict([[1], [2 * np.pi], [np.pi]], True), 3) + expected_prediction = np.array([[0.537, 0.238, -0.998], [0.077, 0.39, 0.046]]) assert (expected_prediction == prediction).all() def test_predict3(): - prediction = np.round(gpr3.predict([[1], [2*np.pi], [np.pi]], True), 3) + prediction = np.round(gpr3.predict([[1], [2 * np.pi], [np.pi]], True), 3) expected_prediction = np.array([[1.59, 2.047, 0.05], [0., 0.006, 0.]]) assert 
 
 
 def test_predict4():
     prediction = np.round(gpr4.predict([[1], [2 * np.pi], [np.pi]], True), 3)
-    expected_prediction = np.array([[0.54,  0.983, -1.], [0., 0.04, 0.]])
+    expected_prediction = np.array([[0.54, 0.983, -1.], [0., 0.04, 0.]])
     assert np.isclose(prediction, expected_prediction, atol=0.05).all()
 
 
@@ -85,10 +83,12 @@ def test_rbf():
     """
     Test RBF kernel for 1-d input model
     """
-    cov = rbf_kernel.c(points11, points12, hyperparameters1)
+    rbf_kernel.kernel_parameter = hyperparameters1[:-1]
+    sigma = hyperparameters1[-1]
+    cov = sigma ** 2 * rbf_kernel.calculate_kernel_matrix(points11, points12)
     covariance_matrix = np.round(cov, 3)
-    expected_covariance_matrix = np.array([[80.074, 100., 80.074], [41.111, 80.074, 41.111],
-                                          [13.534, 41.111, 13.534]])
+    expected_covariance_matrix = np.array([[80.074, 100., 80.074], [41.111, 80.074, 41.111],
+                                           [13.534, 41.111, 13.534]])
     assert (expected_covariance_matrix == covariance_matrix).all()
 
 
@@ -96,7 +96,9 @@ def test_rbf2():
     """
     Test RBF kernel for 2-d input model
     """
-    cov = rbf_kernel.c(points21, points22, hyperparameters2)
+    rbf_kernel.kernel_parameter = hyperparameters2[:-1]
+    sigma = hyperparameters2[-1]
+    cov = sigma ** 2 * rbf_kernel.calculate_kernel_matrix(points21, points22)
     covariance_matrix2 = np.round(cov, 3)
     expected_covariance_matrix2 = np.array([[3.784e+00, 5.120e-01, 5.410e-01], [1.943e+00, 2.426e+00, 4.200e-02],
                                             [3.280e-01, 3.784e+00, 1.000e-03]])
@@ -107,18 +109,22 @@ def test_matern_inf():
     """
     Test Matern kernel (nu=inf) for 1-d input model, should concide with RBF kernel
     """
-    cov = matern_kernel_inf.c(points11, points12, hyperparameters1)
+    matern_kernel_inf.kernel_parameter = hyperparameters1[:-1]
+    sigma = hyperparameters1[-1]
+    cov = sigma ** 2 * matern_kernel_inf.calculate_kernel_matrix(points11, points12)
     covariance_matrix = np.round(cov, 3)
-    expected_covariance_matrix = np.array([[80.074, 100., 80.074], [41.111, 80.074, 41.111],
-                                          [13.534, 41.111, 13.534]])
+    expected_covariance_matrix = np.array([[80.074, 100., 80.074], [41.111, 80.074, 41.111],
+                                           [13.534, 41.111, 13.534]])
     assert (expected_covariance_matrix == covariance_matrix).all()
 
 
-def tets_matern_inf2():
+def test_matern_inf2():
     """
     Test Matern kernel (nu=inf) for 2-d input model
     """
-    cov = matern_kernel_inf.c(points21, points22, hyperparameters2)
+    matern_kernel_inf.kernel_parameter = hyperparameters2[:-1]
+    sigma = hyperparameters2[-1]
+    cov = sigma ** 2 * matern_kernel_inf.calculate_kernel_matrix(points21, points22)
     covariance_matrix2 = np.round(cov, 3)
     expected_covariance_matrix2 = np.array([[3.784e+00, 5.120e-01, 5.410e-01], [1.943e+00, 2.426e+00, 4.200e-02],
                                             [3.280e-01, 3.784e+00, 1.000e-03]])
@@ -129,10 +135,12 @@ def test_matern_1_2():
     """
    Test Matern kernel (nu=0.5) for 1-d input model, should concide with absolute exponential kernel
     """
-    cov = matern_kernel_1_2.c(points11, points12, hyperparameters1)
+    matern_kernel_1_2.kernel_parameter = hyperparameters1[:-1]
+    sigma = hyperparameters1[-1]
+    cov = sigma ** 2 * matern_kernel_1_2.calculate_kernel_matrix(points11, points12)
     covariance_matrix = np.round(cov, 3)
-    expected_covariance_matrix = np.array([[51.342, 100., 51.342], [26.36, 51.342, 26.36],
-                                          [13.534, 26.36, 13.534]])
+    expected_covariance_matrix = np.array([[51.342, 100., 51.342], [26.36, 51.342, 26.36],
+                                           [13.534, 26.36, 13.534]])
     assert (expected_covariance_matrix == covariance_matrix).all()
 
 
@@ -140,7 +148,9 @@ def test_matern_1_2_2():
     """
     Test Matern kernel (nu=0.5) for 2-d input model
     """
-    cov = matern_kernel_1_2.c(points21, points22, hyperparameters2)
+    matern_kernel_1_2.kernel_parameter = hyperparameters2[:-1]
+    sigma = hyperparameters2[-1]
+    cov = sigma ** 2 * matern_kernel_1_2.calculate_kernel_matrix(points21, points22)
     covariance_matrix2 = np.round(cov, 3)
     expected_covariance_matrix2 = np.array([[2.866, 0.527, 0.541], [1.203, 1.472, 0.196], [0.428, 2.866, 0.069]])
     assert (expected_covariance_matrix2 == covariance_matrix2).all()
@@ -150,10 +160,12 @@ def test_matern_3_2():
     """
     Test Matern kernel (nu=1.5) for 1-d input model
     """
-    cov = matern_kernel_3_2.c(points11, points12, hyperparameters1)
+    matern_kernel_3_2.kernel_parameter = hyperparameters1[:-1]
+    sigma = hyperparameters1[-1]
+    cov = sigma ** 2 * matern_kernel_3_2.calculate_kernel_matrix(points11, points12)
     covariance_matrix = np.round(cov, 3)
-    expected_covariance_matrix = np.array([[67.906, 100., 67.906], [32.869, 67.906, 32.869],
-                                          [13.973, 32.869, 13.973]])
+    expected_covariance_matrix = np.array([[67.906, 100., 67.906], [32.869, 67.906, 32.869],
+                                           [13.973, 32.869, 13.973]])
     assert (expected_covariance_matrix == covariance_matrix).all()
 
 
@@ -161,10 +173,12 @@ def test_matern_5_2():
     """
     Test Matern kernel (nu=2.5) for 1-d input model
     """
-    cov = matern_kernel_5_2.c(points11, points12, hyperparameters1)
+    matern_kernel_5_2.kernel_parameter = hyperparameters1[:-1]
+    sigma = hyperparameters1[-1]
+    cov = sigma ** 2 * matern_kernel_5_2.calculate_kernel_matrix(points11, points12)
     covariance_matrix = np.round(cov, 3)
-    expected_covariance_matrix = np.array([[72.776, 100., 72.776], [35.222, 72.776, 35.222],
-                                          [13.866, 35.222, 13.866]])
+    expected_covariance_matrix = np.array([[72.776, 100., 72.776], [35.222, 72.776, 35.222],
+                                           [13.866, 35.222, 13.866]])
     assert (expected_covariance_matrix == covariance_matrix).all()
 
 
@@ -172,9 +186,11 @@ def test_matern_2_1():
     """
     Test Matern kernel (nu=2) for 1-d input model
     """
-    cov = matern_kernel_2_1.c(points11, points12, hyperparameters1)
+    matern_kernel_2_1.kernel_parameter = hyperparameters1[:-1]
+    sigma = hyperparameters1[-1]
+    cov = sigma ** 2 * matern_kernel_2_1.calculate_kernel_matrix(points11, points12)
     covariance_matrix = np.round(cov, 3)
-    expected_covariance_matrix = np.array([[70.894, np.nan, 70.894], [34.25,  70.894, 34.25],
+    expected_covariance_matrix = np.array([[70.894, np.nan, 70.894], [34.25, 70.894, 34.25],
                                            [13.921, 34.25, 13.921]])
     assert np.array_equal(expected_covariance_matrix, covariance_matrix, equal_nan=True)
 
 
@@ -426,4 +442,4 @@ def test_hyperparameters_length():
 # # Put a legend to the right of the current axis
 # ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
 # plt.grid()
-# plt.show()
\ No newline at end of file
+# plt.show()
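Each kernel test now repeats the same three-line split-and-scale pattern. If the duplication grows, a small helper could factor it out; the function below is hypothetical and not part of the patch:

    def scaled_covariance(kernel, x, s, hyperparameters):
        """Hypothetical helper: hyperparameters = [length scales..., process_std]."""
        kernel.kernel_parameter = hyperparameters[:-1]
        sigma = hyperparameters[-1]
        return sigma ** 2 * kernel.calculate_kernel_matrix(x, s)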