From af8c31551b26b1902ea7d076c496e2232773dbeb Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Fri, 4 Mar 2022 04:06:12 +0100 Subject: [PATCH 1/4] LLR for non orthonormal basis Close #421 --- examples/plot_kernel_regression.py | 4 +- skfda/misc/hat_matrix.py | 101 ++++++++++------------ skfda/ml/regression/_kernel_regression.py | 8 +- tests/test_kernel_regression.py | 26 +++++- 4 files changed, 77 insertions(+), 62 deletions(-) diff --git a/examples/plot_kernel_regression.py b/examples/plot_kernel_regression.py index 39273791e..d3cb895bb 100644 --- a/examples/plot_kernel_regression.py +++ b/examples/plot_kernel_regression.py @@ -109,8 +109,8 @@ print('Score NW:', nw_res) ############################################################################## -# For Local Linear Regression, FDataBasis representation with an orthonormal -# basis should be used (for the previous cases it is possible to use either +# For Local Linear Regression, FDataBasis representation with a basis should be +# used (for the previous cases it is possible to use either # FDataGrid or FDataBasis). # # For basis, Fourier basis with 10 elements has been selected. Note that the diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index ae0e9e693..82d53f444 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -14,10 +14,9 @@ import numpy as np from sklearn.base import BaseEstimator, RegressorMixin -from skfda.representation._functional_data import FData -from skfda.representation.basis import FDataBasis - -from ..representation._typing import GridPoints, GridPointsLike +from ..representation._functional_data import FData +from ..representation._typing import GridPoints, GridPointsLike, NDArrayFloat +from ..representation.basis import FDataBasis from . import kernels @@ -36,7 +35,7 @@ def __init__( self, *, bandwidth: Optional[float] = None, - kernel: Callable[[np.ndarray], np.ndarray] = kernels.normal, + kernel: Callable[[NDArrayFloat], NDArrayFloat] = kernels.normal, ): self.bandwidth = bandwidth self.kernel = kernel @@ -44,13 +43,13 @@ def __init__( def __call__( self, *, - delta_x: np.ndarray, + delta_x: NDArrayFloat, X_train: Optional[Union[FData, GridPointsLike]] = None, X: Optional[Union[FData, GridPointsLike]] = None, - y_train: Optional[np.ndarray] = None, - weights: Optional[np.ndarray] = None, + y_train: Optional[NDArrayFloat] = None, + weights: Optional[NDArrayFloat] = None, _cv: bool = False, - ) -> np.ndarray: + ) -> NDArrayFloat: r""" Calculate the hat matrix or the prediction. @@ -99,8 +98,8 @@ def __call__( def _hat_matrix_function_not_normalized( self, *, - delta_x: np.ndarray, - ) -> np.ndarray: + delta_x: NDArrayFloat, + ) -> NDArrayFloat: pass @@ -141,8 +140,8 @@ class NadarayaWatsonHatMatrix(HatMatrix): def _hat_matrix_function_not_normalized( self, *, - delta_x: np.ndarray, - ) -> np.ndarray: + delta_x: NDArrayFloat, + ) -> NDArrayFloat: if self.bandwidth is None: percentage = 15 @@ -185,7 +184,7 @@ class LocalLinearRegressionHatMatrix(HatMatrix): For **kernel regression** algorithm: Given functional data, :math:`(X_1, X_2, ..., X_n)` where each function - is expressed in a orthonormal basis with :math:`J` elements and scalar + is expressed in a basis with :math:`J` elements and scalar response :math:`Y = (y_1, y_2, ..., y_n)`. 
It is desired to estimate the values @@ -222,13 +221,13 @@ class LocalLinearRegressionHatMatrix(HatMatrix): def __call__( # noqa: D102 self, *, - delta_x: np.ndarray, + delta_x: NDArrayFloat, X_train: Optional[Union[FDataBasis, GridPoints]] = None, X: Optional[Union[FDataBasis, GridPoints]] = None, - y_train: Optional[np.ndarray] = None, - weights: Optional[np.ndarray] = None, + y_train: Optional[NDArrayFloat] = None, + weights: Optional[NDArrayFloat] = None, _cv: bool = False, - ) -> np.ndarray: + ) -> NDArrayFloat: if self.bandwidth is None: percentage = 15 @@ -243,10 +242,27 @@ def __call__( # noqa: D102 m1 = X_train.coefficients m2 = X.coefficients + # Subtract previous matrices obtaining a 3D matrix + # The i-th element contains the matrix X_train - X[i] + C = m1 - m2[:, np.newaxis] + + inner_product_matrix = X_train.basis.inner_product_matrix() + + # Calculate new coefficients taking into account cross-products + # if the basis is orthonormal, C would not change + C = np.einsum( + 'ijk, kl -> ijl', + C, + inner_product_matrix, + ) + + # Adding a column of ones in the first position of all matrices + dims = (C.shape[0], C.shape[1], 1) + C = np.c_[np.ones(dims), C] + return self._solve_least_squares( delta_x=delta_x, - m1=m1, - m2=m2, + coefs=C, y_train=y_train, ) @@ -264,39 +280,16 @@ def __call__( # noqa: D102 def _solve_least_squares( self, - delta_x: np.ndarray, - m1: np.ndarray, - m2: np.ndarray, - y_train: np.ndarray, - ) -> np.ndarray: + delta_x: NDArrayFloat, + coefs: NDArrayFloat, + y_train: NDArrayFloat, + ) -> NDArrayFloat: W = np.sqrt(self.kernel(delta_x / self.bandwidth)) - # Adding a column of ones to m1 - m1 = np.concatenate( - ( - np.ones(m1.shape[0])[:, np.newaxis], - m1, - ), - axis=1, - ) - - # Adding a column of zeros to m2 - m2 = np.concatenate( - ( - np.zeros(m2.shape[0])[:, np.newaxis], - m2, - ), - axis=1, - ) - - # Subtract previous matrices obtaining a 3D matrix - # The i-th element contains the matrix X_train - X[i] - C = m1 - m2[:, np.newaxis] - # A x = b - # Where x = (a, b_1, ..., b_J) - A = (C.T * W.T).T + # Where x = (a, b_1, ..., b_J). + A = (coefs.T * W.T).T b = np.einsum('ij, j... 
-> ij...', W, y_train) # For Ax = b calculates x that minimize the square error @@ -312,8 +305,8 @@ def _solve_least_squares( def _hat_matrix_function_not_normalized( self, *, - delta_x: np.ndarray, - ) -> np.ndarray: + delta_x: NDArrayFloat, + ) -> NDArrayFloat: if self.bandwidth is None: percentage = 15 @@ -369,7 +362,7 @@ def __init__( self, *, n_neighbors: Optional[int] = None, - kernel: Callable[[np.ndarray], np.ndarray] = kernels.uniform, + kernel: Callable[[NDArrayFloat], NDArrayFloat] = kernels.uniform, ): self.n_neighbors = n_neighbors self.kernel = kernel @@ -377,8 +370,8 @@ def __init__( def _hat_matrix_function_not_normalized( self, *, - delta_x: np.ndarray, - ) -> np.ndarray: + delta_x: NDArrayFloat, + ) -> NDArrayFloat: input_points_len = delta_x.shape[1] diff --git a/skfda/ml/regression/_kernel_regression.py b/skfda/ml/regression/_kernel_regression.py index 2834cae86..aecc64c60 100644 --- a/skfda/ml/regression/_kernel_regression.py +++ b/skfda/ml/regression/_kernel_regression.py @@ -6,10 +6,10 @@ from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.validation import check_is_fitted -from skfda.misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix -from skfda.misc.metrics import PairwiseMetric, l2_distance -from skfda.misc.metrics._typing import Metric -from skfda.representation._functional_data import FData +from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix +from ...misc.metrics import PairwiseMetric, l2_distance +from ...misc.metrics._typing import Metric +from ...representation._functional_data import FData class KernelRegression( diff --git a/tests/test_kernel_regression.py b/tests/test_kernel_regression.py index 01e3d9748..c7016a2a3 100644 --- a/tests/test_kernel_regression.py +++ b/tests/test_kernel_regression.py @@ -15,7 +15,7 @@ from skfda.misc.kernels import normal, uniform from skfda.misc.metrics import l2_distance from skfda.ml.regression import KernelRegression -from skfda.representation.basis import FDataBasis, Fourier +from skfda.representation.basis import FDataBasis, Fourier, Monomial from skfda.representation.grid import FDataGrid @@ -80,7 +80,7 @@ def _llr_alt( C = np.concatenate( ( - (np.ones(fd_train.n_samples))[:, np.newaxis], + np.ones(fd_train.n_samples)[:, np.newaxis], (fd_train - fd_test[i]).coefficients, ), axis=1, @@ -313,3 +313,25 @@ def test_knn_r(self) -> None: ] np.testing.assert_almost_equal(y, result_R, decimal=6) + + +class TestNonOthonormalBasisLLR(unittest.TestCase): + """Test LocalLinearRegression method with non orthonormal basis.""" + + def test_llr_non_orthonormal(self): + """Test LocalLinearRegression with monomial basis.""" + coef1 = [[1, 5, 8], [4, 6, 6], [9, 4, 1]] + coef2 = [[6, 3, 5]] + basis = Monomial(n_basis=3, domain_range=(0, 3)) + + X_train = FDataBasis(coefficients=coef1, basis=basis) + X = FDataBasis(coefficients=coef2, basis=basis) + y_train = np.array([8, 6, 1]) + + llr = LocalLinearRegressionHatMatrix( + bandwidth=100, + kernel=uniform, + ) + kr = KernelRegression(kernel_estimator=llr) + kr.fit(X_train, y_train) + np.testing.assert_almost_equal(kr.predict(X), 4.35735166) From d6f0732c8c80ef9a9b794f5f4bc2b488a5b59c85 Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Fri, 4 Mar 2022 04:37:14 +0100 Subject: [PATCH 2/4] Update test_kernel_regression.py --- tests/test_kernel_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_kernel_regression.py b/tests/test_kernel_regression.py index c7016a2a3..afda97610 100644 --- 
a/tests/test_kernel_regression.py +++ b/tests/test_kernel_regression.py @@ -318,7 +318,7 @@ def test_knn_r(self) -> None: class TestNonOthonormalBasisLLR(unittest.TestCase): """Test LocalLinearRegression method with non orthonormal basis.""" - def test_llr_non_orthonormal(self): + def test_llr_non_orthonormal(self) -> None: """Test LocalLinearRegression with monomial basis.""" coef1 = [[1, 5, 8], [4, 6, 6], [9, 4, 1]] coef2 = [[6, 3, 5]] From 45bce765650a62940369a16f5999547d2bcb98ee Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Wed, 9 Mar 2022 16:05:21 +0100 Subject: [PATCH 3/4] Update hat_matrix.py --- skfda/misc/hat_matrix.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index 82d53f444..211baac42 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -250,11 +250,7 @@ def __call__( # noqa: D102 # Calculate new coefficients taking into account cross-products # if the basis is orthonormal, C would not change - C = np.einsum( - 'ijk, kl -> ijl', - C, - inner_product_matrix, - ) + C = C @ inner_product_matrix # Adding a column of ones in the first position of all matrices dims = (C.shape[0], C.shape[1], 1) From 2aebfbd19384fa06e82b6d84106efb99c73c31d0 Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Thu, 10 Mar 2022 23:05:33 +0100 Subject: [PATCH 4/4] Update hat_matrix.py --- skfda/misc/hat_matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index 211baac42..67f51810e 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -254,7 +254,7 @@ def __call__( # noqa: D102 # Adding a column of ones in the first position of all matrices dims = (C.shape[0], C.shape[1], 1) - C = np.c_[np.ones(dims), C] + C = np.concatenate((np.ones(dims), C), axis=-1) return self._solve_least_squares( delta_x=delta_x,
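As a reference for the computation introduced above: for a non-orthonormal basis, the coefficient differences have to be multiplied by the Gram (inner product) matrix of the basis before the local weighted least-squares problem is set up, because the plain Euclidean product of coefficients no longer matches the L2 inner product of the functions. The following is a minimal standalone NumPy sketch of that computation for a single evaluation point; the function and argument names (local_linear_prediction, coef_train, coef_eval, gram, kernel_weights) are illustrative only and are not part of the scikit-fda API.

import numpy as np

def local_linear_prediction(coef_train, coef_eval, gram, y_train, kernel_weights):
    """Sketch of the Gram-corrected local linear fit for one evaluation point.

    coef_train     : (n, J) basis coefficients of the training functions.
    coef_eval      : (J,)   basis coefficients of the evaluation function.
    gram           : (J, J) inner product matrix of the basis
                     (the identity for an orthonormal basis).
    y_train        : (n,)   scalar responses.
    kernel_weights : (n,)   kernel(delta_x / bandwidth) for this point.
    """
    # Coefficient differences X_train - X, corrected by the Gram matrix so
    # that cross-products between non-orthonormal basis functions are kept.
    C = (coef_train - coef_eval) @ gram

    # Prepend a column of ones for the intercept term.
    C = np.concatenate((np.ones((C.shape[0], 1)), C), axis=1)

    # Weighted least squares A x = b with x = (a, b_1, ..., b_J);
    # the prediction is the intercept estimate a.
    W = np.sqrt(kernel_weights)
    A = C * W[:, np.newaxis]
    b = W * y_train
    x, *_ = np.linalg.lstsq(A, b, rcond=None)
    return x[0]

With gram taken from X_train.basis.inner_product_matrix(), this should reproduce what LocalLinearRegressionHatMatrix computes after the patches above; the sketch deliberately uses the plain matrix product and np.concatenate, matching the simplifications made in patches 3 and 4.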