DiffusionMaps and Kernels refactoring according to #168

SURGroup · Apr 6, 2022 · 9cc21da · 9cc21da
1 parent 0ecb1cf
commit 9cc21da
Show file tree

Hide file tree

Showing 14 changed files with 87 additions and 140 deletions.
diff --git a/docs/code/dimension_reduction/diffusion_maps/plot_diffusion_maps_circle.py b/docs/code/dimension_reduction/diffusion_maps/plot_diffusion_maps_circle.py
@@ -58,16 +58,8 @@
 
 # %%
 
-dmaps = DiffusionMaps.build_from_data(data=X, alpha=1, n_eigenvectors=5,
-                                      kernel=GaussianKernel(epsilon=0.3))
-
-# %% md
-#
-# Use the method `mapping` to compute the diffusion coordinates assuming `epsilon=0.3`.
-
-# %%
-
-dmaps.fit()
+dmaps = DiffusionMaps(data=X, alpha=1, n_eigenvectors=5,
+                      kernel=GaussianKernel(epsilon=0.3))
 
 # %% md
 #
@@ -90,4 +82,3 @@
 ax = fig.gca(projection='3d')
 ax.scatter(x, y, z, c=color, cmap=plt.cm.Spectral, s=8)
 plt.show()
-
diff --git a/docs/code/dimension_reduction/diffusion_maps/plot_dmaps_swiss_role.py b/docs/code/dimension_reduction/diffusion_maps/plot_dmaps_swiss_role.py
@@ -42,30 +42,25 @@
 # Case 1: Find the optimal parameter of the Gaussian kernel scale epsilon
 kernel = GaussianKernel()
 
-dmaps_object = DiffusionMaps.build_from_data(data=X,
-                                             alpha=1.0, n_eigenvectors=9,
-                                             is_sparse=True, n_neighbors=100,
-                                             optimize_parameters=True,
-                                             kernel=kernel)
+dmaps_object = DiffusionMaps(data=X,
+                             alpha=1.0, n_eigenvectors=9,
+                             is_sparse=True, n_neighbors=100,
+                             kernel=kernel)
 
-print('epsilon', kernel.epsilon)
-
-# %% md
-# Fit the data and calculate the embedding, the eigenvectors and eigenvalues
-dmaps_object.fit()
 
 # %% md
 #
 # Find the parsimonious representation of the eigenvectors. Identify the two most informative
 # eigenvectors.
 
-index, residuals = DiffusionMaps.parsimonious(dmaps_object.eigenvectors, 2)
+dmaps_object.parsimonious(dim=2)
 
-print('most informative eigenvectors:', index)
+print('most informative eigenvectors:', dmaps_object.parsimonious_indices)
 
 # %% md
 #
 # Plot the diffusion coordinates
 
-DiffusionMaps._plot_eigen_pairs(dmaps_object.eigenvectors, pair_indices=index, color=X_color, figure_size=[12, 12])
+DiffusionMaps._plot_eigen_pairs(dmaps_object.eigenvectors, pair_indices=dmaps_object.parsimonious_indices,
+                                color=X_color, figure_size=[12, 12])
 plt.show()
diff --git a/docs/code/dimension_reduction/diffusion_maps/plot_grassmann_dmaps.py b/docs/code/dimension_reduction/diffusion_maps/plot_grassmann_dmaps.py
@@ -89,11 +89,11 @@
 #
 # Diffusion maps with Grassmann kernel
 kernel = ProjectionKernel()
-kernel_matrix = kernel.calculate_kernel_matrix(psi)
+kernel.calculate_kernel_matrix(psi)
+
 
 Gdmaps_UQpy = DiffusionMaps(alpha=1.0, n_eigenvectors=5, is_sparse=True, n_neighbors=250,
-                            kernel_matrix=kernel_matrix)
-Gdmaps_UQpy.fit()
+                            kernel_matrix=kernel.kernel_matrix)
 
 #%%
 #

diff --git a/docs/code/dimension_reduction/grassmann/plot_grassmann_kernel.py b/docs/code/dimension_reduction/grassmann/plot_grassmann_kernel.py
@@ -73,8 +73,11 @@
 # %%
 projection_kernel = ProjectionKernel()
 
-kernel_psi = projection_kernel.calculate_kernel_matrix(points=manifold_projection.u)
-kernel_phi = projection_kernel.calculate_kernel_matrix(points=manifold_projection.v)
+projection_kernel.calculate_kernel_matrix(points=manifold_projection.u)
+kernel_psi = projection_kernel.kernel_matrix
+
+projection_kernel.calculate_kernel_matrix(points=manifold_projection.v)
+kernel_phi = projection_kernel.kernel_matrix
 
 fig, (ax1, ax2) = plt.subplots(1, 2)
 ax1.title.set_text('kernel_psi')
@@ -89,9 +92,10 @@
 
 # %%
 
-kernel_01 = projection_kernel.calculate_kernel_matrix(points=[manifold_projection.u[0],
-                                                              manifold_projection.u[1],
-                                                              manifold_projection.u[2]])
+projection_kernel.calculate_kernel_matrix(points=[manifold_projection.u[0],
+                                                  manifold_projection.u[1],
+                                                  manifold_projection.u[2]])
+kernel_01 = projection_kernel.kernel_matrix
 
 fig = plt.figure()
 plt.imshow(kernel_01)
@@ -105,18 +109,20 @@
 # %%
 from UQpy.utilities.kernels.baseclass.GrassmannianKernel import GrassmannianKernel
 
-
 class UserKernel(GrassmannianKernel):
 
-    def kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint):
+    def _kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint):
         r = np.dot(xi.data.T, xj.data)
         det = np.linalg.det(r)
         return det * det
 
 
 user_kernel = UserKernel()
-kernel_user_psi = user_kernel.calculate_kernel_matrix(points=manifold_projection.u)
-kernel_user_phi = user_kernel.calculate_kernel_matrix(points=manifold_projection.v)
+user_kernel.calculate_kernel_matrix(points=manifold_projection.u)
+kernel_user_psi = user_kernel.kernel_matrix
+
+user_kernel.calculate_kernel_matrix(points=manifold_projection.v)
+kernel_user_phi = user_kernel.kernel_matrix
 
 fig, (ax1, ax2) = plt.subplots(1, 2)
 ax1.title.set_text('kernel_psi')

diff --git a/docs/source/dimension_reduction/dmaps.rst b/docs/source/dimension_reduction/dmaps.rst
@@ -22,14 +22,16 @@ One can use the following method to instantiate the :class:`.DiffusionMaps` clas
 Methods
 ~~~~~~~~~~~
 .. autoclass:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps
-    :members: build_from_data, fit, parsimonious, estimate_epsilon
+    :members: parsimonious
 
 Attributes
 ~~~~~~~~~~~~~
 .. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.transition_matrix
 .. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.diffusion_coordinates
 .. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.eigenvectors
 .. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.eigenvalues
+.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.parsimonious_indices
+.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.parsimonious_residuals
 
 Examples
 ~~~~~~~~~~~~~

diff --git a/docs/source/utilities/kernels/index.rst b/docs/source/utilities/kernels/index.rst
@@ -25,7 +25,7 @@ The :class:`UQpy.utilities.kernels.baseclass.Kernel` class is imported using the
 >>> from UQpy.utilities.kernels.baseclass.Kernel import Kernel
 
 .. autoclass:: UQpy.utilities.kernels.baseclass.Kernel
-    :members: kernel_entry
+    :members: _kernel_entry
 
 
 .. toctree::

diff --git a/src/UQpy/dimension_reduction/diffusion_maps/DiffusionMaps.py b/src/UQpy/dimension_reduction/diffusion_maps/DiffusionMaps.py
@@ -25,16 +25,16 @@ class DiffusionMaps:
 
     @beartype
     def __init__(
-        self,
-        kernel_matrix: Numpy2DFloatArray,
-        data: Union[list[np.ndarray], list[GrassmannPoint]] = None,
-        kernel: Kernel = None,
-        alpha: AlphaType = 0.5,
-        n_eigenvectors: IntegerLargerThanUnityType = 2,
-        is_sparse: bool = False,
-        n_neighbors: IntegerLargerThanUnityType = 1,
-        random_state: RandomStateType = None,
-        t: int = 1
+            self,
+            kernel_matrix: Numpy2DFloatArray = None,
+            data: Union[np.ndarray, list[GrassmannPoint]] = None,
+            kernel: Kernel = None,
+            alpha: AlphaType = 0.5,
+            n_eigenvectors: IntegerLargerThanUnityType = 2,
+            is_sparse: bool = False,
+            n_neighbors: IntegerLargerThanUnityType = 1,
+            random_state: RandomStateType = None,
+            t: int = 1
     ):
         """
 
@@ -57,10 +57,18 @@ def __init__(
          object itself can be passed directly.
         :param t: Time exponent.
         """
+        self.parsimonious_residuals = None
+        """Residuals calculated from the Parsimonious Representation. This attribute will only be populated if the 
+        :py:meth:`parsimonious`  method is invoked."""
+        self.parsimonious_indices = None
+        """Indices of the most important eigenvectors. This attribute will only be populated if the 
+        :py:meth:`parsimonious`  method is invoked."""
+
         if kernel_matrix is not None:
             self.kernel_matrix = kernel_matrix
         elif data is not None and kernel is not None:
-            self.kernel_matrix = kernel.calculate_kernel_matrix(points=data)
+            kernel.calculate_kernel_matrix(points=data)
+            self.kernel_matrix = kernel.kernel_matrix
         else:
             raise ValueError("Either `kernel_matrix` or both `data` and `kernel` must be provided")
 
@@ -90,15 +98,11 @@ def __init__(
         '''
         self.cut_off = None
 
-        if kernel_matrix is not None:
-            self.kernel_matrix = kernel_matrix
+        self._fit()
 
-    def _fit(self) -> tuple[NumpyFloatArray, NumpyFloatArray, NumpyFloatArray]:
+    def _fit(self):
         """
         Perform diffusion map embedding.
-
-        :returns: diffusion_coordinates, eigenvalues, eigenvectors
-
         """
 
         if self.is_sparse:
@@ -112,11 +116,7 @@ def _fit(self) -> tuple[NumpyFloatArray, NumpyFloatArray, NumpyFloatArray]:
 
         # Compute L^alpha = D^(-alpha)*L*D^(-alpha).
         m = d_inv.shape[0]
-        if self.is_sparse:
-            d_alpha = sps.spdiags(d_inv, 0, m, m)
-        else:
-            d_alpha = np.diag(d_inv)
-
+        d_alpha = sps.spdiags(d_inv, 0, m, m) if self.is_sparse else np.diag(d_inv)
         l_star = d_alpha.dot(self.kernel_matrix.dot(d_alpha))
 
         # d_star, d_star_inv = self._d_matrix(l_star, 1.0)
@@ -169,9 +169,7 @@ def __sparse_kernel(kernel_matrix, neighbors_number):
             if sum(kernel_matrix[i, :]) <= 0:
                 raise ValueError("UQpy: Consider increasing `n_neighbors` to have a connected graph.")
 
-        sparse_kernel_matrix = sps.csc_matrix(kernel_matrix)
-
-        return sparse_kernel_matrix
+        return sps.csc_matrix(kernel_matrix)
 
     @staticmethod
     def diffusion_distance(diffusion_coordinates: Numpy2DFloatArray) -> Numpy2DFloatArray:
@@ -201,8 +199,7 @@ def __normalize_kernel_matrix(self, kernel_matrix, inverse_diagonal_matrix):
 
         return normalized_kernel
 
-    @staticmethod
-    def parsimonious(eigenvectors: Numpy2DFloatArray, dim: int) -> tuple[list, NumpyFloatArray]:
+    def parsimonious(self, dim: int):
         """
         Selection of independent vectors for parsimonious data manifold embedding, based on
         local regression.  The eigenvectors with the largest residuals are considered for the
@@ -212,23 +209,22 @@ def parsimonious(eigenvectors: Numpy2DFloatArray, dim: int) -> tuple[list, Numpy
 
             scale = median(distances) / 3
 
-        :param eigenvectors: Eigenvectors of the diffusion maps embedding.
         :param dim: Number of eigenvectors to select with largest residuals.
-        :returns: indices, residuals
         """
 
-        residuals = np.zeros(eigenvectors.shape[1])
+        residuals = np.zeros(self.eigenvectors.shape[1])
         residuals[0] = np.nan
         # residual 1 for the first eigenvector.
         residuals[1] = 1.0
 
         # Get the residuals of each eigenvector.
-        for i in range(2, eigenvectors.shape[1]):
-            residuals[i] = DiffusionMaps.__get_residual(f_mat=eigenvectors[:, 1:i], f=eigenvectors[:, i])
+        for i in range(2, self.eigenvectors.shape[1]):
+            residuals[i] = DiffusionMaps.__get_residual(f_mat=self.eigenvectors[:, 1:i], f=self.eigenvectors[:, i])
 
         # Get the index of the eigenvalues associated with each residual.
         indices = np.argsort(residuals)[::-1][1:dim + 1]
-        return indices, residuals
+        self.parsimonious_indices = indices
+        self.parsimonious_residuals = residuals
 
     @staticmethod
     def __get_residual(f_mat, f):
@@ -255,64 +251,17 @@ def __get_residual(f_mat, f):
         return residual
 
     @staticmethod
-    def estimate_cut_off(data, k_nn: int = 20, n_partition: Union[None, int] = None,
-                         distance_matrix: Union[None, Numpy2DFloatArray] = None,
-                         random_state: Union[None, int] = None) -> float:
-        """
-        Estimates the cut-off for a Gaussian kernel, given a tolerance below which the kernel values are
-        considered zero.
-
-        :param data: Cloud of data points.
-        :param k_nn: k-th nearest neighbor distance to estimate the cut-off distance.
-        :param n_partition: maximum subsample used for the estimation. Ignored if :code:`distance_matrix is not None`.
-        :param distance_matrix:  Pre-computed distance matrix.
-        :param random_state: sets :code:`np.random.default_rng(random_state)`.
-        :return:
-
-        """
-        data = np.atleast_2d(data)
-        n_points = data.shape[0]
-        if n_points < 10:
-            d = scipy.spatial.distance.pdist(data)
-            return np.max(d)
-
-        if distance_matrix is None:
-            if n_partition is not None:
-                random_indices = np.random.default_rng(random_state).permutation(n_points)
-                distance_matrix = sd.cdist(data[random_indices[:n_partition]], data, metric='euclidean')
-                k = np.min([k_nn, distance_matrix.shape[1]])
-                k_smallest_values = np.partition(distance_matrix, k - 1, axis=1)[:, k - 1]
-            else:
-                distance_matrix = sd.squareform(sd.pdist(data, metric='euclidean'))
-                k = np.min([k_nn, distance_matrix.shape[1]])
-                k_smallest_values = np.partition(distance_matrix, k - 1, axis=1)[:, k - 1]
-        else:
-            k_smallest_values = np.partition(distance_matrix, k_nn - 1, axis=1)[:, k_nn - 1]
-        est_cutoff = np.max(k_smallest_values)
-        return float(est_cutoff)
-
-
-
-    @staticmethod
-    def eig_solver(kernel_matrix: Numpy2DFloatArray, is_symmetric: bool, n_eigenvectors: int) -> \
-            tuple[NumpyFloatArray, Numpy2DFloatArray]:
+    def eig_solver(kernel_matrix: Numpy2DFloatArray, is_symmetric: bool, n_eigenvectors: int) -> tuple[
+        NumpyFloatArray, Numpy2DFloatArray]:
 
         n_samples, n_features = kernel_matrix.shape
 
         if n_eigenvectors == n_features:
-            if is_symmetric:
-                solver = sp.linalg.eigh
-            else:
-                solver = sp.linalg.eig
-
+            solver = sp.linalg.eigh if is_symmetric else sp.linalg.eig
             solver_kwargs = {"check_finite": False}
 
         else:
-            if is_symmetric:
-                solver = eigsh
-            else:
-                solver = eigs
-
+            solver = eigsh if is_symmetric else eigs
             solver_kwargs = {
                 "sigma": None,
                 "k": n_eigenvectors,
@@ -393,4 +342,3 @@ def _plot_eigen_pairs(eigenvectors: Numpy2DFloatArray,
                         cmap=plt.cm.Spectral)
             plt.title(
                 r"$\Psi_{{{}}}$ vs. $\Psi_{{{}}}$".format(pair_indices[0], pair_indices[1]))
-
diff --git a/src/UQpy/utilities/kernels/BinetCauchyKernel.py b/src/UQpy/utilities/kernels/BinetCauchyKernel.py
@@ -16,7 +16,7 @@ class BinetCauchyKernel(GrassmannianKernel):
     def apply_method(self, points):
         points.evaluate_matrix(self, self.calculate_kernel_matrix)
 
-    def kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
+    def _kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
         """
         Compute the Binet-Cauchy kernel entry for two points on the Grassmann manifold.
 

diff --git a/src/UQpy/utilities/kernels/GaussianKernel.py b/src/UQpy/utilities/kernels/GaussianKernel.py
@@ -18,7 +18,7 @@ def __init__(self, epsilon: float = 1.0):
         super().__init__()
         self.epsilon = epsilon
 
-    def kernel_entry(self, xi, xj):
+    def _kernel_entry(self, xi, xj):
         return np.linalg.norm(xi - xj, "fro") ** 2
 
     def kernel_function(self, distance_pairs):

diff --git a/src/UQpy/utilities/kernels/ProjectionKernel.py b/src/UQpy/utilities/kernels/ProjectionKernel.py
@@ -16,7 +16,7 @@ class ProjectionKernel(GrassmannianKernel):
     def apply_method(self, points):
         points.evaluate_matrix(self, self.calculate_kernel_matrix)
 
-    def kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
+    def _kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
         """
         Compute the Projection kernel entry for two points on the Grassmann manifold.