Skip to content

Commit

Permalink
DiffusionMaps and Kernels refactoring according to #168
Browse files Browse the repository at this point in the history
  • Loading branch information
dimtsap committed Apr 6, 2022
1 parent 0ecb1cf commit 9cc21da
Show file tree
Hide file tree
Showing 14 changed files with 87 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,8 @@

# %%

dmaps = DiffusionMaps.build_from_data(data=X, alpha=1, n_eigenvectors=5,
kernel=GaussianKernel(epsilon=0.3))

# %% md
#
# Use the method `mapping` to compute the diffusion coordinates assuming `epsilon=0.3`.

# %%

dmaps.fit()
dmaps = DiffusionMaps(data=X, alpha=1, n_eigenvectors=5,
kernel=GaussianKernel(epsilon=0.3))

# %% md
#
Expand All @@ -90,4 +82,3 @@
ax = fig.gca(projection='3d')
ax.scatter(x, y, z, c=color, cmap=plt.cm.Spectral, s=8)
plt.show()

Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,25 @@
# Case 1: Find the optimal parameter of the Gaussian kernel scale epsilon
kernel = GaussianKernel()

dmaps_object = DiffusionMaps.build_from_data(data=X,
alpha=1.0, n_eigenvectors=9,
is_sparse=True, n_neighbors=100,
optimize_parameters=True,
kernel=kernel)
dmaps_object = DiffusionMaps(data=X,
alpha=1.0, n_eigenvectors=9,
is_sparse=True, n_neighbors=100,
kernel=kernel)

print('epsilon', kernel.epsilon)

# %% md
# Fit the data and calculate the embedding, the eigenvectors and eigenvalues
dmaps_object.fit()

# %% md
#
# Find the parsimonious representation of the eigenvectors. Identify the two most informative
# eigenvectors.

index, residuals = DiffusionMaps.parsimonious(dmaps_object.eigenvectors, 2)
dmaps_object.parsimonious(dim=2)

print('most informative eigenvectors:', index)
print('most informative eigenvectors:', dmaps_object.parsimonious_indices)

# %% md
#
# Plot the diffusion coordinates

DiffusionMaps._plot_eigen_pairs(dmaps_object.eigenvectors, pair_indices=index, color=X_color, figure_size=[12, 12])
DiffusionMaps._plot_eigen_pairs(dmaps_object.eigenvectors, pair_indices=dmaps_object.parsimonious_indices,
color=X_color, figure_size=[12, 12])
plt.show()
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@
#
# Diffusion maps with Grassmann kernel
kernel = ProjectionKernel()
kernel_matrix = kernel.calculate_kernel_matrix(psi)
kernel.calculate_kernel_matrix(psi)


Gdmaps_UQpy = DiffusionMaps(alpha=1.0, n_eigenvectors=5, is_sparse=True, n_neighbors=250,
kernel_matrix=kernel_matrix)
Gdmaps_UQpy.fit()
kernel_matrix=kernel.kernel_matrix)

#%%
#
Expand Down
24 changes: 15 additions & 9 deletions docs/code/dimension_reduction/grassmann/plot_grassmann_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,11 @@
# %%
projection_kernel = ProjectionKernel()

kernel_psi = projection_kernel.calculate_kernel_matrix(points=manifold_projection.u)
kernel_phi = projection_kernel.calculate_kernel_matrix(points=manifold_projection.v)
projection_kernel.calculate_kernel_matrix(points=manifold_projection.u)
kernel_psi = projection_kernel.kernel_matrix

projection_kernel.calculate_kernel_matrix(points=manifold_projection.v)
kernel_phi = projection_kernel.kernel_matrix

fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.title.set_text('kernel_psi')
Expand All @@ -89,9 +92,10 @@

# %%

kernel_01 = projection_kernel.calculate_kernel_matrix(points=[manifold_projection.u[0],
manifold_projection.u[1],
manifold_projection.u[2]])
projection_kernel.calculate_kernel_matrix(points=[manifold_projection.u[0],
manifold_projection.u[1],
manifold_projection.u[2]])
kernel_01 = projection_kernel.kernel_matrix

fig = plt.figure()
plt.imshow(kernel_01)
Expand All @@ -105,18 +109,20 @@
# %%
from UQpy.utilities.kernels.baseclass.GrassmannianKernel import GrassmannianKernel


class UserKernel(GrassmannianKernel):

def kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint):
def _kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint):
r = np.dot(xi.data.T, xj.data)
det = np.linalg.det(r)
return det * det


user_kernel = UserKernel()
kernel_user_psi = user_kernel.calculate_kernel_matrix(points=manifold_projection.u)
kernel_user_phi = user_kernel.calculate_kernel_matrix(points=manifold_projection.v)
user_kernel.calculate_kernel_matrix(points=manifold_projection.u)
kernel_user_psi = user_kernel.kernel_matrix

user_kernel.calculate_kernel_matrix(points=manifold_projection.v)
kernel_user_phi = user_kernel.kernel_matrix

fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.title.set_text('kernel_psi')
Expand Down
4 changes: 3 additions & 1 deletion docs/source/dimension_reduction/dmaps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@ One can use the following method to instantiate the :class:`.DiffusionMaps` clas
Methods
~~~~~~~~~~~
.. autoclass:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps
:members: build_from_data, fit, parsimonious, estimate_epsilon
:members: parsimonious

Attributes
~~~~~~~~~~~~~
.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.transition_matrix
.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.diffusion_coordinates
.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.eigenvectors
.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.eigenvalues
.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.parsimonious_indices
.. autoattribute:: UQpy.dimension_reduction.diffusion_maps.DiffusionMaps.parsimonious_residuals

Examples
~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion docs/source/utilities/kernels/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The :class:`UQpy.utilities.kernels.baseclass.Kernel` class is imported using the
>>> from UQpy.utilities.kernels.baseclass.Kernel import Kernel

.. autoclass:: UQpy.utilities.kernels.baseclass.Kernel
:members: kernel_entry
:members: _kernel_entry


.. toctree::
Expand Down
118 changes: 33 additions & 85 deletions src/UQpy/dimension_reduction/diffusion_maps/DiffusionMaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ class DiffusionMaps:

@beartype
def __init__(
self,
kernel_matrix: Numpy2DFloatArray,
data: Union[list[np.ndarray], list[GrassmannPoint]] = None,
kernel: Kernel = None,
alpha: AlphaType = 0.5,
n_eigenvectors: IntegerLargerThanUnityType = 2,
is_sparse: bool = False,
n_neighbors: IntegerLargerThanUnityType = 1,
random_state: RandomStateType = None,
t: int = 1
self,
kernel_matrix: Numpy2DFloatArray = None,
data: Union[np.ndarray, list[GrassmannPoint]] = None,
kernel: Kernel = None,
alpha: AlphaType = 0.5,
n_eigenvectors: IntegerLargerThanUnityType = 2,
is_sparse: bool = False,
n_neighbors: IntegerLargerThanUnityType = 1,
random_state: RandomStateType = None,
t: int = 1
):
"""
Expand All @@ -57,10 +57,18 @@ def __init__(
object itself can be passed directly.
:param t: Time exponent.
"""
self.parsimonious_residuals = None
"""Residuals calculated from the Parsimonious Representation. This attribute will only be populated if the
:py:meth:`parsimonious` method is invoked."""
self.parsimonious_indices = None
"""Indices of the most important eigenvectors. This attribute will only be populated if the
:py:meth:`parsimonious` method is invoked."""

if kernel_matrix is not None:
self.kernel_matrix = kernel_matrix
elif data is not None and kernel is not None:
self.kernel_matrix = kernel.calculate_kernel_matrix(points=data)
kernel.calculate_kernel_matrix(points=data)
self.kernel_matrix = kernel.kernel_matrix
else:
raise ValueError("Either `kernel_matrix` or both `data` and `kernel` must be provided")

Expand Down Expand Up @@ -90,15 +98,11 @@ def __init__(
'''
self.cut_off = None

if kernel_matrix is not None:
self.kernel_matrix = kernel_matrix
self._fit()

def _fit(self) -> tuple[NumpyFloatArray, NumpyFloatArray, NumpyFloatArray]:
def _fit(self):
"""
Perform diffusion map embedding.
:returns: diffusion_coordinates, eigenvalues, eigenvectors
"""

if self.is_sparse:
Expand All @@ -112,11 +116,7 @@ def _fit(self) -> tuple[NumpyFloatArray, NumpyFloatArray, NumpyFloatArray]:

# Compute L^alpha = D^(-alpha)*L*D^(-alpha).
m = d_inv.shape[0]
if self.is_sparse:
d_alpha = sps.spdiags(d_inv, 0, m, m)
else:
d_alpha = np.diag(d_inv)

d_alpha = sps.spdiags(d_inv, 0, m, m) if self.is_sparse else np.diag(d_inv)
l_star = d_alpha.dot(self.kernel_matrix.dot(d_alpha))

# d_star, d_star_inv = self._d_matrix(l_star, 1.0)
Expand Down Expand Up @@ -169,9 +169,7 @@ def __sparse_kernel(kernel_matrix, neighbors_number):
if sum(kernel_matrix[i, :]) <= 0:
raise ValueError("UQpy: Consider increasing `n_neighbors` to have a connected graph.")

sparse_kernel_matrix = sps.csc_matrix(kernel_matrix)

return sparse_kernel_matrix
return sps.csc_matrix(kernel_matrix)

@staticmethod
def diffusion_distance(diffusion_coordinates: Numpy2DFloatArray) -> Numpy2DFloatArray:
Expand Down Expand Up @@ -201,8 +199,7 @@ def __normalize_kernel_matrix(self, kernel_matrix, inverse_diagonal_matrix):

return normalized_kernel

@staticmethod
def parsimonious(eigenvectors: Numpy2DFloatArray, dim: int) -> tuple[list, NumpyFloatArray]:
def parsimonious(self, dim: int):
"""
Selection of independent vectors for parsimonious data manifold embedding, based on
local regression. The eigenvectors with the largest residuals are considered for the
Expand All @@ -212,23 +209,22 @@ def parsimonious(eigenvectors: Numpy2DFloatArray, dim: int) -> tuple[list, Numpy
scale = median(distances) / 3
:param eigenvectors: Eigenvectors of the diffusion maps embedding.
:param dim: Number of eigenvectors to select with largest residuals.
:returns: indices, residuals
"""

residuals = np.zeros(eigenvectors.shape[1])
residuals = np.zeros(self.eigenvectors.shape[1])
residuals[0] = np.nan
# residual 1 for the first eigenvector.
residuals[1] = 1.0

# Get the residuals of each eigenvector.
for i in range(2, eigenvectors.shape[1]):
residuals[i] = DiffusionMaps.__get_residual(f_mat=eigenvectors[:, 1:i], f=eigenvectors[:, i])
for i in range(2, self.eigenvectors.shape[1]):
residuals[i] = DiffusionMaps.__get_residual(f_mat=self.eigenvectors[:, 1:i], f=self.eigenvectors[:, i])

# Get the index of the eigenvalues associated with each residual.
indices = np.argsort(residuals)[::-1][1:dim + 1]
return indices, residuals
self.parsimonious_indices = indices
self.parsimonious_residuals = residuals

@staticmethod
def __get_residual(f_mat, f):
Expand All @@ -255,64 +251,17 @@ def __get_residual(f_mat, f):
return residual

@staticmethod
def estimate_cut_off(data, k_nn: int = 20, n_partition: Union[None, int] = None,
distance_matrix: Union[None, Numpy2DFloatArray] = None,
random_state: Union[None, int] = None) -> float:
"""
Estimates the cut-off for a Gaussian kernel, given a tolerance below which the kernel values are
considered zero.
:param data: Cloud of data points.
:param k_nn: k-th nearest neighbor distance to estimate the cut-off distance.
:param n_partition: maximum subsample used for the estimation. Ignored if :code:`distance_matrix is not None`.
:param distance_matrix: Pre-computed distance matrix.
:param random_state: sets :code:`np.random.default_rng(random_state)`.
:return:
"""
data = np.atleast_2d(data)
n_points = data.shape[0]
if n_points < 10:
d = scipy.spatial.distance.pdist(data)
return np.max(d)

if distance_matrix is None:
if n_partition is not None:
random_indices = np.random.default_rng(random_state).permutation(n_points)
distance_matrix = sd.cdist(data[random_indices[:n_partition]], data, metric='euclidean')
k = np.min([k_nn, distance_matrix.shape[1]])
k_smallest_values = np.partition(distance_matrix, k - 1, axis=1)[:, k - 1]
else:
distance_matrix = sd.squareform(sd.pdist(data, metric='euclidean'))
k = np.min([k_nn, distance_matrix.shape[1]])
k_smallest_values = np.partition(distance_matrix, k - 1, axis=1)[:, k - 1]
else:
k_smallest_values = np.partition(distance_matrix, k_nn - 1, axis=1)[:, k_nn - 1]
est_cutoff = np.max(k_smallest_values)
return float(est_cutoff)



@staticmethod
def eig_solver(kernel_matrix: Numpy2DFloatArray, is_symmetric: bool, n_eigenvectors: int) -> \
tuple[NumpyFloatArray, Numpy2DFloatArray]:
def eig_solver(kernel_matrix: Numpy2DFloatArray, is_symmetric: bool, n_eigenvectors: int) -> tuple[
NumpyFloatArray, Numpy2DFloatArray]:

n_samples, n_features = kernel_matrix.shape

if n_eigenvectors == n_features:
if is_symmetric:
solver = sp.linalg.eigh
else:
solver = sp.linalg.eig

solver = sp.linalg.eigh if is_symmetric else sp.linalg.eig
solver_kwargs = {"check_finite": False}

else:
if is_symmetric:
solver = eigsh
else:
solver = eigs

solver = eigsh if is_symmetric else eigs
solver_kwargs = {
"sigma": None,
"k": n_eigenvectors,
Expand Down Expand Up @@ -393,4 +342,3 @@ def _plot_eigen_pairs(eigenvectors: Numpy2DFloatArray,
cmap=plt.cm.Spectral)
plt.title(
r"$\Psi_{{{}}}$ vs. $\Psi_{{{}}}$".format(pair_indices[0], pair_indices[1]))

2 changes: 1 addition & 1 deletion src/UQpy/utilities/kernels/BinetCauchyKernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class BinetCauchyKernel(GrassmannianKernel):
def apply_method(self, points):
points.evaluate_matrix(self, self.calculate_kernel_matrix)

def kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
def _kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
"""
Compute the Binet-Cauchy kernel entry for two points on the Grassmann manifold.
Expand Down
2 changes: 1 addition & 1 deletion src/UQpy/utilities/kernels/GaussianKernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self, epsilon: float = 1.0):
super().__init__()
self.epsilon = epsilon

def kernel_entry(self, xi, xj):
def _kernel_entry(self, xi, xj):
return np.linalg.norm(xi - xj, "fro") ** 2

def kernel_function(self, distance_pairs):
Expand Down
2 changes: 1 addition & 1 deletion src/UQpy/utilities/kernels/ProjectionKernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ProjectionKernel(GrassmannianKernel):
def apply_method(self, points):
points.evaluate_matrix(self, self.calculate_kernel_matrix)

def kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
def _kernel_entry(self, xi: GrassmannPoint, xj: GrassmannPoint) -> float:
"""
Compute the Projection kernel entry for two points on the Grassmann manifold.
Expand Down
Loading

0 comments on commit 9cc21da

Please sign in to comment.