Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to consistency of MNMG PCA/TSVD solvers (docs + code consolidation) #4556

Merged
merged 7 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions python/cuml/dask/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ class PCA(BaseDecomposition,
the data. N_components is usually small, say at 3, where it can be used for
data visualization, data compression and exploratory analysis.

cuML's multi-node multi-gpu (MNMG) PCA expects a dask cuDF input, and
provides a "Full" algorithm. It uses a full eigendecomposition
then selects the top K eigenvectors.
cuML's multi-node multi-gpu (MNMG) PCA expects a dask-cuDF object as input
and provides 2 algorithms, Full and Jacobi. Full (default) uses a full
eigendecomposition then selects the top K eigenvectors. The Jacobi algorithm
can be much faster as it iteratively tries to correct the top K eigenvectors,
but might be less accurate.

Examples
--------
Expand Down Expand Up @@ -107,9 +109,10 @@ class PCA(BaseDecomposition,
n_components : int (default = 1)
The number of top K singular vectors / values you want.
Must be <= number(columns).
svd_solver : 'full', 'jacobi', or 'tsqr'
'full': run exact full SVD and select the components by postprocessing
'jacobi': iteratively compute SVD of the covariance matrix
svd_solver : 'full', 'jacobi', 'auto'
'full': Run exact full SVD and select the components by postprocessing
'jacobi': Iteratively compute SVD of the covariance matrix
'auto': For compatibility with Scikit-learn. Alias for 'jacobi'.
verbose : int or boolean, default=False
Sets logging level. It must be one of `cuml.common.logger.level_*`.
See :ref:`verbosity-levels` for more info.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/dask/decomposition/tsvd.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class TruncatedSVD(BaseDecomposition,
n_components : int (default = 1)
The number of top K singular vectors / values you want.
Must be <= number(columns).
svd_solver : 'full'
svd_solver : 'full', 'jacobi'
Only Full algorithm is supported since it's significantly faster on GPU
than the other solvers including randomized SVD.
verbose : int or boolean, default=False
Expand Down
28 changes: 2 additions & 26 deletions python/cuml/decomposition/pca_mg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,13 @@ import cuml.common.opg_data_utils_mg as opg
import cuml.internals
from cuml.common.base import Base
from cuml.raft.common.handle cimport handle_t
from cuml.decomposition.utils cimport paramsSolver
from cuml.decomposition.utils cimport *
from cuml.common.opg_data_utils_mg cimport *

from cuml.decomposition import PCA
from cuml.decomposition.base_mg import BaseDecompositionMG


ctypedef int underlying_type_t_solver


cdef extern from "cuml/decomposition/pca_mg.hpp" namespace "ML":

ctypedef enum mg_solver "ML::mg_solver":
COV_EIG_DQ "ML::mg_solver::COV_EIG_DQ"
COV_EIG_JACOBI "ML::mg_solver::COV_EIG_JACOBI"
QR "ML::mg_solver::QR"

cdef cppclass paramsTSVDMG(paramsSolver):
size_t n_components
mg_solver algorithm # = solver::COV_EIG_DQ

cdef cppclass paramsPCAMG(paramsTSVDMG):
bool copy
bool whiten


cdef extern from "cuml/decomposition/pca_mg.hpp" namespace "ML::PCA::opg":

Expand Down Expand Up @@ -87,13 +69,6 @@ cdef extern from "cuml/decomposition/pca_mg.hpp" namespace "ML::PCA::opg":
paramsPCAMG &prms,
bool verbose) except +


class MGSolver(IntEnum):
COV_EIG_DQ = <underlying_type_t_solver> mg_solver.COV_EIG_DQ
COV_EIG_JACOBI = <underlying_type_t_solver> mg_solver.COV_EIG_JACOBI
QR = <underlying_type_t_solver> mg_solver.QR


class PCAMG(BaseDecompositionMG, PCA):

def __init__(self, **kwargs):
Expand All @@ -103,6 +78,7 @@ class PCAMG(BaseDecompositionMG, PCA):
algo_map = {
'full': MGSolver.COV_EIG_DQ,
'auto': MGSolver.COV_EIG_JACOBI,
'jacobi': MGSoler.COV_EIG_JACOBI,
# 'arpack': NOT_SUPPORTED,
# 'randomized': NOT_SUPPORTED,
}
Expand Down
18 changes: 15 additions & 3 deletions python/cuml/decomposition/tsvd_mg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ cdef extern from "cuml/decomposition/tsvd_mg.hpp" namespace "ML::TSVD::opg":
float *explained_var,
float *explained_var_ratio,
float *singular_vals,
paramsTSVD &prms,
paramsTSVDMG &prms,
bool verbose) except +

cdef void fit_transform(handle_t& handle,
Expand All @@ -60,7 +60,7 @@ cdef extern from "cuml/decomposition/tsvd_mg.hpp" namespace "ML::TSVD::opg":
double *explained_var,
double *explained_var_ratio,
double *singular_vals,
paramsTSVD &prms,
paramsTSVDMG &prms,
bool verbose) except +


Expand All @@ -69,6 +69,18 @@ class TSVDMG(BaseDecompositionMG, TruncatedSVD):
def __init__(self, **kwargs):
super(TSVDMG, self).__init__(**kwargs)

def _build_params(self, n_rows, n_cols):
cpdef paramsPCAMG *params = new paramsTSVDMG()
cjnolet marked this conversation as resolved.
Show resolved Hide resolved
params.n_components = self._n_components
params.n_rows = n_rows
params.n_cols = n_cols
params.n_iterations = self.iterated_power
params.tol = self.tol
params.algorithm = <mg_solver> (<underlying_type_t_solver> (
self.c_algorithm))

return <size_t>params

@cuml.internals.api_base_return_any_skipall
def _call_fit(self, X, trans, rank, input_desc,
trans_desc, arg_params):
Expand All @@ -80,7 +92,7 @@ class TSVDMG(BaseDecompositionMG, TruncatedSVD):
cdef uintptr_t singular_vals_ptr = self.singular_values_.ptr
cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()

cdef paramsTSVD *params = <paramsTSVD*><size_t>arg_params
cdef paramsTSVDMG *params = <paramsTSVDMG*><size_t>arg_params

if self.dtype == np.float32:

Expand Down
20 changes: 20 additions & 0 deletions python/cuml/decomposition/utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@ from libcpp cimport bool

ctypedef int underlying_type_t_solver

class MGSolver(IntEnum):
COV_EIG_DQ = <underlying_type_t_solver> mg_solver.COV_EIG_DQ
COV_EIG_JACOBI = <underlying_type_t_solver> mg_solver.COV_EIG_JACOBI
QR = <underlying_type_t_solver> mg_solver.QR

cdef extern from "cuml/decomposition/pca_mg.hpp" namespace "ML" nogil:

ctypedef enum mg_solver "ML::mg_solver":
COV_EIG_DQ "ML::mg_solver::COV_EIG_DQ"
COV_EIG_JACOBI "ML::mg_solver::COV_EIG_JACOBI"
QR "ML::mg_solver::QR"

cdef cppclass paramsTSVDMG(paramsSolver):
size_t n_components
mg_solver algorithm # = solver::COV_EIG_DQ

cdef cppclass paramsPCAMG(paramsTSVDMG):
bool copy
bool whiten

cdef extern from "cuml/decomposition/params.hpp" namespace "ML" nogil:

ctypedef enum solver "ML::solver":
Expand Down