From f0399287c07415c04c0981151ca6811371b7095a Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 28 Sep 2021 15:10:34 +0200 Subject: [PATCH 1/8] Initial implementation of 64-bit eig --- cpp/include/raft/linalg/cusolver_wrappers.h | 77 +++++++++++++++++++++ cpp/include/raft/linalg/eig.cuh | 77 +++++++++++++++++++-- 2 files changed, 150 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index 0eadf47fe3..6b08ec6c38 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -719,5 +719,82 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT } /** @} */ +/** + * @defgroup DnXsyevd cusolver DnXsyevd operations + * @{ + */ +template +cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, + cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, const T *A, + int64_t lda, const T *W, size_t *workspaceInBytesOnDevice, + size_t *workspaceInBytesOnHost, cudaStream_t stream); + + +template <> +inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, + cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, const float *A, + int64_t lda, const float *W, size_t *workspaceInBytesOnDevice, + size_t *workspaceInBytesOnHost, cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd_bufferSize( + handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, + CUDA_R_32F, W, CUDA_R_32F, workspaceInBytesOnDevice, + workspaceInBytesOnHost); +} + +template <> +inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, + cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, const double *A, + int64_t lda, const double *W, size_t *workspaceInBytesOnDevice, + size_t *workspaceInBytesOnHost, cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd_bufferSize( + handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, + CUDA_R_64F, W, CUDA_R_64F, workspaceInBytesOnDevice, + workspaceInBytesOnHost); +} + + +template +cusolverStatus_t cusolverDnxsyevd( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, + cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, T *A, + int64_t lda, T *W, T *bufferOnDevice, size_t workspaceInBytesOnDevice, + T *bufferOnHost, size_t workspaceInBytesOnHost, int *info, + cudaStream_t stream); + +template <> +inline cusolverStatus_t cusolverDnxsyevd( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, + cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, float *A, + int64_t lda, float *W, float *bufferOnDevice, size_t workspaceInBytesOnDevice, + float *bufferOnHost, size_t workspaceInBytesOnHost, int *info, + cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd( + handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, + CUDA_R_32F, W, CUDA_R_32F, bufferOnDevice, workspaceInBytesOnDevice, + bufferOnHost, workspaceInBytesOnHost, info); +} + +template <> +inline cusolverStatus_t cusolverDnxsyevd( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, + cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, double *A, + int64_t lda, double *W, double *bufferOnDevice, size_t workspaceInBytesOnDevice, + double *bufferOnHost, size_t workspaceInBytesOnHost, int *info, + cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd( + handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, + CUDA_R_64F, W, CUDA_R_64F, bufferOnDevice, workspaceInBytesOnDevice, + bufferOnHost, workspaceInBytesOnHost, info); +} + +/** @} */ + } // namespace linalg } // namespace raft diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 5b2df3bcb3..f1eadfcee7 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -155,15 +155,15 @@ void eigSelDC(const raft::handle_t &handle, math_t *in, int n_rows, int n_cols, * @{ */ template -void eigJacobi(const raft::handle_t &handle, const math_t *in, int n_rows, - int n_cols, math_t *eig_vectors, math_t *eig_vals, - cudaStream_t stream, math_t tol = 1.e-7, int sweeps = 15) { +void eigJacobi(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, + std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, + cudaStream_t stream, math_t tol = 1.e-7, std::size_t sweeps = 15) { cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); syevjInfo_t syevj_params = nullptr; CUSOLVER_CHECK(cusolverDnCreateSyevjInfo(&syevj_params)); CUSOLVER_CHECK(cusolverDnXsyevjSetTolerance(syevj_params, tol)); - CUSOLVER_CHECK(cusolverDnXsyevjSetMaxSweeps(syevj_params, sweeps)); + CUSOLVER_CHECK(cusolverDnXsyevjSetMaxSweeps(syevj_params, static_cast(sweeps))); int lwork; CUSOLVER_CHECK(cusolverDnsyevj_bufferSize( @@ -188,5 +188,74 @@ void eigJacobi(const raft::handle_t &handle, const math_t *in, int n_rows, CUSOLVER_CHECK(cusolverDnDestroySyevjInfo(syevj_params)); } + +/** + * @defgroup overloaded function for eig decomp with QR method for the + * column-major symmetric matrices (in parameter) + * @param handle: raft handle + * @param n_rows: number of rows of the input + * @param n_cols: number of cols of the input + * @param eig_vectors: eigenvectors + * @param eig_vals: eigen values + * @param tol: error tolerance for the jacobi method. Algorithm stops when the + * error is below tol + * @param sweeps: number of sweeps in the Jacobi algorithm. The more the better + * accuracy. + * @{ + */ + template + void eigQR(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, + std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, + cudaStream_t stream) { + cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); + + cusolverDnParams_t dn_params = nullptr; + CUSOLVER_CHECK(cusolverDnCreateParams(&dn_params)); + + size_t workspaceDevice = 0; + size_t workspaceHost = 0; + CUSOLVER_CHECK(cusolverDnxsyevd_bufferSize( + cusolverH, + dn_params, + CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, + static_cast(n_rows), + eig_vectors, + static_cast(n_cols), + eig_vals, + &workspaceDevice, + &workspaceHost, + stream + )); + + rmm::device_uvector d_work(workspaceDevice, stream); + rmm::device_scalar dev_info(stream); + std::vector h_work(workspaceHost); + + raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); + + CUSOLVER_CHECK(cusolverDnxsyevd( + cusolverH, + dn_params, + CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, + static_cast(n_rows), + eig_vectors, + static_cast(n_cols), + eig_vals, + d_work.data(), + &workspaceDevice, + h_work.data(), + &workspaceHost, + dev_info.data(), + stream + )); + + + CUDA_CHECK(cudaGetLastError()); + CUSOLVER_CHECK(cusolverDnDestroyParams(dn_params)); + } + + }; // end namespace linalg }; // end namespace raft From ba7281e935982a34c21cae02c1709d34c8d39fa0 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 29 Sep 2021 22:36:34 +0200 Subject: [PATCH 2/8] Fix vector size --- cpp/include/raft/linalg/eig.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index f1eadfcee7..5400d1f61b 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -228,9 +228,9 @@ void eigJacobi(const raft::handle_t &handle, const math_t *in, std::size_t n_row stream )); - rmm::device_uvector d_work(workspaceDevice, stream); + rmm::device_uvector d_work(workspaceDevice / sizeof(math_t), stream); rmm::device_scalar dev_info(stream); - std::vector h_work(workspaceHost); + std::vector h_work(workspaceHost / sizeof(math_t)); raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); @@ -244,9 +244,9 @@ void eigJacobi(const raft::handle_t &handle, const math_t *in, std::size_t n_row static_cast(n_cols), eig_vals, d_work.data(), - &workspaceDevice, + workspaceDevice, h_work.data(), - &workspaceHost, + workspaceHost, dev_info.data(), stream )); From 2b4da97b9a0a88757e5e37b0a9d0a00187823835 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 30 Sep 2021 19:42:57 +0200 Subject: [PATCH 3/8] Fix style --- cpp/include/raft/linalg/cusolver_wrappers.h | 77 ++++++-------- cpp/include/raft/linalg/eig.cuh | 112 +++++--------------- 2 files changed, 61 insertions(+), 128 deletions(-) diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index 6b08ec6c38..8aafddb534 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -725,73 +725,66 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT */ template cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, const T *A, - int64_t lda, const T *W, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost, cudaStream_t stream); - + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, const T *A, int64_t lda, const T *W, + size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost, + cudaStream_t stream); template <> inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, const float *A, - int64_t lda, const float *W, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost, cudaStream_t stream) { + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, const float *A, int64_t lda, const float *W, + size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost, + cudaStream_t stream) { CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); return cusolverDnXsyevd_bufferSize( - handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, - CUDA_R_32F, W, CUDA_R_32F, workspaceInBytesOnDevice, - workspaceInBytesOnHost); + handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, CUDA_R_32F, W, + CUDA_R_32F, workspaceInBytesOnDevice, workspaceInBytesOnHost); } template <> inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, const double *A, - int64_t lda, const double *W, size_t *workspaceInBytesOnDevice, + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, const double *A, int64_t lda, + const double *W, size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost, cudaStream_t stream) { CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); return cusolverDnXsyevd_bufferSize( - handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, - CUDA_R_64F, W, CUDA_R_64F, workspaceInBytesOnDevice, - workspaceInBytesOnHost); + handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, CUDA_R_64F, W, + CUDA_R_64F, workspaceInBytesOnDevice, workspaceInBytesOnHost); } - template cusolverStatus_t cusolverDnxsyevd( // NOLINT - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, T *A, - int64_t lda, T *W, T *bufferOnDevice, size_t workspaceInBytesOnDevice, - T *bufferOnHost, size_t workspaceInBytesOnHost, int *info, - cudaStream_t stream); + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, T *A, int64_t lda, T *W, T *bufferOnDevice, + size_t workspaceInBytesOnDevice, T *bufferOnHost, + size_t workspaceInBytesOnHost, int *info, cudaStream_t stream); template <> inline cusolverStatus_t cusolverDnxsyevd( // NOLINT - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, float *A, - int64_t lda, float *W, float *bufferOnDevice, size_t workspaceInBytesOnDevice, - float *bufferOnHost, size_t workspaceInBytesOnHost, int *info, - cudaStream_t stream) { + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, float *A, int64_t lda, float *W, + float *bufferOnDevice, size_t workspaceInBytesOnDevice, float *bufferOnHost, + size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) { CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); - return cusolverDnXsyevd( - handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, - CUDA_R_32F, W, CUDA_R_32F, bufferOnDevice, workspaceInBytesOnDevice, - bufferOnHost, workspaceInBytesOnHost, info); + return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, + CUDA_R_32F, W, CUDA_R_32F, bufferOnDevice, + workspaceInBytesOnDevice, bufferOnHost, + workspaceInBytesOnHost, info); } template <> inline cusolverStatus_t cusolverDnxsyevd( // NOLINT - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, double *A, - int64_t lda, double *W, double *bufferOnDevice, size_t workspaceInBytesOnDevice, - double *bufferOnHost, size_t workspaceInBytesOnHost, int *info, - cudaStream_t stream) { + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, double *A, int64_t lda, double *W, + double *bufferOnDevice, size_t workspaceInBytesOnDevice, double *bufferOnHost, + size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) { CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); - return cusolverDnXsyevd( - handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, - CUDA_R_64F, W, CUDA_R_64F, bufferOnDevice, workspaceInBytesOnDevice, - bufferOnHost, workspaceInBytesOnHost, info); + return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, + CUDA_R_64F, W, CUDA_R_64F, bufferOnDevice, + workspaceInBytesOnDevice, bufferOnHost, + workspaceInBytesOnHost, info); } /** @} */ diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 5400d1f61b..8eebb3b166 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -42,27 +42,35 @@ namespace linalg { * @{ */ template -void eigDC(const raft::handle_t &handle, const math_t *in, int n_rows, - int n_cols, math_t *eig_vectors, math_t *eig_vals, +void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, + std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, cudaStream_t stream) { cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); - int lwork; - CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n_rows, in, - n_cols, eig_vals, &lwork)); + cusolverDnParams_t dn_params = nullptr; + CUSOLVER_CHECK(cusolverDnCreateParams(&dn_params)); - rmm::device_uvector d_work(lwork, stream); + size_t workspaceDevice = 0; + size_t workspaceHost = 0; + CUSOLVER_CHECK(cusolverDnxsyevd_bufferSize( + cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, + static_cast(n_rows), eig_vectors, static_cast(n_cols), + eig_vals, &workspaceDevice, &workspaceHost, stream)); + + rmm::device_uvector d_work(workspaceDevice / sizeof(math_t), stream); rmm::device_scalar d_dev_info(stream); + std::vector h_work(workspaceHost / sizeof(math_t)); raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); - CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors, - n_cols, eig_vals, d_work.data(), lwork, - d_dev_info.data(), stream)); - CUDA_CHECK(cudaGetLastError()); + CUSOLVER_CHECK(cusolverDnxsyevd( + cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, + static_cast(n_rows), eig_vectors, static_cast(n_cols), + eig_vals, d_work.data(), workspaceDevice, h_work.data(), workspaceHost, + d_dev_info.data(), stream)); + CUDA_CHECK(cudaGetLastError()); + CUSOLVER_CHECK(cusolverDnDestroyParams(dn_params)); int dev_info = d_dev_info.value(stream); ASSERT(dev_info == 0, "eig.cuh: eigensolver couldn't converge to a solution. " @@ -155,15 +163,17 @@ void eigSelDC(const raft::handle_t &handle, math_t *in, int n_rows, int n_cols, * @{ */ template -void eigJacobi(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, - std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, - cudaStream_t stream, math_t tol = 1.e-7, std::size_t sweeps = 15) { +void eigJacobi(const raft::handle_t &handle, const math_t *in, + std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, + math_t *eig_vals, cudaStream_t stream, math_t tol = 1.e-7, + std::size_t sweeps = 15) { cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); syevjInfo_t syevj_params = nullptr; CUSOLVER_CHECK(cusolverDnCreateSyevjInfo(&syevj_params)); CUSOLVER_CHECK(cusolverDnXsyevjSetTolerance(syevj_params, tol)); - CUSOLVER_CHECK(cusolverDnXsyevjSetMaxSweeps(syevj_params, static_cast(sweeps))); + CUSOLVER_CHECK( + cusolverDnXsyevjSetMaxSweeps(syevj_params, static_cast(sweeps))); int lwork; CUSOLVER_CHECK(cusolverDnsyevj_bufferSize( @@ -187,75 +197,5 @@ void eigJacobi(const raft::handle_t &handle, const math_t *in, std::size_t n_row CUDA_CHECK(cudaGetLastError()); CUSOLVER_CHECK(cusolverDnDestroySyevjInfo(syevj_params)); } - - -/** - * @defgroup overloaded function for eig decomp with QR method for the - * column-major symmetric matrices (in parameter) - * @param handle: raft handle - * @param n_rows: number of rows of the input - * @param n_cols: number of cols of the input - * @param eig_vectors: eigenvectors - * @param eig_vals: eigen values - * @param tol: error tolerance for the jacobi method. Algorithm stops when the - * error is below tol - * @param sweeps: number of sweeps in the Jacobi algorithm. The more the better - * accuracy. - * @{ - */ - template - void eigQR(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, - std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, - cudaStream_t stream) { - cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); - - cusolverDnParams_t dn_params = nullptr; - CUSOLVER_CHECK(cusolverDnCreateParams(&dn_params)); - - size_t workspaceDevice = 0; - size_t workspaceHost = 0; - CUSOLVER_CHECK(cusolverDnxsyevd_bufferSize( - cusolverH, - dn_params, - CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, - static_cast(n_rows), - eig_vectors, - static_cast(n_cols), - eig_vals, - &workspaceDevice, - &workspaceHost, - stream - )); - - rmm::device_uvector d_work(workspaceDevice / sizeof(math_t), stream); - rmm::device_scalar dev_info(stream); - std::vector h_work(workspaceHost / sizeof(math_t)); - - raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); - - CUSOLVER_CHECK(cusolverDnxsyevd( - cusolverH, - dn_params, - CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, - static_cast(n_rows), - eig_vectors, - static_cast(n_cols), - eig_vals, - d_work.data(), - workspaceDevice, - h_work.data(), - workspaceHost, - dev_info.data(), - stream - )); - - - CUDA_CHECK(cudaGetLastError()); - CUSOLVER_CHECK(cusolverDnDestroyParams(dn_params)); - } - - }; // end namespace linalg }; // end namespace raft From eb6cf13abe0386911129a7bc3cfd051b98c591eb Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Fri, 1 Oct 2021 16:40:50 +0200 Subject: [PATCH 4/8] Fix style --- cpp/include/raft/linalg/eig.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 8eebb3b166..4c8b0e6c43 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -197,5 +197,6 @@ void eigJacobi(const raft::handle_t &handle, const math_t *in, CUDA_CHECK(cudaGetLastError()); CUSOLVER_CHECK(cusolverDnDestroySyevjInfo(syevj_params)); } + }; // end namespace linalg }; // end namespace raft From fc0964519f7a4971363cd850208be13c6e609930 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 5 Oct 2021 12:09:26 +0200 Subject: [PATCH 5/8] Added support for cuda version < 11.1 --- cpp/include/raft/linalg/cusolver_wrappers.h | 2 ++ cpp/include/raft/linalg/eig.cuh | 32 +++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index 8aafddb534..8c1b381dce 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -719,6 +719,7 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT } /** @} */ +#if CUDART_VERSION >= 11010 /** * @defgroup DnXsyevd cusolver DnXsyevd operations * @{ @@ -786,6 +787,7 @@ inline cusolverStatus_t cusolverDnxsyevd( // NOLINT workspaceInBytesOnDevice, bufferOnHost, workspaceInBytesOnHost, info); } +#endif /** @} */ diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 4c8b0e6c43..aa4943e4c9 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -45,6 +45,9 @@ template void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, cudaStream_t stream) { +#if CUDART_VERSION < 11010 + eigDC_legacy(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream); +#else cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cusolverDnParams_t dn_params = nullptr; @@ -75,6 +78,35 @@ void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, ASSERT(dev_info == 0, "eig.cuh: eigensolver couldn't converge to a solution. " "This usually occurs when some of the features do not vary enough."); +#endif +} + +template +void eigDC_legacy(const raft::handle_t &handle, const math_t *in, + std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, + math_t *eig_vals, cudaStream_t stream) { + cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); + + int lwork; + CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n_rows, in, + n_cols, eig_vals, &lwork)); + + rmm::device_uvector d_work(lwork, stream); + rmm::device_scalar d_dev_info(stream); + + raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); + + CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors, + n_cols, eig_vals, d_work.data(), lwork, + d_dev_info.data(), stream)); + CUDA_CHECK(cudaGetLastError()); + + auto dev_info = d_dev_info.value(stream); + ASSERT(dev_info == 0, + "eig.cuh: eigensolver couldn't converge to a solution. " + "This usually occurs when some of the features do not vary enough."); } enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT }; From 3b8cd756d846f60a394d8fdff1051084ca60676d Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 5 Oct 2021 13:24:03 +0200 Subject: [PATCH 6/8] fix eigdc legacy --- cpp/include/raft/linalg/eig.cuh | 56 ++++++++++++++++----------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index aa4943e4c9..bac8fa1fc3 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -28,6 +28,34 @@ namespace raft { namespace linalg { +template +void eigDC_legacy(const raft::handle_t &handle, const math_t *in, + std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, + math_t *eig_vals, cudaStream_t stream) { + cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); + + int lwork; + CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n_rows, in, + n_cols, eig_vals, &lwork)); + + rmm::device_uvector d_work(lwork, stream); + rmm::device_scalar d_dev_info(stream); + + raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); + + CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors, + n_cols, eig_vals, d_work.data(), lwork, + d_dev_info.data(), stream)); + CUDA_CHECK(cudaGetLastError()); + + auto dev_info = d_dev_info.value(stream); + ASSERT(dev_info == 0, + "eig.cuh: eigensolver couldn't converge to a solution. " + "This usually occurs when some of the features do not vary enough."); +} + /** * @defgroup eig decomp with divide and conquer method for the column-major * symmetric matrices @@ -81,34 +109,6 @@ void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, #endif } -template -void eigDC_legacy(const raft::handle_t &handle, const math_t *in, - std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, - math_t *eig_vals, cudaStream_t stream) { - cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); - - int lwork; - CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n_rows, in, - n_cols, eig_vals, &lwork)); - - rmm::device_uvector d_work(lwork, stream); - rmm::device_scalar d_dev_info(stream); - - raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); - - CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors, - n_cols, eig_vals, d_work.data(), lwork, - d_dev_info.data(), stream)); - CUDA_CHECK(cudaGetLastError()); - - auto dev_info = d_dev_info.value(stream); - ASSERT(dev_info == 0, - "eig.cuh: eigensolver couldn't converge to a solution. " - "This usually occurs when some of the features do not vary enough."); -} - enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT }; #if CUDART_VERSION >= 10010 From c00e4dc9bc25499e9d5ef6417808cc3d5ef1eaad Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sun, 10 Oct 2021 12:08:43 +0200 Subject: [PATCH 7/8] Fix comment --- cpp/include/raft/linalg/cusolver_wrappers.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index 8c1b381dce..6aa5e74455 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -787,9 +787,8 @@ inline cusolverStatus_t cusolverDnxsyevd( // NOLINT workspaceInBytesOnDevice, bufferOnHost, workspaceInBytesOnHost, info); } -#endif - /** @} */ +#endif } // namespace linalg } // namespace raft From db54b165167bf4c13f39e244b64292449f6065fc Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Fri, 22 Oct 2021 12:15:34 +0200 Subject: [PATCH 8/8] Change sweeps type --- cpp/include/raft/linalg/eig.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index bac8fa1fc3..4161ddb8b9 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -198,7 +198,7 @@ template void eigJacobi(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, cudaStream_t stream, math_t tol = 1.e-7, - std::size_t sweeps = 15) { + std::uint32_t sweeps = 15) { cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); syevjInfo_t syevj_params = nullptr;