diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index 0eadf47fe3..6aa5e74455 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -719,5 +719,76 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT } /** @} */ +#if CUDART_VERSION >= 11010 +/** + * @defgroup DnXsyevd cusolver DnXsyevd operations + * @{ + */ +template +cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, const T *A, int64_t lda, const T *W, + size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost, + cudaStream_t stream); + +template <> +inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, const float *A, int64_t lda, const float *W, + size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost, + cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd_bufferSize( + handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, CUDA_R_32F, W, + CUDA_R_32F, workspaceInBytesOnDevice, workspaceInBytesOnHost); +} + +template <> +inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, const double *A, int64_t lda, + const double *W, size_t *workspaceInBytesOnDevice, + size_t *workspaceInBytesOnHost, cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd_bufferSize( + handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, CUDA_R_64F, W, + CUDA_R_64F, workspaceInBytesOnDevice, workspaceInBytesOnHost); +} + +template +cusolverStatus_t cusolverDnxsyevd( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, T *A, int64_t lda, T *W, T *bufferOnDevice, + size_t workspaceInBytesOnDevice, T *bufferOnHost, + size_t workspaceInBytesOnHost, int *info, cudaStream_t stream); + +template <> +inline cusolverStatus_t cusolverDnxsyevd( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, float *A, int64_t lda, float *W, + float *bufferOnDevice, size_t workspaceInBytesOnDevice, float *bufferOnHost, + size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, + CUDA_R_32F, W, CUDA_R_32F, bufferOnDevice, + workspaceInBytesOnDevice, bufferOnHost, + workspaceInBytesOnHost, info); +} + +template <> +inline cusolverStatus_t cusolverDnxsyevd( // NOLINT + cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz, + cublasFillMode_t uplo, int64_t n, double *A, int64_t lda, double *W, + double *bufferOnDevice, size_t workspaceInBytesOnDevice, double *bufferOnHost, + size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) { + CUSOLVER_CHECK(cusolverDnSetStream(handle, stream)); + return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, + CUDA_R_64F, W, CUDA_R_64F, bufferOnDevice, + workspaceInBytesOnDevice, bufferOnHost, + workspaceInBytesOnHost, info); +} +/** @} */ +#endif + } // namespace linalg } // namespace raft diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 5b2df3bcb3..4161ddb8b9 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -28,6 +28,34 @@ namespace raft { namespace linalg { +template +void eigDC_legacy(const raft::handle_t &handle, const math_t *in, + std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, + math_t *eig_vals, cudaStream_t stream) { + cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); + + int lwork; + CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n_rows, in, + n_cols, eig_vals, &lwork)); + + rmm::device_uvector d_work(lwork, stream); + rmm::device_scalar d_dev_info(stream); + + raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); + + CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, + CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors, + n_cols, eig_vals, d_work.data(), lwork, + d_dev_info.data(), stream)); + CUDA_CHECK(cudaGetLastError()); + + auto dev_info = d_dev_info.value(stream); + ASSERT(dev_info == 0, + "eig.cuh: eigensolver couldn't converge to a solution. " + "This usually occurs when some of the features do not vary enough."); +} + /** * @defgroup eig decomp with divide and conquer method for the column-major * symmetric matrices @@ -42,31 +70,43 @@ namespace linalg { * @{ */ template -void eigDC(const raft::handle_t &handle, const math_t *in, int n_rows, - int n_cols, math_t *eig_vectors, math_t *eig_vals, +void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows, + std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals, cudaStream_t stream) { +#if CUDART_VERSION < 11010 + eigDC_legacy(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream); +#else cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); - int lwork; - CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n_rows, in, - n_cols, eig_vals, &lwork)); + cusolverDnParams_t dn_params = nullptr; + CUSOLVER_CHECK(cusolverDnCreateParams(&dn_params)); - rmm::device_uvector d_work(lwork, stream); + size_t workspaceDevice = 0; + size_t workspaceHost = 0; + CUSOLVER_CHECK(cusolverDnxsyevd_bufferSize( + cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, + static_cast(n_rows), eig_vectors, static_cast(n_cols), + eig_vals, &workspaceDevice, &workspaceHost, stream)); + + rmm::device_uvector d_work(workspaceDevice / sizeof(math_t), stream); rmm::device_scalar d_dev_info(stream); + std::vector h_work(workspaceHost / sizeof(math_t)); raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream); - CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, - CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors, - n_cols, eig_vals, d_work.data(), lwork, - d_dev_info.data(), stream)); - CUDA_CHECK(cudaGetLastError()); + CUSOLVER_CHECK(cusolverDnxsyevd( + cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, + static_cast(n_rows), eig_vectors, static_cast(n_cols), + eig_vals, d_work.data(), workspaceDevice, h_work.data(), workspaceHost, + d_dev_info.data(), stream)); + CUDA_CHECK(cudaGetLastError()); + CUSOLVER_CHECK(cusolverDnDestroyParams(dn_params)); int dev_info = d_dev_info.value(stream); ASSERT(dev_info == 0, "eig.cuh: eigensolver couldn't converge to a solution. " "This usually occurs when some of the features do not vary enough."); +#endif } enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT }; @@ -155,15 +195,17 @@ void eigSelDC(const raft::handle_t &handle, math_t *in, int n_rows, int n_cols, * @{ */ template -void eigJacobi(const raft::handle_t &handle, const math_t *in, int n_rows, - int n_cols, math_t *eig_vectors, math_t *eig_vals, - cudaStream_t stream, math_t tol = 1.e-7, int sweeps = 15) { +void eigJacobi(const raft::handle_t &handle, const math_t *in, + std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors, + math_t *eig_vals, cudaStream_t stream, math_t tol = 1.e-7, + std::uint32_t sweeps = 15) { cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); syevjInfo_t syevj_params = nullptr; CUSOLVER_CHECK(cusolverDnCreateSyevjInfo(&syevj_params)); CUSOLVER_CHECK(cusolverDnXsyevjSetTolerance(syevj_params, tol)); - CUSOLVER_CHECK(cusolverDnXsyevjSetMaxSweeps(syevj_params, sweeps)); + CUSOLVER_CHECK( + cusolverDnXsyevjSetMaxSweeps(syevj_params, static_cast(sweeps))); int lwork; CUSOLVER_CHECK(cusolverDnsyevj_bufferSize(