From 2c408f26b44110dbbd27d9cbf97276d8ccf80169 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 1 Nov 2022 18:01:15 +0100 Subject: [PATCH 1/9] Add gather and gemmi replacement --- .../raft/sparse/detail/cusparse_wrappers.h | 188 +++++++++--------- 1 file changed, 90 insertions(+), 98 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 041991521b..cb1af2eb68 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -18,46 +18,46 @@ #include #include -#include +#include +#include namespace raft { namespace sparse { namespace detail { /** - * @defgroup gthr cusparse gather methods + * @defgroup gather cusparse gather methods * @{ */ -template -cusparseStatus_t cusparsegthr( - cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, - int nnz, - const double* vals, - double* vals_sorted, - int* d_P, - cudaStream_t stream) +cusparseStatus_t cusparsegather( + cusparseHandle_t handle, cusparseDnVecDescr_t vecY, cusparseSpVecDescr_t vecX, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); -#pragma GCC diagnostic pop + return cusparseGather(handle, vecY, vecX); } -template <> -inline cusparseStatus_t cusparsegthr(cusparseHandle_t handle, - int nnz, - const float* vals, - float* vals_sorted, - int* d_P, - cudaStream_t stream) + +template +cusparseStatus_t cusparsegthr( + cusparseHandle_t handle, int nnz, const T* dY, T* dX_values, int* dX_indices, cudaStream_t stream) { + static_assert(std::is_same_v || std::is_same_v, "Unsupported data type"); + CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseSgthr(handle, nnz, vals, vals_sorted, d_P, CUSPARSE_INDEX_BASE_ZERO); -#pragma GCC diagnostic pop + auto size = nnz; + auto math_type = std::is_same_v ? CUDA_R_32F : CUDA_R_64F; + cusparseSpVecDescr_t vecX; + cusparseDnVecDescr_t vecY; + // Create sparse vector X + CUSPARSE_CHECK(cusparseCreateSpVec(&vecX, size, nnz, dX_indices, dX_values, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, math_type)); + // Create dense vector y + CUSPARSE_CHECK(cusparseCreateDnVec(&vecY, size, dY, math_type)); + auto returnValue = cusparsegather(handle, vecY, vecX, stream); + + // destroy matrix/vector descriptors + CUSPARSE_CHECK(cusparseDestroySpVec(vecX)); + CUSPARSE_CHECK(cusparseDestroyDnVec(vecY)); + return returnValue; } /** @} */ @@ -138,77 +138,6 @@ inline void cusparsecoosortByRow( // NOLINT } /** @} */ -/** - * @defgroup Gemmi cusparse gemmi operations - * @{ - */ -template -cusparseStatus_t cusparsegemmi( // NOLINT - cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const T* alpha, - const T* A, - int lda, - const T* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const T* beta, - T* C, - int ldc, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const float* alpha, - const float* A, - int lda, - const float* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const float* beta, - float* C, - int ldc, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseSgemmi( - handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); -#pragma GCC diagnostic pop -} -template <> -inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, - int m, - int n, - int k, - int nnz, - const double* alpha, - const double* A, - int lda, - const double* cscValB, - const int* cscColPtrB, - const int* cscRowIndB, - const double* beta, - double* C, - int ldc, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDgemmi( - handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, beta, C, ldc); -#pragma GCC diagnostic pop -} -/** @} */ - #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP /** * @defgroup cusparse Create CSR operations @@ -687,6 +616,69 @@ inline cusparseStatus_t cusparsecsrmm(cusparseHandle_t handle, /** @} */ #endif +/** + * @defgroup Gemmi cusparse gemmi operations + * @{ + */ +template +cusparseStatus_t cusparsegemmi( // NOLINT + cusparseHandle_t handle, + int m, + int n, + int k, + int nnz, + const T* alpha, + const T* A, + int lda, + const T* cscValB, + const int* cscColPtrB, + const int* cscRowIndB, + const T* beta, + T* C, + int ldc, + cudaStream_t stream) +{ + static_assert(std::is_same_v || std::is_same_v, "Unsupported data type"); + + cusparseDnMatDescr_t matA; + cusparseSpMatDescr_t matB; + cusparseDnMatDescr_t matC; + + auto math_type = std::is_same_v ? CUDA_R_32F : CUDA_R_64F; + // Create sparse matrix B + CUSPARSE_CHECK(cusparseCreateCsc(&matB, + k, + n, + nnz, + cscColPtrB, + cscRowIndB, + cscValB, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + math_type)); + // Create dense matrices + CUSPARSE_CHECK(cusparseCreateDnMat(&matA, m, k, lda, A, math_type, CUSPARSE_ORDER_ROW)); + CUSPARSE_CHECK(cusparseCreateDnMat(&matC, m, n, ldc, C, math_type, CUSPARSE_ORDER_ROW)); + + + cusparseOperation_t opA = CUSPARSE_OPERATION_TRANSPOSE; + cusparseOperation_t opB = CUSPARSE_OPERATION_TRANSPOSE; + cusparseSpMMAlg_t alg = CUSPARSE_SPMM_CSR_ALG2; + size_t buffer_size = 0; + + CUSPARSE_CHECK(cusparsespmm_bufferSize(handle, opA, opB, alpha, matB, matA, beta, matC, alg, &buffer_size, stream)); + rmm::device_uvector external_buffer(buffer_size, stream); + auto return_value = cusparsespmm(handle, opA, opB, alpha, matB, matA, beta, matC, alg, external_buffer.data(), stream); + + // destroy matrix/vector descriptors + CUSPARSE_CHECK(cusparseDestroyDnMat(matA)); + CUSPARSE_CHECK(cusparseDestroySpMat(matB)); + CUSPARSE_CHECK(cusparseDestroyDnMat(matC)); + return return_value; +} +/** @} */ + /** * @defgroup csr2coo cusparse CSR to COO converter methods * @{ From 8c8cf85ceea31ef68f74da8d62b623e11e742f9a Mon Sep 17 00:00:00 2001 From: William Hicks Date: Fri, 4 Nov 2022 18:26:51 -0400 Subject: [PATCH 2/9] Remove gemm2 wrappers --- .../raft/sparse/detail/cusparse_wrappers.h | 338 +----------------- 1 file changed, 2 insertions(+), 336 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index cb1af2eb68..43a4b48a94 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -29,7 +29,7 @@ namespace detail { * @defgroup gather cusparse gather methods * @{ */ -cusparseStatus_t cusparsegather( +inline cusparseStatus_t cusparsegather( cusparseHandle_t handle, cusparseDnVecDescr_t vecY, cusparseSpVecDescr_t vecX, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); @@ -1239,340 +1239,6 @@ inline cusparseStatus_t cusparsecsr2csc(cusparseHandle_t handle, /** @} */ -/** - * @defgroup csrgemm2 cusparse sparse gemm operations - * @{ - */ - -template -cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const T* alpha, - const T* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream); - -template <> -inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const float* alpha, - const float* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseScsrgemm2_bufferSizeExt(handle, - m, - n, - k, - alpha, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - beta, - matD, - nnzD, - rowindD, - indicesD, - info, - pBufferSizeInBytes); -#pragma GCC diagnostic pop -} - -template <> -inline cusparseStatus_t cusparsecsrgemm2_buffersizeext(cusparseHandle_t handle, - int m, - int n, - int k, - const double* alpha, - const double* beta, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - csrgemm2Info_t info, - size_t* pBufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDcsrgemm2_bufferSizeExt(handle, - m, - n, - k, - alpha, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - beta, - matD, - nnzD, - rowindD, - indicesD, - info, - pBufferSizeInBytes); -#pragma GCC diagnostic pop -} - -inline cusparseStatus_t cusparsecsrgemm2nnz(cusparseHandle_t handle, - int m, - int n, - int k, - const cusparseMatDescr_t matA, - int nnzA, - const int* rowindA, - const int* indicesA, - const cusparseMatDescr_t matB, - int nnzB, - const int* rowindB, - const int* indicesB, - const cusparseMatDescr_t matD, - int nnzD, - const int* rowindD, - const int* indicesD, - const cusparseMatDescr_t matC, - int* rowindC, - int* nnzC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseXcsrgemm2Nnz(handle, - m, - n, - k, - matA, - nnzA, - rowindA, - indicesA, - matB, - nnzB, - rowindB, - indicesB, - matD, - nnzD, - rowindD, - indicesD, - matC, - rowindC, - nnzC, - info, - pBuffer); -#pragma GCC diagnostic pop -} - -template -cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const T* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const T* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const T* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const T* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - T* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream); - -template <> -inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const float* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const float* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const float* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const float* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - float* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseScsrgemm2(handle, - m, - n, - k, - alpha, - descrA, - nnzA, - csrValA, - csrRowPtrA, - csrColIndA, - descrB, - nnzB, - csrValB, - csrRowPtrB, - csrColIndB, - beta, - descrD, - nnzD, - csrValD, - csrRowPtrD, - csrColIndD, - descrC, - csrValC, - csrRowPtrC, - csrColIndC, - info, - pBuffer); -#pragma GCC diagnostic pop -} - -template <> -inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, - int m, - int n, - int k, - const double* alpha, - const cusparseMatDescr_t descrA, - int nnzA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const cusparseMatDescr_t descrB, - int nnzB, - const double* csrValB, - const int* csrRowPtrB, - const int* csrColIndB, - const double* beta, - const cusparseMatDescr_t descrD, - int nnzD, - const double* csrValD, - const int* csrRowPtrD, - const int* csrColIndD, - const cusparseMatDescr_t descrC, - double* csrValC, - const int* csrRowPtrC, - int* csrColIndC, - const csrgemm2Info_t info, - void* pBuffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - return cusparseDcsrgemm2(handle, - m, - n, - k, - alpha, - descrA, - nnzA, - csrValA, - csrRowPtrA, - csrColIndA, - descrB, - nnzB, - csrValB, - csrRowPtrB, - csrColIndB, - beta, - descrD, - nnzD, - csrValD, - csrRowPtrD, - csrColIndD, - descrC, - csrValC, - csrRowPtrC, - csrColIndC, - info, - pBuffer); -#pragma GCC diagnostic pop -} - -/** @} */ - /** * @defgroup csrgemm2 cusparse sparse gemm operations * @{ @@ -1807,4 +1473,4 @@ inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, } // namespace detail } // namespace sparse -} // namespace raft \ No newline at end of file +} // namespace raft From 1d49532fc3c110a9ba7860823e2392e5c546252c Mon Sep 17 00:00:00 2001 From: William Hicks Date: Fri, 4 Nov 2022 18:34:48 -0400 Subject: [PATCH 3/9] Correct cusparse gthr wrapper --- .../raft/sparse/detail/cusparse_wrappers.h | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 43a4b48a94..a67e1c70ec 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -36,28 +36,25 @@ inline cusparseStatus_t cusparsegather( return cusparseGather(handle, vecY, vecX); } -template +template < + typename T, + typename std::enable_if_t || std::is_same_v>* = nullptr +> cusparseStatus_t cusparsegthr( - cusparseHandle_t handle, int nnz, const T* dY, T* dX_values, int* dX_indices, cudaStream_t stream) -{ - static_assert(std::is_same_v || std::is_same_v, "Unsupported data type"); - + cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream){ + auto constexpr float_type = []() constexpr { + if constexpr (std::is_same_v) { + return CUDA_R_32F; + } else if constexpr (std::is_same_v) { + return CUDA_R_64F; + } + }(); CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - auto size = nnz; - auto math_type = std::is_same_v ? CUDA_R_32F : CUDA_R_64F; - cusparseSpVecDescr_t vecX; - cusparseDnVecDescr_t vecY; - // Create sparse vector X - CUSPARSE_CHECK(cusparseCreateSpVec(&vecX, size, nnz, dX_indices, dX_values, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, math_type)); - // Create dense vector y - CUSPARSE_CHECK(cusparseCreateDnVec(&vecY, size, dY, math_type)); - auto returnValue = cusparsegather(handle, vecY, vecX, stream); - - // destroy matrix/vector descriptors - CUSPARSE_CHECK(cusparseDestroySpVec(vecX)); - CUSPARSE_CHECK(cusparseDestroyDnVec(vecY)); - return returnValue; + auto dense_vector_descr = cusparseDnVecDescr_t{}; + auto sparse_vector_descr = cusparseSpVecDescr_t{}; + CUSPARSE_CHECK(cusparseCreateDnVec(&dense_vector_descr, nnz, static_cast(const_cast(vals)), float_type)); + CUSPARSE_CHECK(cusparseCreateSpVec(&sparse_vector_descr, nnz, nnz, static_cast(d_P), static_cast(vals_sorted), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, float_type)); + return cusparseGather(handle, dense_vector_descr, sparse_vector_descr); } /** @} */ From 78c5e030cca999f43ef0a3c9f7494df0877bce6d Mon Sep 17 00:00:00 2001 From: William Hicks Date: Fri, 4 Nov 2022 18:49:26 -0400 Subject: [PATCH 4/9] Update style --- .../raft/sparse/detail/cusparse_wrappers.h | 65 +++++++++++-------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index a67e1c70ec..c8488625c6 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -17,8 +17,8 @@ #pragma once #include -#include #include +#include #include namespace raft { @@ -29,8 +29,10 @@ namespace detail { * @defgroup gather cusparse gather methods * @{ */ -inline cusparseStatus_t cusparsegather( - cusparseHandle_t handle, cusparseDnVecDescr_t vecY, cusparseSpVecDescr_t vecX, cudaStream_t stream) +inline cusparseStatus_t cusparsegather(cusparseHandle_t handle, + cusparseDnVecDescr_t vecY, + cusparseSpVecDescr_t vecX, + cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); return cusparseGather(handle, vecY, vecX); @@ -38,22 +40,32 @@ inline cusparseStatus_t cusparsegather( template < typename T, - typename std::enable_if_t || std::is_same_v>* = nullptr -> + typename std::enable_if_t || std::is_same_v>* = nullptr> cusparseStatus_t cusparsegthr( - cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream){ - auto constexpr float_type = []() constexpr { + cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream) +{ + auto constexpr float_type = []() constexpr + { if constexpr (std::is_same_v) { return CUDA_R_32F; } else if constexpr (std::is_same_v) { return CUDA_R_64F; } - }(); + } + (); CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - auto dense_vector_descr = cusparseDnVecDescr_t{}; + auto dense_vector_descr = cusparseDnVecDescr_t{}; auto sparse_vector_descr = cusparseSpVecDescr_t{}; - CUSPARSE_CHECK(cusparseCreateDnVec(&dense_vector_descr, nnz, static_cast(const_cast(vals)), float_type)); - CUSPARSE_CHECK(cusparseCreateSpVec(&sparse_vector_descr, nnz, nnz, static_cast(d_P), static_cast(vals_sorted), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, float_type)); + CUSPARSE_CHECK(cusparseCreateDnVec( + &dense_vector_descr, nnz, static_cast(const_cast(vals)), float_type)); + CUSPARSE_CHECK(cusparseCreateSpVec(&sparse_vector_descr, + nnz, + nnz, + static_cast(d_P), + static_cast(vals_sorted), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + float_type)); return cusparseGather(handle, dense_vector_descr, sparse_vector_descr); } /** @} */ @@ -644,29 +656,30 @@ cusparseStatus_t cusparsegemmi( // NOLINT auto math_type = std::is_same_v ? CUDA_R_32F : CUDA_R_64F; // Create sparse matrix B CUSPARSE_CHECK(cusparseCreateCsc(&matB, - k, - n, - nnz, - cscColPtrB, - cscRowIndB, - cscValB, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - math_type)); + k, + n, + nnz, + cscColPtrB, + cscRowIndB, + cscValB, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + math_type)); // Create dense matrices CUSPARSE_CHECK(cusparseCreateDnMat(&matA, m, k, lda, A, math_type, CUSPARSE_ORDER_ROW)); CUSPARSE_CHECK(cusparseCreateDnMat(&matC, m, n, ldc, C, math_type, CUSPARSE_ORDER_ROW)); - cusparseOperation_t opA = CUSPARSE_OPERATION_TRANSPOSE; cusparseOperation_t opB = CUSPARSE_OPERATION_TRANSPOSE; - cusparseSpMMAlg_t alg = CUSPARSE_SPMM_CSR_ALG2; - size_t buffer_size = 0; + cusparseSpMMAlg_t alg = CUSPARSE_SPMM_CSR_ALG2; + size_t buffer_size = 0; - CUSPARSE_CHECK(cusparsespmm_bufferSize(handle, opA, opB, alpha, matB, matA, beta, matC, alg, &buffer_size, stream)); + CUSPARSE_CHECK(cusparsespmm_bufferSize( + handle, opA, opB, alpha, matB, matA, beta, matC, alg, &buffer_size, stream)); rmm::device_uvector external_buffer(buffer_size, stream); - auto return_value = cusparsespmm(handle, opA, opB, alpha, matB, matA, beta, matC, alg, external_buffer.data(), stream); + auto return_value = cusparsespmm( + handle, opA, opB, alpha, matB, matA, beta, matC, alg, external_buffer.data(), stream); // destroy matrix/vector descriptors CUSPARSE_CHECK(cusparseDestroyDnMat(matA)); From e0dc1b959a54835362e79beffe1b3b2b753e98bb Mon Sep 17 00:00:00 2001 From: William Hicks Date: Fri, 4 Nov 2022 19:05:45 -0400 Subject: [PATCH 5/9] Properly cast to void in cusparseSpMM --- cpp/include/raft/sparse/detail/cusparse_wrappers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index c8488625c6..6643e689a2 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -532,7 +532,7 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); return cusparseSpMM( - handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_32F, alg, externalBuffer); + handle, opA, opB, static_cast(alpha), matA, matB, static_cast(beta), matC, CUDA_R_32F, alg, static_cast(externalBuffer)); } template <> inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, @@ -549,7 +549,7 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); return cusparseSpMM( - handle, opA, opB, alpha, matA, matB, beta, matC, CUDA_R_64F, alg, externalBuffer); + handle, opA, opB, static_cast(alpha), matA, matB, static_cast(beta), matC, CUDA_R_64F, alg, static_cast(externalBuffer)); } /** @} */ #else From 399b5f10b97add650bbc46e90f6f5d6825d28ad1 Mon Sep 17 00:00:00 2001 From: William Hicks Date: Fri, 4 Nov 2022 19:09:22 -0400 Subject: [PATCH 6/9] Correct pointer casts --- .../raft/sparse/detail/cusparse_wrappers.h | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 6643e689a2..0d2c1471e3 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -57,7 +57,7 @@ cusparseStatus_t cusparsegthr( auto dense_vector_descr = cusparseDnVecDescr_t{}; auto sparse_vector_descr = cusparseSpVecDescr_t{}; CUSPARSE_CHECK(cusparseCreateDnVec( - &dense_vector_descr, nnz, static_cast(const_cast(vals)), float_type)); + &dense_vector_descr, nnz, static_cast(const_cast(vals)), float_type)); CUSPARSE_CHECK(cusparseCreateSpVec(&sparse_vector_descr, nnz, nnz, @@ -531,8 +531,17 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSpMM( - handle, opA, opB, static_cast(alpha), matA, matB, static_cast(beta), matC, CUDA_R_32F, alg, static_cast(externalBuffer)); + return cusparseSpMM(handle, + opA, + opB, + static_cast(alpha), + matA, + matB, + static_cast(beta), + matC, + CUDA_R_32F, + alg, + static_cast(externalBuffer)); } template <> inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, @@ -548,8 +557,17 @@ inline cusparseStatus_t cusparsespmm(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - return cusparseSpMM( - handle, opA, opB, static_cast(alpha), matA, matB, static_cast(beta), matC, CUDA_R_64F, alg, static_cast(externalBuffer)); + return cusparseSpMM(handle, + opA, + opB, + static_cast(alpha), + matA, + matB, + static_cast(beta), + matC, + CUDA_R_64F, + alg, + static_cast(externalBuffer)); } /** @} */ #else From b6e33dadf3da4dd134dee5b7aac569d4801f7bee Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 7 Nov 2022 14:42:44 +0100 Subject: [PATCH 7/9] Fix type casting, restore vector cleaning --- .../raft/sparse/detail/cusparse_wrappers.h | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index 0d2c1471e3..b649f0c615 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -66,7 +66,10 @@ cusparseStatus_t cusparsegthr( CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, float_type)); - return cusparseGather(handle, dense_vector_descr, sparse_vector_descr); + auto return_value = cusparseGather(handle, dense_vector_descr, sparse_vector_descr); + CUSPARSE_CHECK(cusparseDestroyDnVec(dense_vector_descr)); + CUSPARSE_CHECK(cusparseDestroySpVec(sparse_vector_descr)); + return return_value; } /** @} */ @@ -677,16 +680,18 @@ cusparseStatus_t cusparsegemmi( // NOLINT k, n, nnz, - cscColPtrB, - cscRowIndB, - cscValB, + static_cast(const_cast(cscColPtrB)), + static_cast(const_cast(cscRowIndB)), + static_cast(const_cast(cscValB)), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, math_type)); // Create dense matrices - CUSPARSE_CHECK(cusparseCreateDnMat(&matA, m, k, lda, A, math_type, CUSPARSE_ORDER_ROW)); - CUSPARSE_CHECK(cusparseCreateDnMat(&matC, m, n, ldc, C, math_type, CUSPARSE_ORDER_ROW)); + CUSPARSE_CHECK(cusparseCreateDnMat( + &matA, m, k, lda, static_cast(const_cast(A)), math_type, CUSPARSE_ORDER_ROW)); + CUSPARSE_CHECK(cusparseCreateDnMat( + &matC, m, n, ldc, static_cast(const_cast(C)), math_type, CUSPARSE_ORDER_ROW)); cusparseOperation_t opA = CUSPARSE_OPERATION_TRANSPOSE; cusparseOperation_t opB = CUSPARSE_OPERATION_TRANSPOSE; @@ -695,7 +700,8 @@ cusparseStatus_t cusparsegemmi( // NOLINT CUSPARSE_CHECK(cusparsespmm_bufferSize( handle, opA, opB, alpha, matB, matA, beta, matC, alg, &buffer_size, stream)); - rmm::device_uvector external_buffer(buffer_size, stream); + buffer_size = buffer_size / sizeof(T); + rmm::device_uvector external_buffer(buffer_size, stream); auto return_value = cusparsespmm( handle, opA, opB, alpha, matB, matA, beta, matC, alg, external_buffer.data(), stream); From 5c76aaa8a68b0888056170e59e2c2cef32fd7ac3 Mon Sep 17 00:00:00 2001 From: William Hicks Date: Mon, 7 Nov 2022 14:26:52 -0500 Subject: [PATCH 8/9] Remove cusparseAlgMode_t usage --- .../raft/sparse/detail/cusparse_wrappers.h | 330 ------------------ 1 file changed, 330 deletions(-) diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index b649f0c615..c8e4229203 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -759,336 +759,6 @@ inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle, } /** @} */ -/** - * @defgroup CsrmvEx cusparse csrmvex operations - * @{ - */ -template -cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const T* x, - const T* beta, - T* y, - size_t* bufferSizeInBytes, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const float* x, - const float* beta, - float* y, - size_t* bufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - -#if CUDART_VERSION >= 11020 - cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - const_cast(csrRowPtrA), - const_cast(csrColIndA), - const_cast(csrValA)); - - cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); - - cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); - - cusparseStatus_t result = cusparseSpMV_bufferSize(handle, - transA, - alpha, - matA, - vecX, - beta, - vecY, - CUDA_R_32F, - CUSPARSE_SPMV_ALG_DEFAULT, - bufferSizeInBytes); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; - -#else - - return cusparseCsrmvEx_bufferSize(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_32F, - descrA, - csrValA, - CUDA_R_32F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_32F, - beta, - CUDA_R_32F, - y, - CUDA_R_32F, - CUDA_R_32F, - bufferSizeInBytes); -#endif -} -template <> -inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const double* x, - const double* beta, - double* y, - size_t* bufferSizeInBytes, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - -#if CUDART_VERSION >= 11020 - cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - const_cast(csrRowPtrA), - const_cast(csrColIndA), - const_cast(csrValA)); - - cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); - - cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); - - cusparseStatus_t result = cusparseSpMV_bufferSize(handle, - transA, - alpha, - matA, - vecX, - beta, - vecY, - CUDA_R_64F, - CUSPARSE_SPMV_ALG_DEFAULT, - bufferSizeInBytes); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; -#else - return cusparseCsrmvEx_bufferSize(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_64F, - descrA, - csrValA, - CUDA_R_64F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_64F, - beta, - CUDA_R_64F, - y, - CUDA_R_64F, - CUDA_R_64F, - bufferSizeInBytes); -#endif -} - -template -cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const T* alpha, - const cusparseMatDescr_t descrA, - const T* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const T* x, - const T* beta, - T* y, - T* buffer, - cudaStream_t stream); -template <> -inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const float* alpha, - const cusparseMatDescr_t descrA, - const float* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const float* x, - const float* beta, - float* y, - float* buffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - -#if CUDART_VERSION >= 11020 - cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - const_cast(csrRowPtrA), - const_cast(csrColIndA), - const_cast(csrValA)); - - cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); - - cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); - - cusparseStatus_t result = cusparseSpMV( - handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; -#else - return cusparseCsrmvEx(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_32F, - descrA, - csrValA, - CUDA_R_32F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_32F, - beta, - CUDA_R_32F, - y, - CUDA_R_32F, - CUDA_R_32F, - buffer); -#endif -} -template <> -inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, - cusparseAlgMode_t alg, - cusparseOperation_t transA, - int m, - int n, - int nnz, - const double* alpha, - const cusparseMatDescr_t descrA, - const double* csrValA, - const int* csrRowPtrA, - const int* csrColIndA, - const double* x, - const double* beta, - double* y, - double* buffer, - cudaStream_t stream) -{ - CUSPARSE_CHECK(cusparseSetStream(handle, stream)); - -#if CUDART_VERSION >= 11020 - cusparseSpMatDescr_t matA; - cusparsecreatecsr(&matA, - m, - n, - nnz, - const_cast(csrRowPtrA), - const_cast(csrColIndA), - const_cast(csrValA)); - - cusparseDnVecDescr_t vecX; - cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); - - cusparseDnVecDescr_t vecY; - cusparsecreatednvec(&vecY, static_cast(n), y); - - cusparseStatus_t result = cusparseSpMV( - handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); - return result; - -#else - - return cusparseCsrmvEx(handle, - alg, - transA, - m, - n, - nnz, - alpha, - CUDA_R_64F, - descrA, - csrValA, - CUDA_R_64F, - csrRowPtrA, - csrColIndA, - x, - CUDA_R_64F, - beta, - CUDA_R_64F, - y, - CUDA_R_64F, - CUDA_R_64F, - buffer); -#endif -} - -/** @} */ - /** * @defgroup Csr2cscEx2 cusparse csr->csc conversion * @{ From 303273a6542a5343c485829c5d7cbc6a0158f845 Mon Sep 17 00:00:00 2001 From: William Hicks Date: Mon, 7 Nov 2022 14:33:38 -0500 Subject: [PATCH 9/9] Rename cusparse algorithms --- cpp/include/raft/spectral/detail/matrix_wrappers.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index 40388eea84..e4e028e9f0 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -282,9 +282,9 @@ struct sparse_matrix_t { cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const { switch (alg) { - case sparse_mv_alg_t::SPARSE_MV_ALG1: return CUSPARSE_CSRMV_ALG1; - case sparse_mv_alg_t::SPARSE_MV_ALG2: return CUSPARSE_CSRMV_ALG2; - default: return CUSPARSE_MV_ALG_DEFAULT; + case sparse_mv_alg_t::SPARSE_MV_ALG1: return CUSPARSE_SPMV_CSR_ALG1; + case sparse_mv_alg_t::SPARSE_MV_ALG2: return CUSPARSE_SPMV_CSR_ALG2; + default: return CUSPARSE_SPMV_ALG_DEFAULT; } } #endif