diff --git a/cpp/doxygen/Doxyfile.in b/cpp/doxygen/Doxyfile.in index eb27b2d02c..c83224050e 100644 --- a/cpp/doxygen/Doxyfile.in +++ b/cpp/doxygen/Doxyfile.in @@ -815,7 +815,7 @@ RECURSIVE = YES EXCLUDE = @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/linalg/symmetrize.hpp \ # Contains device code @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/csr.hpp \ # Contains device code - @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/cusparse_wrappers.h + @CMAKE_CURRENT_SOURCE_DIR@/include/raft/sparse/detail/cusparse_wrappers.h # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded diff --git a/cpp/include/raft/handle.hpp b/cpp/include/raft/handle.hpp index 015d422f9a..7d6a5bfafd 100644 --- a/cpp/include/raft/handle.hpp +++ b/cpp/include/raft/handle.hpp @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp index c8d3b46d03..2570d7ae65 100644 --- a/cpp/include/raft/sparse/convert/dense.hpp +++ b/cpp/include/raft/sparse/convert/dense.hpp @@ -32,6 +32,7 @@ namespace convert { * @param[in] handle : cusparse handle for conversion * @param[in] nrows : number of rows in CSR * @param[in] ncols : number of columns in CSR + * @param[in] nnz : number of nonzeros in CSR * @param[in] csr_indptr : CSR row index pointer array * @param[in] csr_indices : CSR column indices array * @param[in] csr_data : CSR data array @@ -44,6 +45,7 @@ template void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols, + value_idx nnz, const value_idx* csr_indptr, const value_idx* csr_indices, const value_t* csr_data, @@ -53,7 +55,7 @@ void csr_to_dense(cusparseHandle_t handle, bool row_major = true) { detail::csr_to_dense( - handle, nrows, ncols, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major); + handle, nrows, ncols, nnz, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major); } }; // end NAMESPACE convert diff --git a/cpp/include/raft/sparse/convert/detail/coo.cuh b/cpp/include/raft/sparse/convert/detail/coo.cuh index fd300dcdba..c37087789c 100644 --- a/cpp/include/raft/sparse/convert/detail/coo.cuh +++ b/cpp/include/raft/sparse/convert/detail/coo.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index 0f4dc4976c..751335dfca 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -61,15 +61,16 @@ void coo_to_csr(const raft::handle_t& handle, cudaMemcpyAsync(dstRows.data(), srcRows, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream)); RAFT_CUDA_TRY( cudaMemcpyAsync(dstCols, srcCols, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream)); - auto buffSize = raft::sparse::cusparsecoosort_bufferSizeExt( + auto buffSize = raft::sparse::detail::cusparsecoosort_bufferSizeExt( cusparseHandle, m, m, nnz, srcRows, srcCols, stream); rmm::device_uvector pBuffer(buffSize, stream); rmm::device_uvector P(nnz, stream); RAFT_CUSPARSE_TRY(cusparseCreateIdentityPermutation(cusparseHandle, nnz, P.data())); - raft::sparse::cusparsecoosortByRow( + raft::sparse::detail::cusparsecoosortByRow( cusparseHandle, m, m, nnz, dstRows.data(), dstCols, P.data(), pBuffer.data(), stream); - raft::sparse::cusparsegthr(cusparseHandle, nnz, srcVals, dstVals, P.data(), stream); - raft::sparse::cusparsecoo2csr(cusparseHandle, dstRows.data(), nnz, m, dst_offsets, stream); + raft::sparse::detail::cusparsegthr(cusparseHandle, nnz, srcVals, dstVals, P.data(), stream); + raft::sparse::detail::cusparsecoo2csr( + cusparseHandle, dstRows.data(), nnz, m, dst_offsets, stream); RAFT_CUDA_TRY(cudaDeviceSynchronize()); } diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index 9f48fd2172..b2756b81c9 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -31,6 +31,7 @@ #include #include +#include namespace raft { namespace sparse { @@ -67,6 +68,7 @@ __global__ void csr_to_dense_warp_per_row_kernel( * @param[in] handle : cusparse handle for conversion * @param[in] nrows : number of rows in CSR * @param[in] ncols : number of columns in CSR + * @param[in] nnz : the number of nonzeros in CSR * @param[in] csr_indptr : CSR row index pointer array * @param[in] csr_indices : CSR column indices array * @param[in] csr_data : CSR data array @@ -79,6 +81,7 @@ template void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols, + value_idx nnz, const value_idx* csr_indptr, const value_idx* csr_indices, const value_t* csr_data, @@ -96,8 +99,34 @@ void csr_to_dense(cusparseHandle_t handle, RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(out_mat, CUSPARSE_INDEX_BASE_ZERO)); RAFT_CUSPARSE_TRY(cusparseSetMatType(out_mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2dense( - handle, nrows, ncols, out_mat, csr_data, csr_indptr, csr_indices, out, lda, stream)); + size_t buffer_size; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2dense_buffersize(handle, + nrows, + ncols, + nnz, + out_mat, + csr_data, + csr_indptr, + csr_indices, + out, + lda, + &buffer_size, + stream)); + + rmm::device_uvector buffer(buffer_size, stream); + + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2dense(handle, + nrows, + ncols, + nnz, + out_mat, + csr_data, + csr_indptr, + csr_indices, + out, + lda, + buffer.data(), + stream)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyMatDescr(out_mat)); diff --git a/cpp/include/raft/sparse/detail/csr.cuh b/cpp/include/raft/sparse/detail/csr.cuh index cb39f34ba4..a256ac402b 100644 --- a/cpp/include/raft/sparse/detail/csr.cuh +++ b/cpp/include/raft/sparse/detail/csr.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h new file mode 100644 index 0000000000..1f9f0e5175 --- /dev/null +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +///@todo: enable this once logging is enabled +//#include + +#define _CUSPARSE_ERR_TO_STR(err) \ + case err: return #err; + +// Notes: +//(1.) CUDA_VER_10_1_UP aggregates all the CUDA version selection logic; +//(2.) to enforce a lower version, +// +//`#define CUDA_ENFORCE_LOWER +// #include ` +// +// (i.e., before including this header) +// +#define CUDA_VER_10_1_UP (CUDART_VERSION >= 10100) + +namespace raft { + +/** + * @brief Exception thrown when a cuSparse error is encountered. + */ +struct cusparse_error : public raft::exception { + explicit cusparse_error(char const* const message) : raft::exception(message) {} + explicit cusparse_error(std::string const& message) : raft::exception(message) {} +}; + +namespace sparse { +namespace detail { + +inline const char* cusparse_error_to_string(cusparseStatus_t err) +{ +#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 + return cusparseGetErrorString(err); +#else // CUDART_VERSION + switch (err) { + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + default: return "CUSPARSE_STATUS_UNKNOWN"; + }; +#endif // CUDART_VERSION +} + +} // namespace detail +} // namespace sparse +} // namespace raft + +#undef _CUSPARSE_ERR_TO_STR + +/** + * @brief Error checking macro for cuSparse runtime API functions. + * + * Invokes a cuSparse runtime API function call, if the call does not return + * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred + */ +#define RAFT_CUSPARSE_TRY(call) \ + do { \ + cusparseStatus_t const status = (call); \ + if (CUSPARSE_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "cuSparse error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + raft::sparse::detail::cusparse_error_to_string(status)); \ + throw raft::cusparse_error(msg); \ + } \ + } while (0) + +// FIXME: Remove after consumer rename +#ifndef CUSPARSE_TRY +#define CUSPARSE_TRY(call) RAFT_CUSPARSE_TRY(call) +#endif + +// FIXME: Remove after consumer rename +#ifndef CUSPARSE_CHECK +#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call) +#endif + +//@todo: use logger here once logging is enabled +/** check for cusparse runtime API errors but do not assert */ +#define RAFT_CUSPARSE_TRY_NO_THROW(call) \ + do { \ + cusparseStatus_t err = call; \ + if (err != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE call='%s' got errorcode=%d err=%s", \ + #call, \ + err, \ + raft::sparse::detail::cusparse_error_to_string(err)); \ + } \ + } while (0) + +// FIXME: Remove after consumer rename +#ifndef CUSPARSE_CHECK_NO_THROW +#define CUSPARSE_CHECK_NO_THROW(call) RAFT_CUSPARSE_TRY_NO_THROW(call) +#endif diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h similarity index 84% rename from cpp/include/raft/sparse/cusparse_wrappers.h rename to cpp/include/raft/sparse/detail/cusparse_wrappers.h index e2306686ce..aef3976294 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -16,116 +16,14 @@ #pragma once +#include #include - -#include -///@todo: enable this once logging is enabled -//#include - -#define _CUSPARSE_ERR_TO_STR(err) \ - case err: return #err; - -// Notes: -//(1.) CUDA_VER_10_1_UP aggregates all the CUDA version selection logic; -//(2.) to enforce a lower version, -// -//`#define CUDA_ENFORCE_LOWER -// #include ` -// -// (i.e., before including this header) -// -#define CUDA_VER_10_1_UP (CUDART_VERSION >= 10100) +#include namespace raft { - -/** - * @brief Exception thrown when a cuSparse error is encountered. - */ -struct cusparse_error : public raft::exception { - explicit cusparse_error(char const* const message) : raft::exception(message) {} - explicit cusparse_error(std::string const& message) : raft::exception(message) {} -}; - namespace sparse { namespace detail { -inline const char* cusparse_error_to_string(cusparseStatus_t err) -{ -#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 - return cusparseGetErrorString(err); -#else // CUDART_VERSION - switch (err) { - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); - default: return "CUSPARSE_STATUS_UNKNOWN"; - }; -#endif // CUDART_VERSION -} - -} // namespace detail -} // namespace sparse -} // namespace raft - -#undef _CUSPARSE_ERR_TO_STR - -/** - * @brief Error checking macro for cuSparse runtime API functions. - * - * Invokes a cuSparse runtime API function call, if the call does not return - * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred - */ -#define RAFT_CUSPARSE_TRY(call) \ - do { \ - cusparseStatus_t const status = (call); \ - if (CUSPARSE_STATUS_SUCCESS != status) { \ - std::string msg{}; \ - SET_ERROR_MSG(msg, \ - "cuSparse error encountered at: ", \ - "call='%s', Reason=%d:%s", \ - #call, \ - status, \ - raft::sparse::detail::cusparse_error_to_string(status)); \ - throw raft::cusparse_error(msg); \ - } \ - } while (0) - -// FIXME: Remove after consumer rename -#ifndef CUSPARSE_TRY -#define CUSPARSE_TRY(call) RAFT_CUSPARSE_TRY(call) -#endif - -// FIXME: Remove after consumer rename -#ifndef CUSPARSE_CHECK -#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call) -#endif - -//@todo: use logger here once logging is enabled -/** check for cusparse runtime API errors but do not assert */ -#define RAFT_CUSPARSE_TRY_NO_THROW(call) \ - do { \ - cusparseStatus_t err = call; \ - if (err != CUSPARSE_STATUS_SUCCESS) { \ - printf("CUSPARSE call='%s' got errorcode=%d err=%s", \ - #call, \ - err, \ - raft::sparse::detail::cusparse_error_to_string(err)); \ - } \ - } while (0) - -// FIXME: Remove after consumer rename -#ifndef CUSPARSE_CHECK_NO_THROW -#define CUSPARSE_CHECK_NO_THROW(call) RAFT_CUSPARSE_TRY_NO_THROW(call) -#endif - -namespace raft { -namespace sparse { - /** * @defgroup gthr cusparse gather methods * @{ @@ -875,6 +773,41 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnVecDescr_t vecX; + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); + + cusparseDnVecDescr_t vecY; + cusparsecreatednvec(&vecY, static_cast(n), y); + + cusparseStatus_t result = cusparseSpMV_bufferSize(handle, + transA, + alpha, + matA, + vecX, + beta, + vecY, + CUDA_R_32F, + CUSPARSE_SPMV_ALG_DEFAULT, + bufferSizeInBytes); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + return result; + +#else + return cusparseCsrmvEx_bufferSize(handle, alg, transA, @@ -896,6 +829,7 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, CUDA_R_32F, CUDA_R_32F, bufferSizeInBytes); +#endif } template <> inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, @@ -916,6 +850,39 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnVecDescr_t vecX; + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); + + cusparseDnVecDescr_t vecY; + cusparsecreatednvec(&vecY, static_cast(n), y); + + cusparseStatus_t result = cusparseSpMV_bufferSize(handle, + transA, + alpha, + matA, + vecX, + beta, + vecY, + CUDA_R_64F, + CUSPARSE_SPMV_ALG_DEFAULT, + bufferSizeInBytes); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + return result; +#else return cusparseCsrmvEx_bufferSize(handle, alg, transA, @@ -937,6 +904,7 @@ inline cusparseStatus_t cusparsecsrmvex_bufferSize(cusparseHandle_t handle, CUDA_R_64F, CUDA_R_64F, bufferSizeInBytes); +#endif } template @@ -975,6 +943,31 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnVecDescr_t vecX; + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); + + cusparseDnVecDescr_t vecY; + cusparsecreatednvec(&vecY, static_cast(n), y); + + cusparseStatus_t result = cusparseSpMV( + handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + return result; +#else return cusparseCsrmvEx(handle, alg, transA, @@ -996,6 +989,7 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, CUDA_R_32F, CUDA_R_32F, buffer); +#endif } template <> inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, @@ -1016,6 +1010,33 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, cudaStream_t stream) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnVecDescr_t vecX; + cusparsecreatednvec(&vecX, static_cast(n), const_cast(x)); + + cusparseDnVecDescr_t vecY; + cusparsecreatednvec(&vecY, static_cast(n), y); + + cusparseStatus_t result = cusparseSpMV( + handle, transA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnVec(vecY)); + return result; + +#else + return cusparseCsrmvEx(handle, alg, transA, @@ -1037,6 +1058,7 @@ inline cusparseStatus_t cusparsecsrmvex(cusparseHandle_t handle, CUDA_R_64F, CUDA_R_64F, buffer); +#endif } /** @} */ @@ -1564,50 +1586,233 @@ inline cusparseStatus_t cusparsecsrgemm2(cusparseHandle_t handle, * @{ */ +template +cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const T* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + T* A, + int lda, + size_t* buffer_size, + cudaStream_t stream, + bool row_major = false); + +template <> +inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const float* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + float* A, + int lda, + size_t* buffer_size, + cudaStream_t stream, + bool row_major) +{ +#if CUDART_VERSION >= 11020 + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnMatDescr_t matB; + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); + + cusparseStatus_t result = cusparseSparseToDense_bufferSize( + handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + +#else + + cusparseStatus_t result = CUSPARSE_STATUS_SUCCESS; + buffer_size[0] = 0; + +#endif + return result; +} + +template <> +inline cusparseStatus_t cusparsecsr2dense_buffersize(cusparseHandle_t handle, + int m, + int n, + int nnz, + const cusparseMatDescr_t descrA, + const double* csrValA, + const int* csrRowPtrA, + const int* csrColIndA, + double* A, + int lda, + size_t* buffer_size, + cudaStream_t stream, + bool row_major) +{ +#if CUDART_VERSION >= 11020 + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnMatDescr_t matB; + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); + + cusparseStatus_t result = cusparseSparseToDense_bufferSize( + handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_size); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + +#else + cusparseStatus_t result = CUSPARSE_STATUS_SUCCESS; + buffer_size[0] = 0; + +#endif + + return result; +} + template cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, int m, int n, + int nnz, const cusparseMatDescr_t descrA, const T* csrValA, const int* csrRowPtrA, const int* csrColIndA, T* A, int lda, - cudaStream_t stream); + void* buffer, + cudaStream_t stream, + bool row_major = false); template <> inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, int m, int n, + int nnz, const cusparseMatDescr_t descrA, const float* csrValA, const int* csrRowPtrA, const int* csrColIndA, float* A, int lda, - cudaStream_t stream) + void* buffer, + cudaStream_t stream, + bool row_major) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnMatDescr_t matB; + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); + + cusparseStatus_t result = + cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + + return result; +#else return cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); +#endif } template <> inline cusparseStatus_t cusparsecsr2dense(cusparseHandle_t handle, int m, int n, + int nnz, const cusparseMatDescr_t descrA, const double* csrValA, const int* csrRowPtrA, const int* csrColIndA, double* A, int lda, - cudaStream_t stream) + void* buffer, + cudaStream_t stream, + bool row_major) { CUSPARSE_CHECK(cusparseSetStream(handle, stream)); + +#if CUDART_VERSION >= 11020 + cusparseOrder_t order = row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + cusparseSpMatDescr_t matA; + cusparsecreatecsr(&matA, + m, + n, + nnz, + const_cast(csrRowPtrA), + const_cast(csrColIndA), + const_cast(csrValA)); + + cusparseDnMatDescr_t matB; + cusparsecreatednmat(&matB, + static_cast(m), + static_cast(n), + static_cast(lda), + const_cast(A), + order); + + cusparseStatus_t result = + cusparseSparseToDense(handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(matB)); + + return result; +#else + return cusparseDcsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); +#endif } /** @} */ +} // namespace detail } // namespace sparse -} // namespace raft +} // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh index f65f524f62..124fa2285d 100644 --- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh index c23a2b1537..046b65a0f0 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "../../csr.hpp" #include "../../detail/utils.h" diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh index b1e9ae5a55..6e717e9920 100644 --- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh @@ -19,10 +19,10 @@ #include #include #include -#include +#include +#include -#include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh index 57411f6998..0624674e81 100644 --- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index 4ceb31a3c8..de9049ced7 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -20,8 +20,7 @@ #include #include -#include - +#include #include #include diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp index 7ec032d186..dc9837ab43 100644 --- a/cpp/include/raft/sparse/distance/distance.hpp +++ b/cpp/include/raft/sparse/distance/distance.hpp @@ -16,28 +16,16 @@ #pragma once -#include +#include #include -#include #include -#include -#include - -#include -#include -#include -#include -#include -#include #include #include #include #include -#include - namespace raft { namespace sparse { namespace distance { diff --git a/cpp/include/raft/sparse/linalg/detail/add.cuh b/cpp/include/raft/sparse/linalg/detail/add.cuh index 769c7e523f..b288d0a603 100644 --- a/cpp/include/raft/sparse/linalg/detail/add.cuh +++ b/cpp/include/raft/sparse/linalg/detail/add.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index f4b4f65f7e..b7420a55e7 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 9d1741fab7..5b43798e2e 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 045f0e14bc..4384f2ba55 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h index be74a72817..398877eaab 100644 --- a/cpp/include/raft/sparse/linalg/detail/transpose.h +++ b/cpp/include/raft/sparse/linalg/detail/transpose.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include @@ -70,39 +70,39 @@ void csr_transpose(cusparseHandle_t handle, { size_t convert_csc_workspace_size = 0; - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2csc_bufferSize(handle, - csr_nrows, - csr_ncols, - nnz, - csr_data, - csr_indptr, - csr_indices, - csc_data, - csc_indptr, - csc_indices, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - &convert_csc_workspace_size, - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2csc_bufferSize(handle, + csr_nrows, + csr_ncols, + nnz, + csr_data, + csr_indptr, + csr_indices, + csc_data, + csc_indptr, + csc_indices, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + &convert_csc_workspace_size, + stream)); rmm::device_uvector convert_csc_workspace(convert_csc_workspace_size, stream); - RAFT_CUSPARSE_TRY(raft::sparse::cusparsecsr2csc(handle, - csr_nrows, - csr_ncols, - nnz, - csr_data, - csr_indptr, - csr_indices, - csc_data, - csc_indptr, - csc_indices, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - convert_csc_workspace.data(), - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsr2csc(handle, + csr_nrows, + csr_ncols, + nnz, + csr_data, + csr_indptr, + csr_indices, + csc_data, + csc_indptr, + csc_indices, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + convert_csc_workspace.data(), + stream)); } }; // end NAMESPACE detail diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 6e5d518619..80a6584251 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh index ba728f54c8..e4a64fbb51 100644 --- a/cpp/include/raft/sparse/op/detail/reduce.cuh +++ b/cpp/include/raft/sparse/op/detail/reduce.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/row_op.cuh b/cpp/include/raft/sparse/op/detail/row_op.cuh index b8803d4926..4754f753d4 100644 --- a/cpp/include/raft/sparse/op/detail/row_op.cuh +++ b/cpp/include/raft/sparse/op/detail/row_op.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/slice.h b/cpp/include/raft/sparse/op/detail/slice.h index 0f4f50ceb6..e3c0f09e14 100644 --- a/cpp/include/raft/sparse/op/detail/slice.h +++ b/cpp/include/raft/sparse/op/detail/slice.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -106,4 +106,4 @@ void csr_row_slice_populate(value_idx start_offset, }; // namespace detail }; // namespace op }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 94feda1e76..9fc7cac5e3 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 75f0121795..d86dc21135 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -208,24 +208,24 @@ struct sparse_matrix_t { // void*; the casts should be harmless) // cusparseSpMatDescr_t matA; - RAFT_CUSPARSE_TRY(cusparsecreatecsr(&matA, - nrows_, - ncols_, - nnz_, - const_cast(row_offsets_), - const_cast(col_indices_), - const_cast(values_))); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA, + nrows_, + ncols_, + nnz_, + const_cast(row_offsets_), + const_cast(col_indices_), + const_cast(values_))); cusparseDnVecDescr_t vecX; - RAFT_CUSPARSE_TRY(cusparsecreatednvec(&vecX, size_x, x)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x)); cusparseDnVecDescr_t vecY; - RAFT_CUSPARSE_TRY(cusparsecreatednvec(&vecY, size_y, y)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y)); // get (scratch) external device buffer size: // size_t bufferSize; - RAFT_CUSPARSE_TRY(cusparsespmv_buffersize( + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize( cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream)); // allocate external buffer: @@ -234,7 +234,7 @@ struct sparse_matrix_t { // finally perform SpMV: // - RAFT_CUSPARSE_TRY(cusparsespmv( + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv( cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream)); // free descriptors: @@ -244,7 +244,8 @@ struct sparse_matrix_t { RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX)); RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA)); #else - RAFT_CUSPARSE_TRY(cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); cusparseMatDescr_t descr = 0; RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr)); if (symmetric) { @@ -253,20 +254,20 @@ struct sparse_matrix_t { RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); } RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); - RAFT_CUSPARSE_TRY(cusparsecsrmv(cusparse_h, - trans, - nrows_, - ncols_, - nnz_, - &alpha, - descr, - values_, - row_offsets_, - col_indices_, - x, - &beta, - y, - stream)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h, + trans, + nrows_, + ncols_, + nnz_, + &alpha, + descr, + values_, + row_offsets_, + col_indices_, + x, + &beta, + y, + stream)); RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr)); #endif } diff --git a/cpp/test/sparse/csr_row_slice.cu b/cpp/test/sparse/csr_row_slice.cu index f0a245b432..e92717c454 100644 --- a/cpp/test/sparse/csr_row_slice.cu +++ b/cpp/test/sparse/csr_row_slice.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu index 8dec9492bb..60447e3a81 100644 --- a/cpp/test/sparse/csr_to_dense.cu +++ b/cpp/test/sparse/csr_to_dense.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include @@ -36,6 +36,7 @@ template struct CSRToDenseInputs { value_idx nrows; value_idx ncols; + value_idx nnz; std::vector indptr_h; std::vector indices_h; @@ -95,6 +96,7 @@ class CSRToDenseTest : public ::testing::TestWithParam> inputs_i32_f = { {4, 4, + 8, {0, 2, 4, 6, 8}, {0, 1, 2, 3, 0, 1, 2, 3}, // indices {1.0f, 3.0f, 1.0f, 5.0f, 50.0f, 28.0f, 16.0f, 2.0f}, diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/test/sparse/csr_transpose.cu index ab95c3610f..e4fb7a102b 100644 --- a/cpp/test/sparse/csr_transpose.cu +++ b/cpp/test/sparse/csr_transpose.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include "../test_utils.h" diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu index d3f4adb01b..e2288daed9 100644 --- a/cpp/test/sparse/dist_coo_spmv.cu +++ b/cpp/test/sparse/dist_coo_spmv.cu @@ -16,12 +16,10 @@ #include -#include - #include #include #include -#include +#include #include #include diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu index c8798f832f..7c61f2ed1c 100644 --- a/cpp/test/sparse/distance.cu +++ b/cpp/test/sparse/distance.cu @@ -20,7 +20,7 @@ #include #include -#include +#include #include diff --git a/cpp/test/sparse/knn.cu b/cpp/test/sparse/knn.cu index f0336a31fa..5a066c2c28 100644 --- a/cpp/test/sparse/knn.cu +++ b/cpp/test/sparse/knn.cu @@ -19,11 +19,9 @@ #include "../test_utils.h" #include -#include #include #include -#include namespace raft { namespace sparse {