Skip to content

Commit

Permalink
Merge pull request #41 from aschaffer/fea_ext_csrmv_ex
Browse files Browse the repository at this point in the history
[REVIEW] Update to better cusparseSpMv() to support removal of deprecated cusparse calls in `cugraph`
  • Loading branch information
cjnolet authored Jul 27, 2020
2 parents b6ef2a8 + 6e210f3 commit a7f6e88
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- PR #22: Preserve order in comms workers for rank initialization
- PR #38: Remove #include <cudart_utils.h> from `raft/mr/`
- PR #39: Adding a virtual destructor to `raft::handle_t` and `raft::comms::comms_t`
- PR #41: Upgrade to `cusparseSpMV()`, alg selection, and rectangular matrices.

## Bug Fixes
- PR #17: Make destructor inline to avoid redeclaration error
Expand Down
83 changes: 76 additions & 7 deletions cpp/include/raft/sparse/cusparse_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ inline cusparseStatus_t cusparsegemmi(cusparseHandle_t handle, int m, int n,
}
/** @} */

#if __CUDACC_VER_MAJOR__ > 10
#if __CUDACC_VER_MAJOR__ >= 10 and __CUDACC_VER_MINOR__ > 0
/**
* @defgroup cusparse Create CSR operations
* @{
Expand All @@ -226,9 +226,8 @@ cusparseStatus_t cusparsecreatecsr(cusparseSpMatDescr_t* spMatDescr,
template <>
inline cusparseStatus_t cusparsecreatecsr(cusparseSpMatDescr_t* spMatDescr,
int64_t rows, int64_t cols,
int64_t nnz, int32_t* csrRowOffsets,
int32_t* csrColInd,
float* csrValues) {
int64_t nnz, int* csrRowOffsets,
int* csrColInd, float* csrValues) {
return cusparseCreateCsr(spMatDescr, rows, cols, nnz, csrRowOffsets,
csrColInd, csrValues, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
Expand All @@ -237,9 +236,8 @@ inline cusparseStatus_t cusparsecreatecsr(cusparseSpMatDescr_t* spMatDescr,
template <>
inline cusparseStatus_t cusparsecreatecsr(cusparseSpMatDescr_t* spMatDescr,
int64_t rows, int64_t cols,
int64_t nnz, int32_t* csrRowOffsets,
int32_t* csrColInd,
double* csrValues) {
int64_t nnz, int* csrRowOffsets,
int* csrColInd, double* csrValues) {
return cusparseCreateCsr(spMatDescr, rows, cols, nnz, csrRowOffsets,
csrColInd, csrValues, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
Expand Down Expand Up @@ -514,5 +512,76 @@ inline cusparseStatus_t cusparsesetpointermode(cusparseHandle_t handle,
}
/** @} */

/**
* @defgroup CsrmvEx cusparse csrmvex operations
* @{
*/
template <typename T>
cusparseStatus_t cusparsecsrmvex_bufferSize(
cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA,
int m, int n, int nnz, const T* alpha, const cusparseMatDescr_t descrA,
const T* csrValA, const int* csrRowPtrA, const int* csrColIndA, const T* x,
const T* beta, T* y, size_t* bufferSizeInBytes, cudaStream_t stream);
template <>
inline cusparseStatus_t cusparsecsrmvex_bufferSize(
cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA,
int m, int n, int nnz, const float* alpha, const cusparseMatDescr_t descrA,
const float* csrValA, const int* csrRowPtrA, const int* csrColIndA,
const float* x, const float* beta, float* y, size_t* bufferSizeInBytes,
cudaStream_t stream) {
CUSPARSE_CHECK(cusparseSetStream(handle, stream));
return cusparseCsrmvEx_bufferSize(
handle, alg, transA, m, n, nnz, alpha, CUDA_R_32F, descrA, csrValA,
CUDA_R_32F, csrRowPtrA, csrColIndA, x, CUDA_R_32F, beta, CUDA_R_32F, y,
CUDA_R_32F, CUDA_R_32F, bufferSizeInBytes);
}
template <>
inline cusparseStatus_t cusparsecsrmvex_bufferSize(
cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA,
int m, int n, int nnz, const double* alpha, const cusparseMatDescr_t descrA,
const double* csrValA, const int* csrRowPtrA, const int* csrColIndA,
const double* x, const double* beta, double* y, size_t* bufferSizeInBytes,
cudaStream_t stream) {
CUSPARSE_CHECK(cusparseSetStream(handle, stream));
return cusparseCsrmvEx_bufferSize(
handle, alg, transA, m, n, nnz, alpha, CUDA_R_64F, descrA, csrValA,
CUDA_R_64F, csrRowPtrA, csrColIndA, x, CUDA_R_64F, beta, CUDA_R_64F, y,
CUDA_R_64F, CUDA_R_64F, bufferSizeInBytes);
}

template <typename T>
cusparseStatus_t cusparsecsrmvex(
cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA,
int m, int n, int nnz, const T* alpha, const cusparseMatDescr_t descrA,
const T* csrValA, const int* csrRowPtrA, const int* csrColIndA, const T* x,
const T* beta, T* y, T* buffer, cudaStream_t stream);
template <>
inline cusparseStatus_t cusparsecsrmvex(
cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA,
int m, int n, int nnz, const float* alpha, const cusparseMatDescr_t descrA,
const float* csrValA, const int* csrRowPtrA, const int* csrColIndA,
const float* x, const float* beta, float* y, float* buffer,
cudaStream_t stream) {
CUSPARSE_CHECK(cusparseSetStream(handle, stream));
return cusparseCsrmvEx(handle, alg, transA, m, n, nnz, alpha, CUDA_R_32F,
descrA, csrValA, CUDA_R_32F, csrRowPtrA, csrColIndA, x,
CUDA_R_32F, beta, CUDA_R_32F, y, CUDA_R_32F,
CUDA_R_32F, buffer);
}
template <>
inline cusparseStatus_t cusparsecsrmvex(
cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA,
int m, int n, int nnz, const double* alpha, const cusparseMatDescr_t descrA,
const double* csrValA, const int* csrRowPtrA, const int* csrColIndA,
const double* x, const double* beta, double* y, double* buffer,
cudaStream_t stream) {
CUSPARSE_CHECK(cusparseSetStream(handle, stream));
return cusparseCsrmvEx(handle, alg, transA, m, n, nnz, alpha, CUDA_R_64F,
descrA, csrValA, CUDA_R_64F, csrRowPtrA, csrColIndA, x,
CUDA_R_64F, beta, CUDA_R_64F, y, CUDA_R_64F,
CUDA_R_64F, buffer);
}
/** @} */

} // namespace sparse
} // namespace raft
88 changes: 69 additions & 19 deletions cpp/include/raft/spectral/matrix_wrappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ namespace matrix {

using size_type = int; // for now; TODO: move it in appropriate header

// specifies type of algorithm used
// for SpMv:
//
enum struct sparse_mv_alg_t : int {
SPARSE_MV_UNDEFINED = -1,
SPARSE_MV_ALG_DEFAULT, // generic, for any sparse matrix
SPARSE_MV_ALG1, // typical for CSR
SPARSE_MV_ALG2 // may provide better performamce for irregular sparse matrices
};

// Vector "view"-like aggregate for linear algebra purposes
//
template <typename value_type>
Expand Down Expand Up @@ -109,6 +119,18 @@ class vector_t {

template <typename index_type, typename value_type>
struct sparse_matrix_t {
sparse_matrix_t(handle_t const& raft_handle, index_type const* row_offsets,
index_type const* col_indices, value_type const* values,
index_type const nrows, index_type const ncols,
index_type const nnz)
: handle_(raft_handle),
row_offsets_(row_offsets),
col_indices_(col_indices),
values_(values),
nrows_(nrows),
ncols_(ncols),
nnz_(nnz) {}

sparse_matrix_t(handle_t const& raft_handle, index_type const* row_offsets,
index_type const* col_indices, value_type const* values,
index_type const nrows, index_type const nnz)
Expand All @@ -117,6 +139,7 @@ struct sparse_matrix_t {
col_indices_(col_indices),
values_(values),
nrows_(nrows),
ncols_(nrows),
nnz_(nnz) {}

template <typename CSRView>
Expand All @@ -126,6 +149,7 @@ struct sparse_matrix_t {
col_indices_(csr_view.indices),
values_(csr_view.edge_data),
nrows_(csr_view.number_of_vertices),
ncols_(csr_view.number_of_vertices),
nnz_(csr_view.number_of_edges) {}

virtual ~sparse_matrix_t(void) =
Expand All @@ -137,8 +161,9 @@ struct sparse_matrix_t {
// down is dangerous)
//
virtual void mv(value_type alpha, value_type* __restrict__ x, value_type beta,
value_type* __restrict__ y, bool transpose = false,
bool symmetric = false) const {
value_type* __restrict__ y,
sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1,
bool transpose = false, bool symmetric = false) const {
using namespace sparse;

RAFT_EXPECTS(x != nullptr, "Null x buffer.");
Expand All @@ -151,26 +176,34 @@ struct sparse_matrix_t {
transpose ? CUSPARSE_OPERATION_TRANSPOSE : // transpose
CUSPARSE_OPERATION_NON_TRANSPOSE; //non-transpose

#if __CUDACC_VER_MAJOR__ > 10
#if __CUDACC_VER_MAJOR__ >= 10 and __CUDACC_VER_MINOR__ > 0
auto size_x = transpose ? nrows_ : ncols_;
auto size_y = transpose ? ncols_ : nrows_;

cusparseSpMVAlg_t spmv_alg = translate_algorithm(alg);

//create descriptors:
//(below casts are necessary, because
// cusparseCreateCsr(...) takes non-const
// void*; the casts should be harmless)
//
cusparseSpMatDescr_t matA;
CUSPARSE_CHECK(cusparsecreatecsr(&matA, nrows_, nrows_, nnz_, row_offsets_,
col_indices_, values_));
CUSPARSE_CHECK(cusparsecreatecsr(
&matA, nrows_, ncols_, nnz_, const_cast<index_type*>(row_offsets_),
const_cast<index_type*>(col_indices_), const_cast<value_type*>(values_)));

cusparseDnVecDescr_t vecX;
CUSPARSE_CHECK(cusparsecreatednvec(&vecX, nrows_, x));
CUSPARSE_CHECK(cusparsecreatednvec(&vecX, size_x, x));

cusparseDnVecDescr_t vecY;
CUSPARSE_CHECK(cusparsecreatednvec(&vecY, nrows_, y));
CUSPARSE_CHECK(cusparsecreatednvec(&vecY, size_y, y));

//get (scratch) external device buffer size:
//
size_t bufferSize;
CUSPARSE_CHECK(
cusparsespmv_buffersize(cusparse_h, trans, &alpha, matA, vecX, &beta,
vecY, CUSPARSE_CSRMV_ALG1, &bufferSize, stream));
CUSPARSE_CHECK(cusparsespmv_buffersize(cusparse_h, trans, &alpha, matA,
vecX, &beta, vecY, spmv_alg,
&bufferSize, stream));

//allocate external buffer:
//
Expand All @@ -179,8 +212,7 @@ struct sparse_matrix_t {
//finally perform SpMV:
//
CUSPARSE_CHECK(cusparsespmv(cusparse_h, trans, &alpha, matA, vecX, &beta,
vecY, CUSPARSE_CSRMV_ALG1,
external_buffer.raw(), stream));
vecY, spmv_alg, external_buffer.raw(), stream));

//free descriptors:
//(TODO: maybe wrap them in a RAII struct?)
Expand All @@ -199,7 +231,7 @@ struct sparse_matrix_t {
CUSPARSE_CHECK(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL));
}
CUSPARSE_CHECK(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO));
CUSPARSE_CHECK(cusparsecsrmv(cusparse_h, trans, nrows_, nrows_, nnz_,
CUSPARSE_CHECK(cusparsecsrmv(cusparse_h, trans, nrows_, ncols_, nnz_,
&alpha, descr, values_, row_offsets_,
col_indices_, x, &beta, y, stream));
CUSPARSE_CHECK(cusparseDestroyMatDescr(descr));
Expand All @@ -208,13 +240,27 @@ struct sparse_matrix_t {

handle_t const& get_handle(void) const { return handle_; }

#if __CUDACC_VER_MAJOR__ >= 10 and __CUDACC_VER_MINOR__ > 0
cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const {
switch (alg) {
case sparse_mv_alg_t::SPARSE_MV_ALG1:
return CUSPARSE_CSRMV_ALG1;
case sparse_mv_alg_t::SPARSE_MV_ALG2:
return CUSPARSE_CSRMV_ALG2;
default:
return CUSPARSE_MV_ALG_DEFAULT;
}
}
#endif

//private: // maybe not, keep this ASAPBNS ("as simple as possible, but not simpler"); hence, aggregate

handle_t const& handle_;
index_type const* row_offsets_;
index_type const* col_indices_;
value_type const* values_;
index_type const nrows_;
index_type const ncols_;
index_type const nnz_;
};

Expand Down Expand Up @@ -252,8 +298,9 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
// y = alpha*A*x + beta*y
//
void mv(value_type alpha, value_type* __restrict__ x, value_type beta,
value_type* __restrict__ y, bool transpose = false,
bool symmetric = false) const override {
value_type* __restrict__ y,
sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1,
bool transpose = false, bool symmetric = false) const override {
constexpr int BLOCK_SIZE = 1024;
auto n = sparse_matrix_t<index_type, value_type>::nrows_;

Expand Down Expand Up @@ -282,7 +329,8 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {

// Apply adjacency matrix
//
sparse_matrix_t<index_type, value_type>::mv(-alpha, x, 1, y);
sparse_matrix_t<index_type, value_type>::mv(-alpha, x, 1, y, alg, transpose,
symmetric);
}

vector_t<value_type> diagonal_;
Expand Down Expand Up @@ -316,8 +364,9 @@ struct modularity_matrix_t : laplacian_matrix_t<index_type, value_type> {
// y = alpha*A*x + beta*y
//
void mv(value_type alpha, value_type* __restrict__ x, value_type beta,
value_type* __restrict__ y, bool transpose = false,
bool symmetric = false) const override {
value_type* __restrict__ y,
sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1,
bool transpose = false, bool symmetric = false) const override {
auto n = sparse_matrix_t<index_type, value_type>::nrows_;

auto cublas_h =
Expand All @@ -327,7 +376,8 @@ struct modularity_matrix_t : laplacian_matrix_t<index_type, value_type> {

// y = A*x
//
sparse_matrix_t<index_type, value_type>::mv(alpha, x, 0, y);
sparse_matrix_t<index_type, value_type>::mv(alpha, x, 0, y, alg, transpose,
symmetric);
value_type dot_res;

// gamma = d'*x
Expand Down

0 comments on commit a7f6e88

Please sign in to comment.