From 959bb29c0a825a2b3a1aac17f6171670f0eb2ffd Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Thu, 8 Dec 2022 20:41:25 +0000 Subject: [PATCH 01/20] gram matrix support for csr --- .../distance/detail/kernels/gram_matrix.cuh | 166 ++++++++++++++++++ .../detail/kernels/kernel_matrices.cuh | 146 +++++++++++++++ 2 files changed, 312 insertions(+) diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 344dda693e..6bc412e248 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include #include @@ -77,6 +79,42 @@ class GramMatrixBase { evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } + virtual void operator()(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld_out = 0) + { + if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } + evaluateSparse(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_indptr, + x2_indices, + x2_data, + x2_nnz, + n2, + out, + is_row_major, + stream, + ld_out); + } + /** Evaluate the Gram matrix for two vector sets using simple dot product. * * @param [in] x1 device array of vectors, size [n1*n_cols] @@ -107,6 +145,41 @@ class GramMatrixBase { linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } + virtual void evaluateSparse(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld_out) + { + linearSparse(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_indptr, + x2_indices, + x2_data, + x2_nnz, + n2, + out, + is_row_major, + stream, + ld_out); + } + // private: // The following methods should be private, they are kept public to avoid: // "error: The enclosing parent function ("distance") for an extended @@ -182,6 +255,99 @@ class GramMatrixBase { } } + void linearSparse(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld_out) + { + int minor_out = is_row_major ? 
n2 : n1; + ASSERT(ld_out == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); + distanceSparse(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_indptr, + x2_indices, + x2_data, + x2_nnz, + n2, + out, + is_row_major, + stream, + raft::distance::DistanceType::InnerProduct); + } + + void distanceSparse(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + raft::distance::DistanceType metric, + float metricArg = 0.0) + { + raft::sparse::distance::distances_config_t dist_config(handle); + + // switch a,b based on is_row_major + if (!is_row_major) { + dist_config.a_nrows = n2; + dist_config.a_ncols = n_cols; + dist_config.a_nnz = x2_nnz; + dist_config.a_indptr = const_cast(x2_indptr); + dist_config.a_indices = const_cast(x2_indices); + dist_config.a_data = const_cast(x2_data); + dist_config.b_nrows = n1; + dist_config.b_ncols = n_cols; + dist_config.b_nnz = x1_nnz; + dist_config.b_indptr = const_cast(x1_indptr); + dist_config.b_indices = const_cast(x1_indices); + dist_config.b_data = const_cast(x1_data); + } else { + dist_config.a_nrows = n1; + dist_config.a_ncols = n_cols; + dist_config.a_nnz = x1_nnz; + dist_config.a_indptr = const_cast(x1_indptr); + dist_config.a_indices = const_cast(x1_indices); + dist_config.a_data = const_cast(x1_data); + dist_config.b_nrows = n2; + dist_config.b_ncols = n_cols; + dist_config.b_nnz = x2_nnz; + dist_config.b_indptr = const_cast(x2_indptr); + dist_config.b_indices = const_cast(x2_indices); + dist_config.b_data = const_cast(x2_data); + } + + if (raft::sparse::distance::supportedDistance.find(metric) == + raft::sparse::distance::supportedDistance.end()) + THROW("DistanceType not supported: %d", metric); + + raft::sparse::distance::pairwiseDistance(out, dist_config, metric, metricArg); + } + /** Calculates the Gram matrix using Euclidean distance. * * Can be used as a building block for more complex kernel functions. diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index b74de84d80..b81ace83b3 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -100,6 +100,40 @@ __global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t ga } } +/** Epiloge function for rbf kernel without padding. + * Calculates output = exp(-gain * input); + * @param inout device vector, size [len] + * @param len length of the input vector + * @param gain + */ +template +__global__ void rbf_kernel_nopad(math_t* inout, size_t len, math_t gain) +{ + for (size_t tid = threadIdx.x + blockIdx.x * blockDim.x; tid < len; + tid += blockDim.x * gridDim.x) { + inout[tid] = exp(-1.0 * gain * inout[tid]); + } +} + +/** Epiloge function for rbf kernel without padding. 
+ * Calculates output = exp(-gain * input); + * @param inout device vector in column major format, size [ld * cols] + * @param ld leading dimension of the inout buffer + * @param rows number of rows (rows <= ld) + * @param cols number of columns + * @param gain + */ +template +__global__ void rbf_kernel(math_t* inout, int ld, int rows, int cols, math_t gain) +{ + for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols; + tidy += blockDim.y * gridDim.y) + for (size_t tidx = threadIdx.x + blockIdx.x * blockDim.x; tidx < rows; + tidx += blockDim.x * gridDim.x) { + inout[tidx + tidy * ld] = exp(-1.0 * gain * inout[tidx + tidy * ld]); + } +} + /** * Create a kernel matrix using polynomial kernel function. */ @@ -180,6 +214,42 @@ class PolynomialKernel : public GramMatrixBase { x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); applyKernel(out, ld_out, n1, n2, is_row_major, stream); } + + void evaluateSparse(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld_out) + { + GramMatrixBase::linearSparse(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_indptr, + x2_indices, + x2_data, + x2_nnz, + n2, + out, + is_row_major, + stream, + ld_out); + applyKernel(out, ld_out, n1, n2, is_row_major, stream); + } }; /** @@ -260,6 +330,42 @@ class TanhKernel : public GramMatrixBase { x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); applyKernel(out, ld_out, n1, n2, is_row_major, stream); } + + void evaluateSparse(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld_out) + { + GramMatrixBase::linearSparse(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_indptr, + x2_indices, + x2_data, + x2_nnz, + n2, + out, + is_row_major, + stream, + ld_out); + applyKernel(out, ld_out, n1, n2, is_row_major, stream); + } }; /** @@ -337,6 +443,46 @@ class RBFKernel : public GramMatrixBase { distance(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } + void evaluateSparse(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const int* x2_indptr, + const int* x2_indices, + const math_t* x2_data, + int x2_nnz, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld_out) + { + int minor_out = is_row_major ? 
n2 : n1; + ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); + + GramMatrixBase::distanceSparse(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_indptr, + x2_indices, + x2_data, + x2_nnz, + n2, + out, + is_row_major, + stream, + raft::distance::DistanceType::L2Unexpanded); + + applyKernel(out, ld_out, n1, n2, is_row_major, stream); + } + /** Customize distance function withe RBF epilogue */ void distance(const math_t* x1, int n1, From 36c56b1016cb3a27f4d6ffb9cd8764f58e129f75 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Thu, 2 Feb 2023 08:24:32 -0800 Subject: [PATCH 02/20] Add CSRxDense kernel compute, also add row norm for CSR --- .../distance/detail/kernels/gram_matrix.cuh | 153 +++++++++++++++++ .../detail/kernels/kernel_matrices.cuh | 156 ++++++++++++++++++ .../raft/sparse/linalg/detail/norm.cuh | 88 ++++++++++ cpp/include/raft/sparse/linalg/norm.cuh | 32 ++++ 4 files changed, 429 insertions(+) diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 6bc412e248..25c5992bc1 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -79,6 +80,46 @@ class GramMatrixBase { evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } + virtual void operator()(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const math_t* x2_data, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld2 = 0, + int ld_out = 0, + math_t* norm = nullptr, + int offset_x1 = 0, + int* idx_x2 = 0) + + { + if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } + if (ld_out <= 0) { ld_out = is_row_major ? 
n2 : n1; } + evaluateSparseX1(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_data, + n2, + out, + is_row_major, + stream, + ld2, + ld_out, + norm, + offset_x1, + idx_x2); + } + virtual void operator()(const raft::handle_t& handle, const int* x1_indptr, const int* x1_indices, @@ -145,6 +186,40 @@ class GramMatrixBase { linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } + virtual void evaluateSparseX1(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const math_t* x2_data, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld2, + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) + { + linearSparseX1(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_data, + n2, + out, + is_row_major, + stream, + ld2, + ld_out); + } + virtual void evaluateSparse(const raft::handle_t& handle, const int* x1_indptr, const int* x1_indices, @@ -255,6 +330,84 @@ class GramMatrixBase { } } + void linearSparseX1(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const math_t* x2_data, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld2, + int ld_out) + { + math_t alpha = 1.0; + math_t beta = 0.0; + + cusparseSpMatDescr_t descrX1; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&descrX1, + n1, + n_cols, + x1_nnz, + const_cast(x1_indptr), + const_cast(x1_indices), + const_cast(x1_data))); + + auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + + cusparseDnMatDescr_t descrX2; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( + &descrX2, n2, n_cols, ld2, const_cast(x2_data), order)); + + cusparseDnMatDescr_t descrOut; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( + &descrOut, n1, n2, ld_out, const_cast(out), order)); + + auto alg = order == CUSPARSE_ORDER_COL ? CUSPARSE_SPMM_CSR_ALG1 : CUSPARSE_SPMM_CSR_ALG2; + + // compute X1*X2^T + auto opX1 = CUSPARSE_OPERATION_NON_TRANSPOSE; + auto opX2 = CUSPARSE_OPERATION_TRANSPOSE; + + size_t bufferSize; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(), + opX1, + opX2, + &alpha, + descrX1, + descrX2, + &beta, + descrOut, + alg, + &bufferSize, + stream)); + + raft::interruptible::synchronize(stream); + + rmm::device_uvector tmp(bufferSize, stream); + + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(), + opX1, + opX2, + &alpha, + descrX1, + descrX2, + &beta, + descrOut, + alg, + tmp.data(), + stream)); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descrX1)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descrX2)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descrOut)); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + void linearSparse(const raft::handle_t& handle, const int* x1_indptr, const int* x1_indices, diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index b81ace83b3..db9e16233b 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -134,6 +134,32 @@ __global__ void rbf_kernel(math_t* inout, int ld, int rows, int cols, math_t gai } } +/** Epiloge function for rbf kernel using expansion. 
+ * Calculates output_ij = exp(-gain * (norm_i + norm_j - 2*input_ij)); + * @param inout device vector in column major format, size [ld * cols] + * @param ld leading dimension of the inout buffer + * @param rows number of rows (rows <= ld) + * @param cols number of columns + * @param norm norm for row indices + * @param offset_i offset into norm for rows (assumed to be coalesced) + * @param idx_j indirect column id to access norm + * @param gain + */ +template +__global__ void rbf_kernel_expanded( + math_t* inout, int ld, int rows, int cols, math_t* norm, int offset_i, int* idx_j, math_t gain) +{ + for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols; + tidy += blockDim.y * gridDim.y) { + math_t norm_y = norm[idx_j[tidy]]; + for (size_t tidx = threadIdx.x + blockIdx.x * blockDim.x; tidx < rows; + tidx += blockDim.x * gridDim.x) { + inout[tidx + tidy * ld] = + exp(-1.0 * gain * (norm[tidx + offset_i] + norm_y - inout[tidx + tidy * ld] * 2)); + } + } +} + /** * Create a kernel matrix using polynomial kernel function. */ @@ -215,6 +241,41 @@ class PolynomialKernel : public GramMatrixBase { applyKernel(out, ld_out, n1, n2, is_row_major, stream); } + void evaluateSparseX1(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const math_t* x2_data, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld2, + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) + { + GramMatrixBase::linearSparseX1(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_data, + n2, + out, + is_row_major, + stream, + ld2, + ld_out); + applyKernel(out, ld_out, n1, n2, is_row_major, stream); + } + void evaluateSparse(const raft::handle_t& handle, const int* x1_indptr, const int* x1_indices, @@ -331,6 +392,41 @@ class TanhKernel : public GramMatrixBase { applyKernel(out, ld_out, n1, n2, is_row_major, stream); } + void evaluateSparseX1(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const math_t* x2_data, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld2, + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) + { + GramMatrixBase::linearSparseX1(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_data, + n2, + out, + is_row_major, + stream, + ld2, + ld_out); + applyKernel(out, ld_out, n1, n2, is_row_major, stream); + } + void evaluateSparse(const raft::handle_t& handle, const int* x1_indptr, const int* x1_indices, @@ -392,6 +488,23 @@ class RBFKernel : public GramMatrixBase { } } + void applyExpandedRbfKernel(math_t* inout, + int ld, + int rows, + int cols, + math_t* norm, + int offset_i, + int* idx_j, + bool is_row_major, + cudaStream_t stream) + { + ASSERT(!is_row_major, "Expanded RBF kernel currently only supports col major format"); + rbf_kernel_expanded<<>>(inout, ld, rows, cols, norm, offset_i, idx_j, gain); + } + public: /** * Constructs a RBF kernel object. 
@@ -443,6 +556,49 @@ class RBFKernel : public GramMatrixBase { distance(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } + void evaluateSparseX1(const raft::handle_t& handle, + const int* x1_indptr, + const int* x1_indices, + const math_t* x1_data, + int x1_nnz, + int n1, + int n_cols, + const math_t* x2_data, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld2, + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) + { + int minor2 = is_row_major ? n_cols : n2; + int minor_out = is_row_major ? n2 : n1; + ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter"); + ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); + + ASSERT(norm != nullptr, "RBF Kernel needs pre-computed norm for expanded distance compute"); + // compute L2 expanded + GramMatrixBase::linearSparseX1(handle, + x1_indptr, + x1_indices, + x1_data, + x1_nnz, + n1, + n_cols, + x2_data, + n2, + out, + is_row_major, + stream, + ld2, + ld_out); + + applyExpandedRbfKernel(out, ld_out, n1, n2, norm, offset_x1, idx_x2, is_row_major, stream); + } + void evaluateSparse(const raft::handle_t& handle, const int* x1_indptr, const int* x1_indices, diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index c2a8aa4246..7605ce8351 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -170,6 +171,93 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) RAFT_CUDA_TRY(cudaGetLastError()); } +template +struct CsrReductionPolicy { + static constexpr int LogicalWarpSize = warpSize; + static constexpr int RowsPerBlock = rpb; + static constexpr int ThreadsPerBlock = LogicalWarpSize * RowsPerBlock; +}; + +template +__global__ void __launch_bounds__(Policy::ThreadsPerBlock) + csrReductionKernel(Type* dots, + const IdxType* ia, + const Type* data, + IdxType N, + Type init, + MainLambda main_op, + ReduceLambda reduce_op, + FinalLambda final_op) +{ + IdxType i = threadIdx.y + (Policy::RowsPerBlock * static_cast(blockIdx.x)); + if (i >= N) return; + + Type acc = init; + for (IdxType j = ia[i] + threadIdx.x; j < ia[i + 1]; j += Policy::LogicalWarpSize) { + acc = reduce_op(acc, main_op(data[j])); + } + acc = raft::logicalWarpReduce(acc, reduce_op); + if (threadIdx.x == 0) { dots[i] = final_op(acc); } +} + +template , + typename ReduceLambda = raft::Sum, + typename FinalLambda = raft::Nop> +void csrReduction(Type* dots, + const IdxType* ia, + const Type* data, + IdxType N, + Type init, + cudaStream_t stream, + MainLambda main_op = raft::Nop(), + ReduceLambda reduce_op = raft::Sum(), + FinalLambda final_op = raft::Nop()) +{ + common::nvtx::range fun_scope( + "csrReduction<%d,%d>", Policy::LogicalWarpSize, Policy::RowsPerBlock); + dim3 threads(Policy::LogicalWarpSize, Policy::RowsPerBlock, 1); + dim3 blocks(ceildiv(N, Policy::RowsPerBlock), 1, 1); + csrReductionKernel + <<>>(dots, ia, data, N, init, main_op, reduce_op, final_op); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +template +void rowNormCsrCaller(Type* dots, + const IdxType* ia, + const Type* data, + IdxType nnz, + IdxType N, + raft::linalg::NormType type, + cudaStream_t stream, + Lambda fin_op) +{ + // TODO: dispatch nnz to Policy? 
+ switch (type) { + case raft::linalg::NormType::L1Norm: + csrReduction>( + dots, ia, data, N, (Type)0, stream, raft::L1Op(), raft::Sum(), fin_op); + break; + case raft::linalg::NormType::L2Norm: + csrReduction>( + dots, ia, data, N, (Type)0, stream, raft::L2Op(), raft::Sum(), fin_op); + break; + case raft::linalg::NormType::LinfNorm: + csrReduction>( + dots, ia, data, N, (Type)0, stream, raft::L1Op(), raft::Max(), fin_op); + break; + default: THROW("Unsupported norm type: %d", type); + }; +} + }; // end NAMESPACE detail }; // end NAMESPACE linalg }; // end NAMESPACE sparse diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index e13fd22843..07b11d51f7 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -18,6 +18,7 @@ #pragma once +#include #include namespace raft { @@ -66,6 +67,37 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) detail::csr_row_normalize_max(ia, vals, nnz, m, result, stream); } +/** + * @brief Compute row-wise norm of the input matrix and perform fin_op lambda + * + * Row-wise norm is useful while computing pairwise distance matrix, for + * example. + * This is used in many clustering algos like knn, kmeans, dbscan, etc... + * + * @tparam Type the data type + * @tparam Lambda device final lambda + * @tparam IdxType Integer type used to for addressing + * @param dots the output vector of row-wise dot products + * @param ia the input matrix row pointers + * @param data the input matrix nnz data + * @param N number of rows of data + * @param type the type of norm to be applied + * @param stream cuda stream where to launch work + * @param fin_op the final lambda op + */ +template > +void rowNormCsr(Type* dots, + const IdxType* ia, + const Type* data, + IdxType nnz, + IdxType N, + raft::linalg::NormType type, + cudaStream_t stream, + Lambda fin_op = raft::Nop()) +{ + detail::rowNormCsrCaller(dots, ia, data, nnz, N, type, stream, fin_op); +} + }; // end NAMESPACE linalg }; // end NAMESPACE sparse }; // end NAMESPACE raft From a99c129420fee926096ce7bd19f253015640d241 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Fri, 3 Feb 2023 04:37:58 -0800 Subject: [PATCH 03/20] fix RBF for dense with offset --- .../distance/detail/kernels/gram_matrix.cuh | 25 ++++++--- .../detail/kernels/kernel_factory.cuh | 2 +- .../detail/kernels/kernel_matrices.cuh | 51 ++++++++++++++----- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 25c5992bc1..7f93232a18 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -61,6 +61,9 @@ class GramMatrixBase { * @param ld1 leading dimension of x1 * @param ld2 leading dimension of x2 * @param ld_out leading dimension of out + * @param norm optional L2 row norm of x1 for expanded computation within RBF. + * @param offset_x1 offset where x1 starts within norm + * @param idx_x2 indirect access to x2 row id within norm */ virtual void operator()(const math_t* x1, int n1, @@ -70,14 +73,18 @@ class GramMatrixBase { math_t* out, bool is_row_major, cudaStream_t stream, - int ld1 = 0, - int ld2 = 0, - int ld_out = 0) + int ld1 = 0, + int ld2 = 0, + int ld_out = 0, + math_t* norm = nullptr, + int offset_x1 = 0, + int* idx_x2 = nullptr) { if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; } if (ld2 <= 0) { ld2 = is_row_major ? 
n_cols : n2; } if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } - evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + evaluate( + x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out, norm, offset_x1, idx_x2); } virtual void operator()(const raft::handle_t& handle, @@ -96,7 +103,7 @@ class GramMatrixBase { int ld_out = 0, math_t* norm = nullptr, int offset_x1 = 0, - int* idx_x2 = 0) + int* idx_x2 = nullptr) { if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } @@ -170,6 +177,9 @@ class GramMatrixBase { * @param ld1 leading dimension of x1 (usually it is n1) * @param ld2 leading dimension of x2 (usually it is n2) * @param ld_out leading dimension of out (usually it is n1) + * @param norm optional L2 row norm of x1 for expanded computation within RBF. + * @param offset_x1 offset where x1 starts within norm + * @param idx_x2 indirect access to x2 row id within norm */ virtual void evaluate(const math_t* x1, int n1, @@ -181,7 +191,10 @@ class GramMatrixBase { cudaStream_t stream, int ld1, int ld2, - int ld_out) + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) { linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); } diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh index 1aa6809bcd..68e9d72418 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh @@ -38,7 +38,7 @@ class KernelFactory { res = new PolynomialKernel(params.degree, gamma, coef0, cublas_handle); break; case TANH: res = new TanhKernel(gamma, coef0, cublas_handle); break; - case RBF: res = new RBFKernel(gamma); break; + case RBF: res = new RBFKernel(gamma, cublas_handle); break; default: throw raft::exception("Kernel not implemented"); } return res; diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index db9e16233b..5b2a524f46 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -223,6 +223,9 @@ class PolynomialKernel : public GramMatrixBase { * @param ld1 leading dimension of x1 * @param ld2 leading dimension of x2 * @param ld_out leading dimension of out + * @param norm optional L2 row norm of x1 for expanded computation within RBF. + * @param offset_x1 offset where x1 starts within norm + * @param idx_x2 indirect access to x2 row id within norm */ void evaluate(const math_t* x1, int n1, @@ -234,7 +237,10 @@ class PolynomialKernel : public GramMatrixBase { cudaStream_t stream, int ld1, int ld2, - int ld_out) + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) { GramMatrixBase::linear( x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); @@ -374,6 +380,9 @@ class TanhKernel : public GramMatrixBase { * @param ld1 leading dimension of x1 (usually it is n1) * @param ld2 leading dimension of x2 (usually it is n2) * @param ld_out leading dimension of out (usually it is n1) + * @param norm optional L2 row norm of x1 for expanded computation within RBF. 
+ * @param offset_x1 offset where x1 starts within norm + * @param idx_x2 indirect access to x2 row id within norm */ void evaluate(const math_t* x1, int n1, @@ -385,7 +394,10 @@ class TanhKernel : public GramMatrixBase { cudaStream_t stream, int ld1, int ld2, - int ld_out) + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) { GramMatrixBase::linear( x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); @@ -514,7 +526,10 @@ class RBFKernel : public GramMatrixBase { * @tparam math_t floating point type * @param gain */ - RBFKernel(math_t gain) : GramMatrixBase(NULL), gain(gain) {} + RBFKernel(math_t gain, cublasHandle_t cublas_handle) + : GramMatrixBase(cublas_handle), gain(gain) + { + } /** Evaluate kernel matrix using RBF kernel. * @@ -534,6 +549,9 @@ class RBFKernel : public GramMatrixBase { * @param ld1 leading dimension of x1, currently only ld1 == n1 is supported * @param ld2 leading dimension of x2, currently only ld2 == n2 is supported * @param ld_out leading dimension of out, only ld_out == n1 is supported + * @param norm optional L2 row norm of x1 for expanded computation within RBF. + * @param offset_x1 offset where x1 starts within norm + * @param idx_x2 indirect access to x2 row id within norm */ void evaluate(const math_t* x1, int n1, @@ -545,15 +563,25 @@ class RBFKernel : public GramMatrixBase { cudaStream_t stream, int ld1, int ld2, - int ld_out) + int ld_out, + math_t* norm, + int offset_x1, + int* idx_x2) { - int minor1 = is_row_major ? n_cols : n1; - int minor2 = is_row_major ? n_cols : n2; int minor_out = is_row_major ? n2 : n1; - ASSERT(ld1 == minor1, "RBF Kernel distance does not support ld1 parameter"); - ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter"); ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); - distance(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + if (norm != nullptr) { + // compute L2expanded + GramMatrixBase::linear( + x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + applyExpandedRbfKernel(out, ld_out, n1, n2, norm, offset_x1, idx_x2, is_row_major, stream); + } else { + int minor1 = is_row_major ? n_cols : n1; + int minor2 = is_row_major ? n_cols : n2; + ASSERT(ld1 == minor1, "RBF Kernel distance does not support ld1 parameter"); + ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter"); + distance(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + } } void evaluateSparseX1(const raft::handle_t& handle, @@ -574,11 +602,6 @@ class RBFKernel : public GramMatrixBase { int offset_x1, int* idx_x2) { - int minor2 = is_row_major ? n_cols : n2; - int minor_out = is_row_major ? 
n2 : n1; - ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter"); - ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); - ASSERT(norm != nullptr, "RBF Kernel needs pre-computed norm for expanded distance compute"); // compute L2 expanded GramMatrixBase::linearSparseX1(handle, From 60017db6b6ea8d7ac7deaa6427a30a9d4ebb8c30 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Tue, 21 Feb 2023 14:05:01 +0000 Subject: [PATCH 04/20] add matrix wrapper to unify kernel API --- .../distance/detail/kernels/gram_matrix.cuh | 515 +++++------------- .../detail/kernels/kernel_factory.cuh | 10 +- .../detail/kernels/kernel_matrices.cuh | 513 ++++------------- .../raft/distance/detail/matrix/matrix.hpp | 99 ++++ 4 files changed, 348 insertions(+), 789 deletions(-) create mode 100644 cpp/include/raft/distance/detail/matrix/matrix.hpp diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 7f93232a18..409e06b8e6 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -40,232 +41,55 @@ namespace raft::distance::kernels::detail { */ template class GramMatrixBase { - cublasHandle_t cublas_handle; + const raft::handle_t& handle; public: - GramMatrixBase(cublasHandle_t cublas_handle) : cublas_handle(cublas_handle){}; + GramMatrixBase(const raft::handle_t& handle) : handle(handle){}; virtual ~GramMatrixBase(){}; /** Convenience function to evaluate the Gram matrix for two vector sets. + * Vector sets are provided in Matrix format * - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of columns (features) in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] + * @param [out] out (dense) device matrix to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 - * @param ld2 leading dimension of x2 - * @param ld_out leading dimension of out - * @param norm optional L2 row norm of x1 for expanded computation within RBF. - * @param offset_x1 offset where x1 starts within norm - * @param idx_x2 indirect access to x2 row id within norm + * @param dot_x1 optional dot product of x1 for expanded computation within RBF. + * @param dot_x2 optional dot product of x2 for expanded computation within RBF. */ - virtual void operator()(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld1 = 0, - int ld2 = 0, - int ld_out = 0, - math_t* norm = nullptr, - int offset_x1 = 0, - int* idx_x2 = nullptr) - { - if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; } - if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } - if (ld_out <= 0) { ld_out = is_row_major ? 
n2 : n1; } - evaluate( - x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out, norm, offset_x1, idx_x2); - } - - virtual void operator()(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const math_t* x2_data, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld2 = 0, - int ld_out = 0, - math_t* norm = nullptr, - int offset_x1 = 0, - int* idx_x2 = nullptr) - - { - if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } - if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } - evaluateSparseX1(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_data, - n2, - out, - is_row_major, - stream, - ld2, - ld_out, - norm, - offset_x1, - idx_x2); - } - - virtual void operator()(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, + virtual void operator()(const raft::distance::matrix::detail::Matrix& x1, + const raft::distance::matrix::detail::Matrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - int ld_out = 0) + math_t* dot_x1 = nullptr, + math_t* dot_x2 = nullptr) { - if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } - evaluateSparse(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_indptr, - x2_indices, - x2_data, - x2_nnz, - n2, - out, - is_row_major, - stream, - ld_out); + ASSERT(x1.n_rows == out.n_rows, + "GramMatrix input matrix dimensions for x1 and out do not match"); + ASSERT(x2.n_rows == out.n_cols, + "GramMatrix input matrix dimensions for x2 and out do not match"); + ASSERT(x1.n_cols == x2.n_cols, "GramMatrix input matrix dimensions for x1 and x2 do not match"); + evaluate(x1, x2, out, stream, dot_x1, dot_x2); } /** Evaluate the Gram matrix for two vector sets using simple dot product. * - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of columns (features) in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 (usually it is n1) - * @param ld2 leading dimension of x2 (usually it is n2) - * @param ld_out leading dimension of out (usually it is n1) - * @param norm optional L2 row norm of x1 for expanded computation within RBF. - * @param offset_x1 offset where x1 starts within norm - * @param idx_x2 indirect access to x2 row id within norm + * @param dot_x1 optional dot product of x1 for expanded computation within RBF. + * @param dot_x2 optional dot product of x2 for expanded computation within RBF. 
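   * (Note: this base-class implementation forwards to linear() and ignores dot_x1/dot_x2; they are
   *  only used by the RBFKernel override.)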
*/ - virtual void evaluate(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, + virtual void evaluate(const raft::distance::matrix::detail::Matrix& x1, + const raft::distance::matrix::detail::Matrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - int ld1, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) - { - linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); - } - - virtual void evaluateSparseX1(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const math_t* x2_data, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) - { - linearSparseX1(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_data, - n2, - out, - is_row_major, - stream, - ld2, - ld_out); - } - - virtual void evaluateSparse(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld_out) + math_t* dot_x1, + math_t* dot_x2) { - linearSparse(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_indptr, - x2_indices, - x2_data, - x2_nnz, - n2, - out, - is_row_major, - stream, - ld_out); + linear(x1, x2, out, stream); } // private: @@ -279,106 +103,89 @@ class GramMatrixBase { * * Can be used as a building block for more complex kernel functions. * - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of columns (features) in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 - * @param ld2 leading dimension of x2 - * @param ld_out leading dimension of out */ - void linear(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld1, - int ld2, - int ld_out) + void linear(const raft::distance::matrix::detail::DenseMatrix& x1, + const raft::distance::matrix::detail::DenseMatrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, + cudaStream_t stream) { + ASSERT(x1.is_row_major == x2.is_row_major, + "GramMatrix leading dimensions for x1 and x2 do not match"); + ASSERT(x2.is_row_major == out.is_row_major, + "GramMatrix leading dimensions for x2 and out do not match"); + math_t alpha = 1.0; math_t beta = 0.0; - if (is_row_major) { + if (out.is_row_major) { // #TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, - n2, - n1, - n_cols, + out.n_cols, + out.n_rows, + x1.n_cols, &alpha, - x2, - ld2, - x1, - ld1, + x2.data, + x2.ld, + x1.data, + x1.ld, &beta, - out, - ld_out, + out.data, + out.ld, stream)); } else { // #TODO: 
Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), CUBLAS_OP_N, CUBLAS_OP_T, - n1, - n2, - n_cols, + out.n_rows, + out.n_cols, + x1.n_cols, &alpha, - x1, - ld1, - x2, - ld2, + x1.data, + x1.ld, + x2.data, + x2.ld, &beta, - out, - ld_out, + out.data, + out.ld, stream)); } } - void linearSparseX1(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const math_t* x2_data, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld2, - int ld_out) + void linear(const raft::distance::matrix::detail::CsrMatrix& x1, + const raft::distance::matrix::detail::DenseMatrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, + cudaStream_t stream) { math_t alpha = 1.0; math_t beta = 0.0; + ASSERT(x2.is_row_major == out.is_row_major, + "GramMatrix leading dimensions for x2 and out do not match"); + cusparseSpMatDescr_t descrX1; RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&descrX1, - n1, - n_cols, - x1_nnz, - const_cast(x1_indptr), - const_cast(x1_indices), - const_cast(x1_data))); + x1.n_rows, + x1.n_cols, + x1.nnz, + const_cast(x1.indptr), + const_cast(x1.indices), + const_cast(x1.data))); - auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + auto order = out.is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseDnMatDescr_t descrX2; RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( - &descrX2, n2, n_cols, ld2, const_cast(x2_data), order)); + &descrX2, x2.n_rows, x2.n_cols, x2.ld, const_cast(x2.data), order)); cusparseDnMatDescr_t descrOut; RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( - &descrOut, n1, n2, ld_out, const_cast(out), order)); + &descrOut, out.n_rows, out.n_cols, out.ld, const_cast(out.data), order)); auto alg = order == CUSPARSE_ORDER_COL ? CUSPARSE_SPMM_CSR_ALG1 : CUSPARSE_SPMM_CSR_ALG2; @@ -421,130 +228,80 @@ class GramMatrixBase { RAFT_CUDA_TRY(cudaPeekAtLastError()); } - void linearSparse(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld_out) - { - int minor_out = is_row_major ? n2 : n1; - ASSERT(ld_out == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); - distanceSparse(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_indptr, - x2_indices, - x2_data, - x2_nnz, - n2, - out, - is_row_major, - stream, - raft::distance::DistanceType::InnerProduct); - } - - void distanceSparse(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - raft::distance::DistanceType metric, - float metricArg = 0.0) + void linear(const raft::distance::matrix::detail::CsrMatrix& x1, + const raft::distance::matrix::detail::CsrMatrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, + cudaStream_t stream) { + int minor_out = out.is_row_major ? 
out.n_cols : out.n_rows; + ASSERT(out.ld == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); raft::sparse::distance::distances_config_t dist_config(handle); // switch a,b based on is_row_major - if (!is_row_major) { - dist_config.a_nrows = n2; - dist_config.a_ncols = n_cols; - dist_config.a_nnz = x2_nnz; - dist_config.a_indptr = const_cast(x2_indptr); - dist_config.a_indices = const_cast(x2_indices); - dist_config.a_data = const_cast(x2_data); - dist_config.b_nrows = n1; - dist_config.b_ncols = n_cols; - dist_config.b_nnz = x1_nnz; - dist_config.b_indptr = const_cast(x1_indptr); - dist_config.b_indices = const_cast(x1_indices); - dist_config.b_data = const_cast(x1_data); + if (!out.is_row_major) { + dist_config.a_nrows = x2.n_rows; + dist_config.a_ncols = x2.n_cols; + dist_config.a_nnz = x2.nnz; + dist_config.a_indptr = const_cast(x2.indptr); + dist_config.a_indices = const_cast(x2.indices); + dist_config.a_data = const_cast(x2.data); + dist_config.b_nrows = x1.n_rows; + dist_config.b_ncols = x1.n_cols; + dist_config.b_nnz = x1.nnz; + dist_config.b_indptr = const_cast(x1.indptr); + dist_config.b_indices = const_cast(x1.indices); + dist_config.b_data = const_cast(x1.data); } else { - dist_config.a_nrows = n1; - dist_config.a_ncols = n_cols; - dist_config.a_nnz = x1_nnz; - dist_config.a_indptr = const_cast(x1_indptr); - dist_config.a_indices = const_cast(x1_indices); - dist_config.a_data = const_cast(x1_data); - dist_config.b_nrows = n2; - dist_config.b_ncols = n_cols; - dist_config.b_nnz = x2_nnz; - dist_config.b_indptr = const_cast(x2_indptr); - dist_config.b_indices = const_cast(x2_indices); - dist_config.b_data = const_cast(x2_data); + dist_config.a_nrows = x1.n_rows; + dist_config.a_ncols = x1.n_cols; + dist_config.a_nnz = x1.nnz; + dist_config.a_indptr = const_cast(x1.indptr); + dist_config.a_indices = const_cast(x1.indices); + dist_config.a_data = const_cast(x1.data); + dist_config.b_nrows = x2.n_rows; + dist_config.b_ncols = x2.n_cols; + dist_config.b_nnz = x2.nnz; + dist_config.b_indptr = const_cast(x2.indptr); + dist_config.b_indices = const_cast(x2.indices); + dist_config.b_data = const_cast(x2.data); } - if (raft::sparse::distance::supportedDistance.find(metric) == - raft::sparse::distance::supportedDistance.end()) - THROW("DistanceType not supported: %d", metric); - - raft::sparse::distance::pairwiseDistance(out, dist_config, metric, metricArg); + raft::sparse::distance::pairwiseDistance( + out.data, dist_config, raft::distance::DistanceType::InnerProduct, 0.0); } - /** Calculates the Gram matrix using Euclidean distance. + /** Calculates the Gram matrix using simple dot product between vector sets. + * + * out = x1 * x2 * * Can be used as a building block for more complex kernel functions. 
* - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of columns (features) in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 - * @param ld2 leading dimension of x2 - * @param ld_out leading dimension of out */ - virtual void distance(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld1, - int ld2, - int ld_out) + void linear(const raft::distance::matrix::detail::Matrix& x1, + const raft::distance::matrix::detail::Matrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, + cudaStream_t stream) { - raft::distance::distance( - x1, x2, out, n1, n2, n_cols, stream, is_row_major); + // dispatch + if (x1.isDense()) { + ASSERT(x2.isDense(), "GramMatrix input matrix does not allow Dense*Csr"); + auto x1_dense = x1.asDense(); + auto x2_dense = x2.asDense(); + linear(*x1_dense, *x2_dense, out, stream); + } else { + auto x1_csr = x1.asCsr(); + if (x2.isDense()) { + auto x2_dense = x2.asDense(); + linear(*x1_csr, *x2_dense, out, stream); + } else { + auto x2_csr = x2.asCsr(); + linear(*x1_csr, *x2_csr, out, stream); + } + } } }; }; // end namespace raft::distance::kernels::detail \ No newline at end of file diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh index 68e9d72418..460c039073 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh @@ -26,19 +26,19 @@ namespace raft::distance::kernels::detail { template class KernelFactory { public: - static GramMatrixBase* create(KernelParams params, cublasHandle_t cublas_handle) + static GramMatrixBase* create(KernelParams params, const raft::handle_t& handle) { GramMatrixBase* res; // KernelParams is not templated, we convert the parameters to math_t here: math_t coef0 = params.coef0; math_t gamma = params.gamma; switch (params.kernel) { - case LINEAR: res = new GramMatrixBase(cublas_handle); break; + case LINEAR: res = new GramMatrixBase(handle); break; case POLYNOMIAL: - res = new PolynomialKernel(params.degree, gamma, coef0, cublas_handle); + res = new PolynomialKernel(params.degree, gamma, coef0, handle); break; - case TANH: res = new TanhKernel(gamma, coef0, cublas_handle); break; - case RBF: res = new RBFKernel(gamma, cublas_handle); break; + case TANH: res = new TanhKernel(gamma, coef0, handle); break; + case RBF: res = new RBFKernel(gamma, handle); break; default: throw raft::exception("Kernel not implemented"); } return res; diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 5b2a524f46..d65fc28cb7 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -21,6 +21,7 @@ #include #include +#include namespace raft::distance::kernels::detail { @@ -100,62 +101,27 @@ __global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t ga } 
} -/** Epiloge function for rbf kernel without padding. - * Calculates output = exp(-gain * input); - * @param inout device vector, size [len] - * @param len length of the input vector - * @param gain - */ -template -__global__ void rbf_kernel_nopad(math_t* inout, size_t len, math_t gain) -{ - for (size_t tid = threadIdx.x + blockIdx.x * blockDim.x; tid < len; - tid += blockDim.x * gridDim.x) { - inout[tid] = exp(-1.0 * gain * inout[tid]); - } -} - -/** Epiloge function for rbf kernel without padding. - * Calculates output = exp(-gain * input); - * @param inout device vector in column major format, size [ld * cols] - * @param ld leading dimension of the inout buffer - * @param rows number of rows (rows <= ld) - * @param cols number of columns - * @param gain - */ -template -__global__ void rbf_kernel(math_t* inout, int ld, int rows, int cols, math_t gain) -{ - for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols; - tidy += blockDim.y * gridDim.y) - for (size_t tidx = threadIdx.x + blockIdx.x * blockDim.x; tidx < rows; - tidx += blockDim.x * gridDim.x) { - inout[tidx + tidy * ld] = exp(-1.0 * gain * inout[tidx + tidy * ld]); - } -} - /** Epiloge function for rbf kernel using expansion. * Calculates output_ij = exp(-gain * (norm_i + norm_j - 2*input_ij)); * @param inout device vector in column major format, size [ld * cols] * @param ld leading dimension of the inout buffer * @param rows number of rows (rows <= ld) * @param cols number of columns - * @param norm norm for row indices - * @param offset_i offset into norm for rows (assumed to be coalesced) - * @param idx_j indirect column id to access norm + * @param dot_rows dot product for row indices + * @param dot_cols dot product for column indices * @param gain */ template __global__ void rbf_kernel_expanded( - math_t* inout, int ld, int rows, int cols, math_t* norm, int offset_i, int* idx_j, math_t gain) + math_t* inout, int ld, int rows, int cols, math_t* dot_rows, math_t* dot_cols, math_t gain) { for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols; tidy += blockDim.y * gridDim.y) { - math_t norm_y = norm[idx_j[tidy]]; + math_t norm_y = dot_cols[tidy]; for (size_t tidx = threadIdx.x + blockIdx.x * blockDim.x; tidx < rows; tidx += blockDim.x * gridDim.x) { inout[tidx + tidy * ld] = - exp(-1.0 * gain * (norm[tidx + offset_i] + norm_y - inout[tidx + tidy * ld] * 2)); + exp(-1.0 * gain * (dot_rows[tidx] + norm_y - inout[tidx + tidy * ld] * 2)); } } } @@ -198,10 +164,10 @@ class PolynomialKernel : public GramMatrixBase { * @param exponent * @param gain * @param offset - * @param cublas_handle + * @param handle */ - PolynomialKernel(exp_t exponent, math_t gain, math_t offset, cublasHandle_t cublas_handle) - : GramMatrixBase(cublas_handle), exponent(exponent), gain(gain), offset(offset) + PolynomialKernel(exp_t exponent, math_t gain, math_t offset, const raft::handle_t& handle) + : GramMatrixBase(handle), exponent(exponent), gain(gain), offset(offset) { } @@ -211,111 +177,22 @@ class PolynomialKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. 
* - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of features in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*cols] - * @param [in] n2 number vectors in x2 + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 - * @param ld2 leading dimension of x2 - * @param ld_out leading dimension of out - * @param norm optional L2 row norm of x1 for expanded computation within RBF. - * @param offset_x1 offset where x1 starts within norm - * @param idx_x2 indirect access to x2 row id within norm + * @param dot_x1 optional dot product of x1 for expanded computation within RBF. + * @param dot_x2 optional dot product of x2 for expanded computation within RBF. */ - void evaluate(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, + void evaluate(const raft::distance::matrix::detail::Matrix& x1, + const raft::distance::matrix::detail::Matrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - int ld1, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) - { - GramMatrixBase::linear( - x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); - applyKernel(out, ld_out, n1, n2, is_row_major, stream); - } - - void evaluateSparseX1(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const math_t* x2_data, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) + math_t* dot_x1, + math_t* dot_x2) { - GramMatrixBase::linearSparseX1(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_data, - n2, - out, - is_row_major, - stream, - ld2, - ld_out); - applyKernel(out, ld_out, n1, n2, is_row_major, stream); - } - - void evaluateSparse(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld_out) - { - GramMatrixBase::linearSparse(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_indptr, - x2_indices, - x2_data, - x2_nnz, - n2, - out, - is_row_major, - stream, - ld_out); - applyKernel(out, ld_out, n1, n2, is_row_major, stream); + GramMatrixBase::linear(x1, x2, out, stream); + applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, stream); } }; @@ -355,8 +232,8 @@ class TanhKernel : public GramMatrixBase { * @param offset * @param cublas_handle */ - TanhKernel(math_t gain, math_t offset, cublasHandle_t cublas_handle) - : GramMatrixBase(cublas_handle), gain(gain), offset(offset) + TanhKernel(math_t gain, math_t offset, const raft::handle_t& handle) + : GramMatrixBase(handle), gain(gain), offset(offset) { } @@ -366,113 +243,22 @@ class TanhKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. 
* - * @param [in] x1 device array of vectors, - * size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of features in x1 and x2 - * @param [in] x2 device array of vectors, - * size [n2*n_cols] - * @param [in] n2 number vectors in x2 + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 (usually it is n1) - * @param ld2 leading dimension of x2 (usually it is n2) - * @param ld_out leading dimension of out (usually it is n1) - * @param norm optional L2 row norm of x1 for expanded computation within RBF. - * @param offset_x1 offset where x1 starts within norm - * @param idx_x2 indirect access to x2 row id within norm + * @param dot_x1 optional dot product of x1 for expanded computation within RBF. + * @param dot_x2 optional dot product of x2 for expanded computation within RBF. */ - void evaluate(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, + void evaluate(const raft::distance::matrix::detail::Matrix& x1, + const raft::distance::matrix::detail::Matrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - int ld1, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) - { - GramMatrixBase::linear( - x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); - applyKernel(out, ld_out, n1, n2, is_row_major, stream); - } - - void evaluateSparseX1(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const math_t* x2_data, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) - { - GramMatrixBase::linearSparseX1(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_data, - n2, - out, - is_row_major, - stream, - ld2, - ld_out); - applyKernel(out, ld_out, n1, n2, is_row_major, stream); - } - - void evaluateSparse(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld_out) + math_t* dot_x1, + math_t* dot_x2) { - GramMatrixBase::linearSparse(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_indptr, - x2_indices, - x2_data, - x2_nnz, - n2, - out, - is_row_major, - stream, - ld_out); - applyKernel(out, ld_out, n1, n2, is_row_major, stream); + GramMatrixBase::linear(x1, x2, out, stream); + applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, stream); } }; @@ -483,38 +269,23 @@ template class RBFKernel : public GramMatrixBase { math_t gain; - void applyKernel( - math_t* inout, int ld, int rows, int cols, bool is_row_major, cudaStream_t stream) - { - const int n_minor = is_row_major ? cols : rows; - if (ld == n_minor) { - rbf_kernel_nopad<<((size_t)rows * cols, 128), 128, 0, stream>>>( - inout, rows * cols, gain); - } else { - int n1 = is_row_major ? cols : rows; - int n2 = is_row_major ? 
rows : cols; - rbf_kernel<<>>(inout, ld, n1, n2, gain); - } - } - void applyExpandedRbfKernel(math_t* inout, int ld, int rows, int cols, - math_t* norm, - int offset_i, - int* idx_j, + math_t* dot_x1, + math_t* dot_x2, bool is_row_major, cudaStream_t stream) { - ASSERT(!is_row_major, "Expanded RBF kernel currently only supports col major format"); + int n1 = is_row_major ? cols : rows; + int n2 = is_row_major ? rows : cols; + math_t* dot_n1 = is_row_major ? dot_x2 : dot_x1; + math_t* dot_n2 = is_row_major ? dot_x1 : dot_x2; rbf_kernel_expanded<<>>(inout, ld, rows, cols, norm, offset_i, idx_j, gain); + stream>>>(inout, ld, n1, n2, dot_n1, dot_n2, gain); } public: @@ -526,9 +297,24 @@ class RBFKernel : public GramMatrixBase { * @tparam math_t floating point type * @param gain */ - RBFKernel(math_t gain, cublasHandle_t cublas_handle) - : GramMatrixBase(cublas_handle), gain(gain) + RBFKernel(math_t gain, const raft::handle_t& handle) : GramMatrixBase(handle), gain(gain) + { + } + + void matrixDot(const raft::distance::matrix::detail::Matrix& matrix, + math_t* target, + cudaStream_t stream) { + auto norm = raft::linalg::NormType::L2Norm; + if (matrix.isDense()) { + auto dense_matrix = matrix.asDense(); + raft::linalg::rowNorm( + target, dense_matrix->data, matrix.n_cols, matrix.n_rows, norm, false, stream); + } else { + auto csr_matrix = matrix.asCsr(); + raft::sparse::linalg::rowNormCsr( + target, csr_matrix->indptr, csr_matrix->data, csr_matrix->nnz, matrix.n_rows, norm, stream); + } } /** Evaluate kernel matrix using RBF kernel. @@ -537,144 +323,61 @@ class RBFKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and | | euclidean distance. * - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of features in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 + * @param [in] x1 device matrix, size [n1*n_cols] + * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format * @param [in] stream cuda stream - * @param ld1 leading dimension of x1, currently only ld1 == n1 is supported - * @param ld2 leading dimension of x2, currently only ld2 == n2 is supported - * @param ld_out leading dimension of out, only ld_out == n1 is supported - * @param norm optional L2 row norm of x1 for expanded computation within RBF. - * @param offset_x1 offset where x1 starts within norm - * @param idx_x2 indirect access to x2 row id within norm + * @param dot_x1 optional dot product of x1 for expanded computation within RBF. + * @param dot_x2 optional dot product of x2 for expanded computation within RBF. */ - void evaluate(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, + void evaluate(const raft::distance::matrix::detail::Matrix& x1, + const raft::distance::matrix::detail::Matrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - int ld1, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) + math_t* dot_x1, + math_t* dot_x2) { - int minor_out = is_row_major ? 
n2 : n1; - ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); - if (norm != nullptr) { - // compute L2expanded - GramMatrixBase::linear( - x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); - applyExpandedRbfKernel(out, ld_out, n1, n2, norm, offset_x1, idx_x2, is_row_major, stream); + if (x1.isDense() && x2.isDense() && (dot_x1 == nullptr || dot_x2 == nullptr)) { + auto x1_dense = x1.asDense(); + auto x2_dense = x2.asDense(); + distance_rbf(*x1_dense, *x2_dense, out, stream); } else { - int minor1 = is_row_major ? n_cols : n1; - int minor2 = is_row_major ? n_cols : n2; - ASSERT(ld1 == minor1, "RBF Kernel distance does not support ld1 parameter"); - ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter"); - distance(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + rmm::device_uvector tmp_dot_x1(0, stream); + rmm::device_uvector tmp_dot_x2(0, stream); + if (dot_x1 == nullptr) { + tmp_dot_x1.reserve(x1.n_rows, stream); + dot_x1 = tmp_dot_x1.data(); + matrixDot(x1, dot_x1, stream); + } + if (dot_x2 == nullptr) { + tmp_dot_x2.reserve(x2.n_rows, stream); + dot_x2 = tmp_dot_x2.data(); + matrixDot(x2, dot_x2, stream); + } + // compute L2expanded + GramMatrixBase::linear(x1, x2, out, stream); + applyExpandedRbfKernel( + out.data, out.ld, out.n_rows, out.n_cols, dot_x1, dot_x2, out.is_row_major, stream); } } - void evaluateSparseX1(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const math_t* x2_data, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld2, - int ld_out, - math_t* norm, - int offset_x1, - int* idx_x2) - { - ASSERT(norm != nullptr, "RBF Kernel needs pre-computed norm for expanded distance compute"); - // compute L2 expanded - GramMatrixBase::linearSparseX1(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_data, - n2, - out, - is_row_major, - stream, - ld2, - ld_out); - - applyExpandedRbfKernel(out, ld_out, n1, n2, norm, offset_x1, idx_x2, is_row_major, stream); - } - - void evaluateSparse(const raft::handle_t& handle, - const int* x1_indptr, - const int* x1_indices, - const math_t* x1_data, - int x1_nnz, - int n1, - int n_cols, - const int* x2_indptr, - const int* x2_indices, - const math_t* x2_data, - int x2_nnz, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld_out) - { - int minor_out = is_row_major ? n2 : n1; - ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); - - GramMatrixBase::distanceSparse(handle, - x1_indptr, - x1_indices, - x1_data, - x1_nnz, - n1, - n_cols, - x2_indptr, - x2_indices, - x2_data, - x2_nnz, - n2, - out, - is_row_major, - stream, - raft::distance::DistanceType::L2Unexpanded); - - applyKernel(out, ld_out, n1, n2, is_row_major, stream); - } - /** Customize distance function withe RBF epilogue */ - void distance(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld1, - int ld2, - int ld_out) + void distance_rbf(const raft::distance::matrix::detail::DenseMatrix& x1, + const raft::distance::matrix::detail::DenseMatrix& x2, + raft::distance::matrix::detail::DenseMatrix& out, + cudaStream_t stream) { + int minor1 = x1.is_row_major ? x1.n_cols : x1.n_rows; + int minor2 = x2.is_row_major ? x2.n_cols : x2.n_rows; + int minor_out = out.is_row_major ? 
out.n_cols : out.n_rows; + ASSERT(x1.ld == minor1, "RBF Kernel distance does not support ld1 parameter"); + ASSERT(x2.ld == minor2, "RBF Kernel distance does not support ld2 parameter"); + ASSERT(out.ld == minor_out, "RBF Kernel distance does not support ld_out parameter"); + ASSERT(x1.is_row_major == x2.is_row_major, + "GramMatrix leading dimensions for x1 and x2 do not match"); + ASSERT(x2.is_row_major == out.is_row_major, + "GramMatrix leading dimensions for x2 and out do not match"); + math_t gain = this->gain; using index_t = int64_t; @@ -684,17 +387,17 @@ class RBFKernel : public GramMatrixBase { math_t, math_t, decltype(fin_op), - index_t>(const_cast(x1), - const_cast(x2), - out, - n1, - n2, - n_cols, + index_t>(const_cast(x1.data), + const_cast(x2.data), + out.data, + out.n_rows, + out.n_cols, + x1.n_cols, NULL, 0, fin_op, stream, - is_row_major); + out.is_row_major); } }; diff --git a/cpp/include/raft/distance/detail/matrix/matrix.hpp b/cpp/include/raft/distance/detail/matrix/matrix.hpp new file mode 100644 index 0000000000..d4a0dda691 --- /dev/null +++ b/cpp/include/raft/distance/detail/matrix/matrix.hpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft::distance::matrix::detail { + +template +class DenseMatrix; +template +class CsrMatrix; + +/* + * Thin matrix wrapper to allow single API for different matrix representations + */ +template +class Matrix { + public: + Matrix(int rows, int cols) : n_rows(rows), n_cols(cols){}; + virtual bool isDense() const = 0; + virtual ~Matrix(){}; + + DenseMatrix* asDense() + { + DenseMatrix* cast = dynamic_cast*>(this); + ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); + return cast; + }; + + CsrMatrix* asCsr() + { + CsrMatrix* cast = dynamic_cast*>(this); + ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); + return cast; + }; + + const DenseMatrix* asDense() const + { + const DenseMatrix* cast = dynamic_cast*>(this); + ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); + return cast; + }; + + const CsrMatrix* asCsr() const + { + const CsrMatrix* cast = dynamic_cast*>(this); + ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); + return cast; + }; + + int n_rows; + int n_cols; +}; + +template +class DenseMatrix : public Matrix { + public: + DenseMatrix(math_t* data, int rows, int cols, bool row_major = false, int ld_in = 0) + : Matrix(rows, cols), data(data), is_row_major(row_major), ld(ld_in) + { + if (ld <= 0) ld = is_row_major ? 
cols : rows; + } + bool isDense() const { return true; } + math_t* data; + bool is_row_major; + int ld; +}; + +template +class CsrMatrix : public Matrix { + public: + CsrMatrix(int* indptr, int* indices, math_t* data, int nnz, int rows, int cols) + : Matrix(rows, cols), indptr(indptr), indices(indices), data(data), nnz(nnz) + { + } + bool isDense() const { return false; } + + int nnz; + int* indptr; + int* indices; + math_t* data; +}; + +} // namespace raft::distance::matrix::detail \ No newline at end of file From 9f46742e285925c7c4c629ef77924167230fe1f4 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Tue, 21 Feb 2023 18:23:01 +0000 Subject: [PATCH 05/20] finalize merge, adjust/add tests --- .../distance/detail/kernels/gram_matrix.cuh | 5 +- .../detail/kernels/kernel_factory.cuh | 2 +- .../detail/kernels/kernel_matrices.cuh | 7 +- .../raft/sparse/linalg/detail/norm.cuh | 20 +-- cpp/include/raft/sparse/linalg/norm.cuh | 4 +- cpp/test/CMakeLists.txt | 1 + cpp/test/distance/gram.cu | 29 ++-- cpp/test/sparse/norm.cu | 123 ++++++++--------- cpp/test/sparse/normalize.cu | 127 ++++++++++++++++++ 9 files changed, 222 insertions(+), 96 deletions(-) create mode 100644 cpp/test/sparse/normalize.cu diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index a5d756d351..1a2b4d67f8 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -41,10 +42,10 @@ namespace raft::distance::kernels::detail { */ template class GramMatrixBase { - const raft::handle_t& handle; + const raft::device_resources& handle; public: - GramMatrixBase(const raft::handle_t& handle) : handle(handle){}; + GramMatrixBase(const raft::device_resources& handle) : handle(handle){}; virtual ~GramMatrixBase(){}; diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh index 460c039073..ad4a81c55a 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh @@ -26,7 +26,7 @@ namespace raft::distance::kernels::detail { template class KernelFactory { public: - static GramMatrixBase* create(KernelParams params, const raft::handle_t& handle) + static GramMatrixBase* create(KernelParams params, const raft::device_resources& handle) { GramMatrixBase* res; // KernelParams is not templated, we convert the parameters to math_t here: diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index dc96f8ec01..baaa7f5bbe 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -166,7 +166,7 @@ class PolynomialKernel : public GramMatrixBase { * @param offset * @param handle */ - PolynomialKernel(exp_t exponent, math_t gain, math_t offset, const raft::handle_t& handle) + PolynomialKernel(exp_t exponent, math_t gain, math_t offset, const raft::device_resources& handle) : GramMatrixBase(handle), exponent(exponent), gain(gain), offset(offset) { } @@ -232,7 +232,7 @@ class TanhKernel : public GramMatrixBase { * @param offset * @param cublas_handle */ - TanhKernel(math_t gain, math_t offset, const raft::handle_t& handle) + TanhKernel(math_t gain, math_t offset, const raft::device_resources& handle) : 
GramMatrixBase(handle), gain(gain), offset(offset) { } @@ -297,7 +297,8 @@ class RBFKernel : public GramMatrixBase { * @tparam math_t floating point type * @param gain */ - RBFKernel(math_t gain, const raft::handle_t& handle) : GramMatrixBase(handle), gain(gain) + RBFKernel(math_t gain, const raft::device_resources& handle) + : GramMatrixBase(handle), gain(gain) { } diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 7605ce8351..7dbea8c76c 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include #include #include @@ -208,18 +210,18 @@ __global__ void __launch_bounds__(Policy::ThreadsPerBlock) template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void csrReduction(Type* dots, const IdxType* ia, const Type* data, IdxType N, Type init, cudaStream_t stream, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { common::nvtx::range fun_scope( "csrReduction<%d,%d>", Policy::LogicalWarpSize, Policy::RowsPerBlock); @@ -244,15 +246,15 @@ void rowNormCsrCaller(Type* dots, switch (type) { case raft::linalg::NormType::L1Norm: csrReduction>( - dots, ia, data, N, (Type)0, stream, raft::L1Op(), raft::Sum(), fin_op); + dots, ia, data, N, (Type)0, stream, raft::abs_op(), raft::add_op(), fin_op); break; case raft::linalg::NormType::L2Norm: csrReduction>( - dots, ia, data, N, (Type)0, stream, raft::L2Op(), raft::Sum(), fin_op); + dots, ia, data, N, (Type)0, stream, raft::sq_op(), raft::add_op(), fin_op); break; case raft::linalg::NormType::LinfNorm: csrReduction>( - dots, ia, data, N, (Type)0, stream, raft::L1Op(), raft::Max(), fin_op); + dots, ia, data, N, (Type)0, stream, raft::abs_op(), raft::max_op(), fin_op); break; default: THROW("Unsupported norm type: %d", type); }; diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 07b11d51f7..d504e735fb 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -85,7 +85,7 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) * @param stream cuda stream where to launch work * @param fin_op the final lambda op */ -template > +template void rowNormCsr(Type* dots, const IdxType* ia, const Type* data, @@ -93,7 +93,7 @@ void rowNormCsr(Type* dots, IdxType N, raft::linalg::NormType type, cudaStream_t stream, - Lambda fin_op = raft::Nop()) + Lambda fin_op = raft::identity_op()) { detail::rowNormCsrCaller(dots, ia, data, nnz, N, type, stream, fin_op); } diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 575e8cf84b..d08ef85e90 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -223,6 +223,7 @@ if(BUILD_TESTS) test/sparse/degree.cu test/sparse/filter.cu test/sparse/norm.cu + test/sparse/normalize.cu test/sparse/reduce.cu test/sparse/row_op.cu test/sparse/sort.cu diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index a2f0e2385c..c4c439e6da 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include 
#include #include @@ -31,6 +32,8 @@ namespace raft::distance::kernels { +using namespace raft::distance::matrix::detail; + // Get the offset of element [i,k]. HDI int get_offset(int i, int k, int ld, bool is_row_major) { @@ -151,20 +154,18 @@ class GramMatrixTest : public ::testing::TestWithParam { void runTest() { - std::unique_ptr> kernel = std::unique_ptr>( - KernelFactory::create(params.kernel, handle.get_cublas_handle())); - - kernel->evaluate(x1.data(), - params.n1, - params.n_cols, - x2.data(), - params.n2, - gram.data(), - params.is_row_major, - stream, - params.ld1, - params.ld2, - params.ld_out); + std::unique_ptr> kernel = + std::unique_ptr>(KernelFactory::create(params.kernel, handle)); + + DenseMatrix x1_dense( + x1.data(), params.n1, params.n_cols, params.is_row_major, params.ld1); + DenseMatrix x2_dense( + x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); + DenseMatrix gram_dense( + x1.data(), params.n1, params.n2, params.is_row_major, params.ld_out); + + (*kernel)(x1_dense, x2_dense, gram_dense, stream); + naiveKernel(); ASSERT_TRUE(raft::devArrMatchHost( gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f))); diff --git a/cpp/test/sparse/norm.cu b/cpp/test/sparse/norm.cu index 91b7b09fcc..f1328fa52d 100644 --- a/cpp/test/sparse/norm.cu +++ b/cpp/test/sparse/norm.cu @@ -19,7 +19,7 @@ #include "../test_utils.cuh" #include -#include +#include #include #include @@ -29,26 +29,24 @@ namespace raft { namespace sparse { -enum NormalizeMethod { MAX, L1 }; - template -struct CSRRowNormalizeInputs { - NormalizeMethod method; - std::vector ex_scan; - std::vector in_vals; +struct CSRRowNormInputs { + raft::linalg::NormType norm; + std::vector indptr; + std::vector data; std::vector verify; }; template -class CSRRowNormalizeTest : public ::testing::TestWithParam> { +class CSRRowNormTest : public ::testing::TestWithParam> { public: - CSRRowNormalizeTest() - : params(::testing::TestWithParam>::GetParam()), + CSRRowNormTest() + : params(::testing::TestWithParam>::GetParam()), stream(handle.get_stream()), - in_vals(params.in_vals.size(), stream), - verify(params.verify.size(), stream), - ex_scan(params.ex_scan.size(), stream), - result(params.verify.size(), stream) + data(params.data.size(), stream), + verify(params.indptr.size() - 1, stream), + indptr(params.indptr.size(), stream), + result(params.indptr.size() - 1, stream) { } @@ -57,71 +55,66 @@ class CSRRowNormalizeTest : public ::testing::TestWithParam( - ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream); - break; - case L1: - linalg::csr_row_normalize_l1( - ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream); - break; - } + Index_ n_rows = params.indptr.size() - 1; + Index_ nnz = params.data.size(); + + raft::update_device(indptr.data(), params.indptr.data(), n_rows + 1, stream); + raft::update_device(data.data(), params.data.data(), nnz, stream); + raft::update_device(verify.data(), params.verify.data(), n_rows, stream); + + linalg::rowNormCsr(result.data(), indptr.data(), data.data(), nnz, n_rows, params.norm, stream); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); ASSERT_TRUE( - raft::devArrMatch(verify.data(), result.data(), nnz, raft::Compare())); + raft::devArrMatch(verify.data(), result.data(), n_rows, raft::Compare())); } protected: raft::device_resources handle; cudaStream_t stream; - CSRRowNormalizeInputs params; - rmm::device_uvector ex_scan; - rmm::device_uvector in_vals, result, verify; + CSRRowNormInputs params; + rmm::device_uvector indptr; + 
rmm::device_uvector data, result, verify; }; -using CSRRowNormalizeTestF = CSRRowNormalizeTest; -TEST_P(CSRRowNormalizeTestF, Result) { Run(); } - -using CSRRowNormalizeTestD = CSRRowNormalizeTest; -TEST_P(CSRRowNormalizeTestD, Result) { Run(); } - -const std::vector> csrnormalize_inputs_f = { - {MAX, - {0, 4, 8, 9}, - {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0}, - {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}}, - {L1, - {0, 4, 8, 9}, - {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0}, - {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}}, +using CSRRowNormTestF = CSRRowNormTest; +TEST_P(CSRRowNormTestF, Result) { Run(); } + +using CSRRowNormTestD = CSRRowNormTest; +TEST_P(CSRRowNormTestD, Result) { Run(); } + +const std::vector> csrnorm_inputs_f = { + {raft::linalg::NormType::LinfNorm, + {0, 3, 7, 10}, + {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0}, + {5.0, 10.0, 2.0}}, + {raft::linalg::NormType::L1Norm, + {0, 3, 7, 10}, + {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0}, + {8.0, 13.0, 4.0}}, + {raft::linalg::NormType::L2Norm, + {0, 3, 7, 10}, + {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0}, + {30.0, 105.0, 6.0}}, }; -const std::vector> csrnormalize_inputs_d = { - {MAX, - {0, 4, 8, 9}, - {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0}, - {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}}, - {L1, - {0, 4, 8, 9}, - {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0}, - {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}}, +const std::vector> csrnorm_inputs_d = { + {raft::linalg::NormType::LinfNorm, + {0, 3, 7, 10}, + {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0}, + {5.0, 10.0, 2.0}}, + {raft::linalg::NormType::L1Norm, + {0, 3, 7, 10}, + {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0}, + {8.0, 13.0, 4.0}}, + {raft::linalg::NormType::L2Norm, + {0, 3, 7, 10}, + {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0}, + {30.0, 105.0, 6.0}}, }; -INSTANTIATE_TEST_CASE_P(SparseNormTest, - CSRRowNormalizeTestF, - ::testing::ValuesIn(csrnormalize_inputs_f)); -INSTANTIATE_TEST_CASE_P(SparseNormTest, - CSRRowNormalizeTestD, - ::testing::ValuesIn(csrnormalize_inputs_d)); +INSTANTIATE_TEST_CASE_P(SparseNormTest, CSRRowNormTestF, ::testing::ValuesIn(csrnorm_inputs_f)); +INSTANTIATE_TEST_CASE_P(SparseNormTest, CSRRowNormTestD, ::testing::ValuesIn(csrnorm_inputs_d)); } // namespace sparse } // namespace raft diff --git a/cpp/test/sparse/normalize.cu b/cpp/test/sparse/normalize.cu new file mode 100644 index 0000000000..91b7b09fcc --- /dev/null +++ b/cpp/test/sparse/normalize.cu @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "../test_utils.cuh" + +#include +#include +#include +#include + +#include +#include + +namespace raft { +namespace sparse { + +enum NormalizeMethod { MAX, L1 }; + +template +struct CSRRowNormalizeInputs { + NormalizeMethod method; + std::vector ex_scan; + std::vector in_vals; + std::vector verify; +}; + +template +class CSRRowNormalizeTest : public ::testing::TestWithParam> { + public: + CSRRowNormalizeTest() + : params(::testing::TestWithParam>::GetParam()), + stream(handle.get_stream()), + in_vals(params.in_vals.size(), stream), + verify(params.verify.size(), stream), + ex_scan(params.ex_scan.size(), stream), + result(params.verify.size(), stream) + { + } + + protected: + void SetUp() override {} + + void Run() + { + Index_ n_rows = params.ex_scan.size(); + Index_ nnz = params.in_vals.size(); + + raft::update_device(ex_scan.data(), params.ex_scan.data(), n_rows, stream); + raft::update_device(in_vals.data(), params.in_vals.data(), nnz, stream); + raft::update_device(verify.data(), params.verify.data(), nnz, stream); + + switch (params.method) { + case MAX: + linalg::csr_row_normalize_max( + ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream); + break; + case L1: + linalg::csr_row_normalize_l1( + ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream); + break; + } + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + + ASSERT_TRUE( + raft::devArrMatch(verify.data(), result.data(), nnz, raft::Compare())); + } + + protected: + raft::device_resources handle; + cudaStream_t stream; + + CSRRowNormalizeInputs params; + rmm::device_uvector ex_scan; + rmm::device_uvector in_vals, result, verify; +}; + +using CSRRowNormalizeTestF = CSRRowNormalizeTest; +TEST_P(CSRRowNormalizeTestF, Result) { Run(); } + +using CSRRowNormalizeTestD = CSRRowNormalizeTest; +TEST_P(CSRRowNormalizeTestD, Result) { Run(); } + +const std::vector> csrnormalize_inputs_f = { + {MAX, + {0, 4, 8, 9}, + {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0}, + {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}}, + {L1, + {0, 4, 8, 9}, + {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0}, + {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}}, +}; +const std::vector> csrnormalize_inputs_d = { + {MAX, + {0, 4, 8, 9}, + {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0}, + {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}}, + {L1, + {0, 4, 8, 9}, + {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0}, + {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}}, +}; + +INSTANTIATE_TEST_CASE_P(SparseNormTest, + CSRRowNormalizeTestF, + ::testing::ValuesIn(csrnormalize_inputs_f)); +INSTANTIATE_TEST_CASE_P(SparseNormTest, + CSRRowNormalizeTestD, + ::testing::ValuesIn(csrnormalize_inputs_d)); + +} // namespace sparse +} // namespace raft From c0964955753230ea30de2454f544efc61c96dffb Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Wed, 22 Feb 2023 20:31:27 +0000 Subject: [PATCH 06/20] add test and fix rbf --- .../detail/kernels/kernel_matrices.cuh | 16 +- cpp/test/CMakeLists.txt | 9 +- cpp/test/distance/gram.cu | 2 +- cpp/test/sparse/gram.cu | 342 ++++++++++++++++++ 4 files changed, 363 insertions(+), 6 deletions(-) create mode 100644 cpp/test/sparse/gram.cu diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index baaa7f5bbe..5079a87027 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -230,7 +230,7 @@ class TanhKernel : 
public GramMatrixBase { * @tparam math_t floating point type * @param gain * @param offset - * @param cublas_handle + * @param handle */ TanhKernel(math_t gain, math_t offset, const raft::device_resources& handle) : GramMatrixBase(handle), gain(gain), offset(offset) @@ -282,7 +282,7 @@ class RBFKernel : public GramMatrixBase { int n2 = is_row_major ? rows : cols; math_t* dot_n1 = is_row_major ? dot_x2 : dot_x1; math_t* dot_n2 = is_row_major ? dot_x1 : dot_x2; - rbf_kernel_expanded<<>>(inout, ld, n1, n2, dot_n1, dot_n2, gain); @@ -309,8 +309,16 @@ class RBFKernel : public GramMatrixBase { auto norm = raft::linalg::NormType::L2Norm; if (matrix.isDense()) { auto dense_matrix = matrix.asDense(); - raft::linalg::rowNorm( - target, dense_matrix->data, matrix.n_cols, matrix.n_rows, norm, false, stream); + int minor = dense_matrix->is_row_major ? matrix.n_cols : matrix.n_rows; + ASSERT(dense_matrix->ld == minor, + "RBF Kernel lazy rowNorm compute does not support ld parameter"); + raft::linalg::rowNorm(target, + dense_matrix->data, + matrix.n_cols, + matrix.n_rows, + norm, + dense_matrix->is_row_major, + stream); } else { auto csr_matrix = matrix.asCsr(); raft::sparse::linalg::rowNormCsr( diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index d08ef85e90..64d757e33a 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -232,7 +232,14 @@ if(BUILD_TESTS) ) ConfigureTest( - NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu OPTIONAL DIST + NAME + SPARSE_DIST_TEST + PATH + test/sparse/dist_coo_spmv.cu + test/sparse/distance.cu + test/sparse/gram.cu + OPTIONAL + DIST NN ) diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index c4c439e6da..7ea9cc3c7d 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -162,7 +162,7 @@ class GramMatrixTest : public ::testing::TestWithParam { DenseMatrix x2_dense( x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); DenseMatrix gram_dense( - x1.data(), params.n1, params.n2, params.is_row_major, params.ld_out); + gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); (*kernel)(x1_dense, x2_dense, gram_dense, stream); diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu new file mode 100644 index 0000000000..2cf880d23e --- /dev/null +++ b/cpp/test/sparse/gram.cu @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined RAFT_DISTANCE_COMPILED +#include +#endif + +#include "../test_utils.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft::distance::kernels { + +using namespace raft::distance::matrix::detail; + +// Get the offset of element [i,k]. +HDI int get_offset(int i, int k, int ld, bool is_row_major) +{ + return is_row_major ? 
i * ld + k : i + k * ld; +} + +enum SparseType { DENSE, MIX, CSR }; + +struct GramMatrixInputs { + int n1; // feature vectors in matrix 1 + int n2; // featuer vectors in matrix 2 + int n_cols; // number of elements in a feature vector + bool is_row_major; + SparseType sparse_input; + KernelParams kernel; + int ld1; + int ld2; + int ld_out; + // We will generate random input using the dimensions given here. + // The reference output is calculated by a custom kernel. +}; + +std::ostream& operator<<(std::ostream& os, const GramMatrixInputs& p) +{ + std::vector kernel_names{"linear", "poly", "rbf", "tanh"}; + os << "/" << p.n1 << "x" << p.n2 << "x" << p.n_cols << "/" + << (p.is_row_major ? "RowMajor/" : "ColMajor/") + << (p.sparse_input == SparseType::DENSE + ? "DenseDense/" + : (p.sparse_input == SparseType::MIX ? "CsrDense/" : "CsrCsr/")) + << kernel_names[p.kernel.kernel] << "/ld_" << p.ld1 << "x" << p.ld2 << "x" << p.ld_out; + return os; +} + +const std::vector inputs = { + {42, 137, 2, false, SparseType::DENSE, {KernelType::LINEAR}}, + {42, 137, 2, true, SparseType::DENSE, {KernelType::LINEAR}}, + {42, 137, 2, false, SparseType::DENSE, {KernelType::LINEAR}, 64, 179, 181}, + {42, 137, 2, true, SparseType::DENSE, {KernelType::LINEAR}, 64, 179, 181}, + {42, 137, 2, false, SparseType::MIX, {KernelType::LINEAR}}, + {42, 137, 2, true, SparseType::MIX, {KernelType::LINEAR}}, + {42, 137, 2, false, SparseType::MIX, {KernelType::LINEAR}, 64, 179, 181}, + {42, 137, 2, true, SparseType::MIX, {KernelType::LINEAR}, 64, 179, 181}, + {42, 137, 2, false, SparseType::CSR, {KernelType::LINEAR}}, + {42, 137, 2, true, SparseType::CSR, {KernelType::LINEAR}}, + // CSR does not support ld_out + {42, 137, 2, false, SparseType::CSR, {KernelType::LINEAR}, 64, 179, 0}, + {42, 137, 2, true, SparseType::CSR, {KernelType::LINEAR}, 64, 179, 0}, + {137, 42, 2, false, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, + {137, 42, 2, true, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, + {137, 42, 2, false, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, + {137, 42, 2, true, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, + {137, 42, 2, false, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, + {137, 42, 2, true, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, + {137, 42, 2, false, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, + {137, 42, 2, true, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, + {137, 42, 2, false, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, + {137, 42, 2, true, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, + // CSR does not support ld_out + {137, 42, 2, false, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 0}, + {137, 42, 2, true, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 0}, + {42, 137, 2, false, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}}, + {42, 137, 2, true, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}}, + {42, 137, 2, false, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 49}, + {42, 137, 2, true, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 143}, + {42, 137, 2, false, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}}, + {42, 137, 2, true, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}}, + {42, 137, 2, false, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 49}, + {42, 137, 2, true, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 143}, + 
{42, 137, 2, false, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}}, + {42, 137, 2, true, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}}, + // CSR does not support ld_out + {42, 137, 2, false, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 0}, + {42, 137, 2, true, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 0}, + {3, 4, 2, false, SparseType::DENSE, {KernelType::RBF, 0, 0.5}}, + {42, 137, 2, false, SparseType::DENSE, {KernelType::RBF, 0, 0.5}}, + {42, 137, 2, true, SparseType::DENSE, {KernelType::RBF, 0, 0.5}}, + {3, 4, 2, false, SparseType::MIX, {KernelType::RBF, 0, 0.5}}, + {42, 137, 2, false, SparseType::MIX, {KernelType::RBF, 0, 0.5}}, + {42, 137, 2, true, SparseType::MIX, {KernelType::RBF, 0, 0.5}}, + {3, 4, 2, false, SparseType::CSR, {KernelType::RBF, 0, 0.5}}, + {42, 137, 2, false, SparseType::CSR, {KernelType::RBF, 0, 0.5}}, + {42, 137, 2, true, SparseType::CSR, {KernelType::RBF, 0, 0.5}}, + // Distance kernel does not support LD parameter yet. + //{42, 137, 2, false, {KernelType::RBF, 0, 0.5}, 64, 155, 49}, + //{42, 137, 2, true, {KernelType::RBF, 0, 0.5}, 64, 155, 143}, +}; + +template +class GramMatrixTest : public ::testing::TestWithParam { + protected: + GramMatrixTest() + : params(GetParam()), + stream(0), + x1(0, stream), + x2(0, stream), + x1_csr_indptr(0, stream), + x1_csr_indices(0, stream), + x1_csr_data(0, stream), + x2_csr_indptr(0, stream), + x2_csr_indices(0, stream), + x2_csr_data(0, stream), + gram(0, stream), + gram_host(0) + { + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + + if (params.ld1 == 0) { params.ld1 = params.is_row_major ? params.n_cols : params.n1; } + if (params.ld2 == 0) { params.ld2 = params.is_row_major ? params.n_cols : params.n2; } + if (params.ld_out == 0) { params.ld_out = params.is_row_major ? params.n2 : params.n1; } + // Derive the size of the output from the offset of the last element. + size_t size = get_offset(params.n1 - 1, params.n_cols - 1, params.ld1, params.is_row_major) + 1; + x1.resize(size, stream); + size = get_offset(params.n2 - 1, params.n_cols - 1, params.ld2, params.is_row_major) + 1; + x2.resize(size, stream); + size = get_offset(params.n1 - 1, params.n2 - 1, params.ld_out, params.is_row_major) + 1; + + gram.resize(size, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(gram.data(), 0, gram.size() * sizeof(math_t), stream)); + gram_host.resize(gram.size()); + std::fill(gram_host.begin(), gram_host.end(), 0); + + raft::random::Rng r(42137ULL); + r.uniform(x1.data(), x1.size(), math_t(0), math_t(1), stream); + r.uniform(x2.data(), x2.size(), math_t(0), math_t(1), stream); + } + + ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); } + + // Calculate the Gram matrix on the host. 
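  // For reference, the entries produced by naiveKernel() below are, with
  // d = <x1_i, x2_j> for the dot-product kernels and d = ||x1_i - x2_j||^2 for RBF:
  //   LINEAR:      K_ij = d
  //   POLYNOMIAL:  K_ij = (gamma * d + coef0)^degree
  //   TANH:        K_ij = tanh(gamma * d + coef0)
  //   RBF:         K_ij = exp(-gamma * d)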
+ void naiveKernel() + { + std::vector x1_host(x1.size()); + raft::update_host(x1_host.data(), x1.data(), x1.size(), stream); + std::vector x2_host(x2.size()); + raft::update_host(x2_host.data(), x2.data(), x2.size(), stream); + handle.sync_stream(stream); + + for (int i = 0; i < params.n1; i++) { + for (int j = 0; j < params.n2; j++) { + float d = 0; + for (int k = 0; k < params.n_cols; k++) { + if (params.kernel.kernel == KernelType::RBF) { + math_t diff = x1_host[get_offset(i, k, params.ld1, params.is_row_major)] - + x2_host[get_offset(j, k, params.ld2, params.is_row_major)]; + d += diff * diff; + } else { + d += x1_host[get_offset(i, k, params.ld1, params.is_row_major)] * + x2_host[get_offset(j, k, params.ld2, params.is_row_major)]; + } + } + int idx = get_offset(i, j, params.ld_out, params.is_row_major); + math_t v = 0; + switch (params.kernel.kernel) { + case (KernelType::LINEAR): gram_host[idx] = d; break; + case (KernelType::POLYNOMIAL): + v = params.kernel.gamma * d + params.kernel.coef0; + gram_host[idx] = std::pow(v, params.kernel.degree); + break; + case (KernelType::TANH): + gram_host[idx] = std::tanh(params.kernel.gamma * d + params.kernel.coef0); + break; + case (KernelType::RBF): gram_host[idx] = exp(-params.kernel.gamma * d); break; + } + } + } + } + + int prepareCsr(math_t* dense, int n_rows, int ld, int* indptr, int* indices, math_t* data) + { + int nnz = 0; + double eps = 1e-6; + int n_cols = params.n_cols; + bool is_row_major = params.is_row_major; + size_t dense_size = get_offset(n_rows - 1, n_cols - 1, ld, is_row_major) + 1; + + std::vector dense_host(dense_size); + raft::update_host(dense_host.data(), dense, dense_size, stream); + handle.sync_stream(stream); + + std::vector indptr_host(n_rows + 1); + std::vector indices_host(n_rows * n_cols); + std::vector data_host(n_rows * n_cols); + + // create csr matrix from dense (with threshold) + for (int i = 0; i < n_rows; ++i) { + indptr_host[i] = nnz; + for (int j = 0; j < n_cols; ++j) { + math_t value = dense_host[get_offset(i, j, ld, is_row_major)]; + if (value > eps) { + indices_host[nnz] = j; + data_host[nnz] = value; + nnz++; + } + } + } + indptr_host[n_rows] = nnz; + + // fill back dense matrix from CSR + std::fill(dense_host.data(), dense_host.data() + dense_size, 0); + for (int i = 0; i < n_rows; ++i) { + for (int idx = indptr_host[i]; idx < indptr_host[i + 1]; ++idx) { + dense_host[get_offset(i, indices_host[idx], ld, is_row_major)] = data_host[idx]; + } + } + + raft::update_device(dense, dense_host.data(), dense_size, stream); + raft::update_device(indptr, indptr_host.data(), n_rows + 1, stream); + raft::update_device(indices, indices_host.data(), nnz, stream); + raft::update_device(data, data_host.data(), nnz, stream); + handle.sync_stream(stream); + + return nnz; + } + + void runTest() + { + std::unique_ptr> kernel = + std::unique_ptr>(KernelFactory::create(params.kernel, handle)); + + Matrix* x1_matrix = nullptr; + Matrix* x2_matrix = nullptr; + + if (params.sparse_input != SparseType::DENSE) { + x1_csr_indptr.reserve(params.n1 + 1, stream); + x1_csr_indices.reserve(params.n1 * params.n_cols, stream); + x1_csr_data.reserve(params.n1 * params.n_cols, stream); + int nnz = prepareCsr(x1.data(), + params.n1, + params.ld1, + x1_csr_indptr.data(), + x1_csr_indices.data(), + x1_csr_data.data()); + x1_matrix = new CsrMatrix(x1_csr_indptr.data(), + x1_csr_indices.data(), + x1_csr_data.data(), + nnz, + params.n1, + params.n_cols); + } else { + x1_matrix = new DenseMatrix( + x1.data(), params.n1, params.n_cols, 
params.is_row_major, params.ld1); + } + + if (params.sparse_input == SparseType::CSR) { + x2_csr_indptr.reserve(params.n2 + 1, stream); + x2_csr_indices.reserve(params.n2 * params.n_cols, stream); + x2_csr_data.reserve(params.n2 * params.n_cols, stream); + int nnz = prepareCsr(x2.data(), + params.n2, + params.ld2, + x2_csr_indptr.data(), + x2_csr_indices.data(), + x2_csr_data.data()); + x2_matrix = new CsrMatrix(x2_csr_indptr.data(), + x2_csr_indices.data(), + x2_csr_data.data(), + nnz, + params.n2, + params.n_cols); + } else { + x2_matrix = new DenseMatrix( + x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); + } + + DenseMatrix gram_dense( + gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); + + naiveKernel(); + + (*kernel)(*x1_matrix, *x2_matrix, gram_dense, stream); + handle.sync_stream(stream); + + ASSERT_TRUE(raft::devArrMatchHost( + gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f))); + + delete x1_matrix; + delete x2_matrix; + } + + raft::device_resources handle; + cudaStream_t stream = 0; + GramMatrixInputs params; + + rmm::device_uvector x1; + rmm::device_uvector x2; + + rmm::device_uvector x1_csr_indptr; + rmm::device_uvector x1_csr_indices; + rmm::device_uvector x1_csr_data; + rmm::device_uvector x2_csr_indptr; + rmm::device_uvector x2_csr_indices; + rmm::device_uvector x2_csr_data; + + rmm::device_uvector gram; + std::vector gram_host; +}; + +typedef GramMatrixTest GramMatrixTestFloat; +typedef GramMatrixTest GramMatrixTestDouble; + +TEST_P(GramMatrixTestFloat, Gram) { runTest(); } + +INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloat, ::testing::ValuesIn(inputs)); +}; // end namespace raft::distance::kernels From 8174693cf2281eae90b976b18e49dec2c1ec8e75 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Sun, 12 Mar 2023 10:35:14 -0700 Subject: [PATCH 07/20] review suggestions --- .../distance/detail/kernels/gram_matrix.cuh | 19 +- .../detail/kernels/kernel_matrices.cuh | 81 ++++---- cpp/test/distance/gram.cu | 61 ++---- cpp/test/distance/gram_base.cuh | 87 ++++++++ cpp/test/sparse/gram.cu | 185 ++++++++---------- 5 files changed, 237 insertions(+), 196 deletions(-) create mode 100644 cpp/test/distance/gram_base.cuh diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 1a2b4d67f8..65961e3089 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -48,7 +48,6 @@ class GramMatrixBase { GramMatrixBase(const raft::device_resources& handle) : handle(handle){}; virtual ~GramMatrixBase(){}; - /** Convenience function to evaluate the Gram matrix for two vector sets. * Vector sets are provided in Matrix format * @@ -56,22 +55,22 @@ class GramMatrixBase { * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out (dense) device matrix to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream - * @param dot_x1 optional dot product of x1 for expanded computation within RBF. - * @param dot_x2 optional dot product of x2 for expanded computation within RBF. + * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. + * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. 
*/ virtual void operator()(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - math_t* dot_x1 = nullptr, - math_t* dot_x2 = nullptr) + math_t* norm_x1 = nullptr, + math_t* norm_x2 = nullptr) { ASSERT(x1.n_rows == out.n_rows, "GramMatrix input matrix dimensions for x1 and out do not match"); ASSERT(x2.n_rows == out.n_cols, "GramMatrix input matrix dimensions for x2 and out do not match"); ASSERT(x1.n_cols == x2.n_cols, "GramMatrix input matrix dimensions for x1 and x2 do not match"); - evaluate(x1, x2, out, stream, dot_x1, dot_x2); + evaluate(x1, x2, out, stream, norm_x1, norm_x2); } /** Evaluate the Gram matrix for two vector sets using simple dot product. @@ -80,15 +79,15 @@ class GramMatrixBase { * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream - * @param dot_x1 optional dot product of x1 for expanded computation within RBF. - * @param dot_x2 optional dot product of x2 for expanded computation within RBF. + * @param norm_x1 unused. + * @param norm_x2 unused. */ virtual void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - math_t* dot_x1, - math_t* dot_x2) + math_t* norm_x1, + math_t* norm_x2) { linear(x1, x2, out, stream); } diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 5079a87027..8b7954214c 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -102,26 +102,33 @@ __global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t ga } /** Epiloge function for rbf kernel using expansion. 
- * Calculates output_ij = exp(-gain * (norm_i + norm_j - 2*input_ij)); + * + * Calculates output_ij = exp(-gain * (norm_x_i + norm_y_j - 2*input_ij)); + * + * Intended usage + * - input is the product of two matrices X and Y input_ij = \sum_k X_ik * Y_jk + * - norm_x_i = l2_norm(x_i), where x_i is the i-th row of matrix X + * - norm_y_j = l2_norm(y_j), where y_j is the j-th row of matrix Y + * * @param inout device vector in column major format, size [ld * cols] * @param ld leading dimension of the inout buffer * @param rows number of rows (rows <= ld) * @param cols number of columns - * @param dot_rows dot product for row indices - * @param dot_cols dot product for column indices + * @param norm_x l2-norm of X's rows + * @param norm_y l2-norm of Y's rows * @param gain */ template __global__ void rbf_kernel_expanded( - math_t* inout, int ld, int rows, int cols, math_t* dot_rows, math_t* dot_cols, math_t gain) + math_t* inout, int ld, int rows, int cols, math_t* norm_x, math_t* norm_y, math_t gain) { for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols; tidy += blockDim.y * gridDim.y) { - math_t norm_y = dot_cols[tidy]; + math_t norm_y_val = norm_y[tidy]; for (size_t tidx = threadIdx.x + blockIdx.x * blockDim.x; tidx < rows; tidx += blockDim.x * gridDim.x) { inout[tidx + tidy * ld] = - exp(-1.0 * gain * (dot_rows[tidx] + norm_y - inout[tidx + tidy * ld] * 2)); + exp(-1.0 * gain * (norm_x[tidx] + norm_y_val - inout[tidx + tidy * ld] * 2)); } } } @@ -181,15 +188,15 @@ class PolynomialKernel : public GramMatrixBase { * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream - * @param dot_x1 optional dot product of x1 for expanded computation within RBF. - * @param dot_x2 optional dot product of x2 for expanded computation within RBF. + * @param norm_x1 unused. + * @param norm_x2 unused. */ void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - math_t* dot_x1, - math_t* dot_x2) + math_t* norm_x1, + math_t* norm_x2) { GramMatrixBase::linear(x1, x2, out, stream); applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, stream); @@ -247,15 +254,15 @@ class TanhKernel : public GramMatrixBase { * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream - * @param dot_x1 optional dot product of x1 for expanded computation within RBF. - * @param dot_x2 optional dot product of x2 for expanded computation within RBF. + * @param norm_x1 unused. + * @param norm_x2 unused. */ void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - math_t* dot_x1, - math_t* dot_x2) + math_t* norm_x1, + math_t* norm_x2) { GramMatrixBase::linear(x1, x2, out, stream); applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, stream); @@ -273,19 +280,19 @@ class RBFKernel : public GramMatrixBase { int ld, int rows, int cols, - math_t* dot_x1, - math_t* dot_x2, + math_t* norm_x1, + math_t* norm_x2, bool is_row_major, cudaStream_t stream) { - int n1 = is_row_major ? cols : rows; - int n2 = is_row_major ? rows : cols; - math_t* dot_n1 = is_row_major ? dot_x2 : dot_x1; - math_t* dot_n2 = is_row_major ? 
dot_x1 : dot_x2; + int n1 = is_row_major ? cols : rows; + int n2 = is_row_major ? rows : cols; + math_t* norm_n1 = is_row_major ? norm_x2 : norm_x1; + math_t* norm_n2 = is_row_major ? norm_x1 : norm_x2; rbf_kernel_expanded<<>>(inout, ld, n1, n2, dot_n1, dot_n2, gain); + stream>>>(inout, ld, n1, n2, norm_n1, norm_n2, gain); } public: @@ -336,37 +343,37 @@ class RBFKernel : public GramMatrixBase { * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream - * @param dot_x1 optional dot product of x1 for expanded computation within RBF. - * @param dot_x2 optional dot product of x2 for expanded computation within RBF. + * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. + * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, cudaStream_t stream, - math_t* dot_x1, - math_t* dot_x2) + math_t* norm_x1, + math_t* norm_x2) { - if (x1.isDense() && x2.isDense() && (dot_x1 == nullptr || dot_x2 == nullptr)) { + if (x1.isDense() && x2.isDense() && (norm_x1 == nullptr || norm_x2 == nullptr)) { auto x1_dense = x1.asDense(); auto x2_dense = x2.asDense(); distance_rbf(*x1_dense, *x2_dense, out, stream); } else { - rmm::device_uvector tmp_dot_x1(0, stream); - rmm::device_uvector tmp_dot_x2(0, stream); - if (dot_x1 == nullptr) { - tmp_dot_x1.reserve(x1.n_rows, stream); - dot_x1 = tmp_dot_x1.data(); - matrixDot(x1, dot_x1, stream); + rmm::device_uvector tmp_norm_x1(0, stream); + rmm::device_uvector tmp_norm_x2(0, stream); + if (norm_x1 == nullptr) { + tmp_norm_x1.reserve(x1.n_rows, stream); + norm_x1 = tmp_norm_x1.data(); + matrixDot(x1, norm_x1, stream); } - if (dot_x2 == nullptr) { - tmp_dot_x2.reserve(x2.n_rows, stream); - dot_x2 = tmp_dot_x2.data(); - matrixDot(x2, dot_x2, stream); + if (norm_x2 == nullptr) { + tmp_norm_x2.reserve(x2.n_rows, stream); + norm_x2 = tmp_norm_x2.data(); + matrixDot(x2, norm_x2, stream); } // compute L2expanded GramMatrixBase::linear(x1, x2, out, stream); applyExpandedRbfKernel( - out.data, out.ld, out.n_rows, out.n_cols, dot_x1, dot_x2, out.is_row_major, stream); + out.data, out.ld, out.n_rows, out.n_cols, norm_x1, norm_x2, out.is_row_major, stream); } } diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index 7ea9cc3c7d..6a93fed0ad 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -19,6 +19,7 @@ #endif #include "../test_utils.cuh" +#include "gram_base.cuh" #include #include #include @@ -34,12 +35,6 @@ namespace raft::distance::kernels { using namespace raft::distance::matrix::detail; -// Get the offset of element [i,k]. -HDI int get_offset(int i, int k, int ld, bool is_row_major) -{ - return is_row_major ? i * ld + k : i + k * ld; -} - struct GramMatrixInputs { int n1; // feature vectors in matrix 1 int n2; // featuer vectors in matrix 2 @@ -113,45 +108,6 @@ class GramMatrixTest : public ::testing::TestWithParam { ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); } - // Calculate the Gram matrix on the host. 
- void naiveKernel() - { - std::vector x1_host(x1.size()); - raft::update_host(x1_host.data(), x1.data(), x1.size(), stream); - std::vector x2_host(x2.size()); - raft::update_host(x2_host.data(), x2.data(), x2.size(), stream); - handle.sync_stream(stream); - - for (int i = 0; i < params.n1; i++) { - for (int j = 0; j < params.n2; j++) { - float d = 0; - for (int k = 0; k < params.n_cols; k++) { - if (params.kernel.kernel == KernelType::RBF) { - math_t diff = x1_host[get_offset(i, k, params.ld1, params.is_row_major)] - - x2_host[get_offset(j, k, params.ld2, params.is_row_major)]; - d += diff * diff; - } else { - d += x1_host[get_offset(i, k, params.ld1, params.is_row_major)] * - x2_host[get_offset(j, k, params.ld2, params.is_row_major)]; - } - } - int idx = get_offset(i, j, params.ld_out, params.is_row_major); - math_t v = 0; - switch (params.kernel.kernel) { - case (KernelType::LINEAR): gram_host[idx] = d; break; - case (KernelType::POLYNOMIAL): - v = params.kernel.gamma * d + params.kernel.coef0; - gram_host[idx] = std::pow(v, params.kernel.degree); - break; - case (KernelType::TANH): - gram_host[idx] = std::tanh(params.kernel.gamma * d + params.kernel.coef0); - break; - case (KernelType::RBF): gram_host[idx] = exp(-params.kernel.gamma * d); break; - } - } - } - } - void runTest() { std::unique_ptr> kernel = @@ -166,7 +122,20 @@ class GramMatrixTest : public ::testing::TestWithParam { (*kernel)(x1_dense, x2_dense, gram_dense, stream); - naiveKernel(); + naiveGramMatrixKernel(params.n1, + params.n2, + params.n_cols, + x1, + x2, + gram_host.data(), + params.ld1, + params.ld2, + params.ld_out, + params.is_row_major, + params.kernel, + stream, + handle); + ASSERT_TRUE(raft::devArrMatchHost( gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f))); } diff --git a/cpp/test/distance/gram_base.cuh b/cpp/test/distance/gram_base.cuh new file mode 100644 index 0000000000..8c0652bc16 --- /dev/null +++ b/cpp/test/distance/gram_base.cuh @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace distance { +namespace kernels { + +// Get the offset of element [i,k]. +HDI int get_offset(int i, int k, int ld, bool is_row_major) +{ + return is_row_major ? i * ld + k : i + k * ld; +} + +// Calculate the Gram matrix on the host. 
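// Both the tests and the reference below address possibly padded buffers through get_offset();
// a quick numeric check of that convention (values illustrative only):
//   get_offset(1, 2, /*ld=*/5, /*is_row_major=*/true)  == 1 * 5 + 2 == 7
//   get_offset(1, 2, /*ld=*/5, /*is_row_major=*/false) == 1 + 2 * 5 == 11
// i.e. ld is the stride between consecutive rows (row major) or columns (column major),
// so it may exceed the matrix extent when the storage is padded.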
+template +void naiveGramMatrixKernel(int n1, + int n2, + int n_cols, + const rmm::device_uvector& x1, + const rmm::device_uvector& x2, + math_t* gram_host, + int ld1, + int ld2, + int ld_out, + bool is_row_major, + KernelParams kernel, + cudaStream_t stream, + const raft::device_resources& handle) +{ + std::vector x1_host(x1.size()); + raft::update_host(x1_host.data(), x1.data(), x1.size(), stream); + std::vector x2_host(x2.size()); + raft::update_host(x2_host.data(), x2.data(), x2.size(), stream); + handle.sync_stream(stream); + + for (int i = 0; i < n1; i++) { + for (int j = 0; j < n2; j++) { + float d = 0; + for (int k = 0; k < n_cols; k++) { + if (kernel.kernel == KernelType::RBF) { + math_t diff = x1_host[get_offset(i, k, ld1, is_row_major)] - + x2_host[get_offset(j, k, ld2, is_row_major)]; + d += diff * diff; + } else { + d += x1_host[get_offset(i, k, ld1, is_row_major)] * + x2_host[get_offset(j, k, ld2, is_row_major)]; + } + } + int idx = get_offset(i, j, ld_out, is_row_major); + math_t v = 0; + switch (kernel.kernel) { + case (KernelType::LINEAR): gram_host[idx] = d; break; + case (KernelType::POLYNOMIAL): + v = kernel.gamma * d + kernel.coef0; + gram_host[idx] = std::pow(v, kernel.degree); + break; + case (KernelType::TANH): gram_host[idx] = std::tanh(kernel.gamma * d + kernel.coef0); break; + case (KernelType::RBF): gram_host[idx] = exp(-kernel.gamma * d); break; + } + } + } +} + +} // namespace kernels +} // namespace distance +} // namespace raft diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu index 2cf880d23e..bd714d25b3 100644 --- a/cpp/test/sparse/gram.cu +++ b/cpp/test/sparse/gram.cu @@ -18,6 +18,7 @@ #include #endif +#include "../distance/gram_base.cuh" #include "../test_utils.cuh" #include #include @@ -29,18 +30,19 @@ #include #include #include +#include #include namespace raft::distance::kernels { using namespace raft::distance::matrix::detail; -// Get the offset of element [i,k]. -HDI int get_offset(int i, int k, int ld, bool is_row_major) -{ - return is_row_major ? 
i * ld + k : i + k * ld; -} - +/** + * Structure to describe structure of the input matrices: + * - DENSE: dense, dense + * - MIX: CSR, dense + * - CSR: CSR, CSR + */ enum SparseType { DENSE, MIX, CSR }; struct GramMatrixInputs { @@ -69,59 +71,56 @@ std::ostream& operator<<(std::ostream& os, const GramMatrixInputs& p) return os; } -const std::vector inputs = { - {42, 137, 2, false, SparseType::DENSE, {KernelType::LINEAR}}, - {42, 137, 2, true, SparseType::DENSE, {KernelType::LINEAR}}, - {42, 137, 2, false, SparseType::DENSE, {KernelType::LINEAR}, 64, 179, 181}, - {42, 137, 2, true, SparseType::DENSE, {KernelType::LINEAR}, 64, 179, 181}, - {42, 137, 2, false, SparseType::MIX, {KernelType::LINEAR}}, - {42, 137, 2, true, SparseType::MIX, {KernelType::LINEAR}}, - {42, 137, 2, false, SparseType::MIX, {KernelType::LINEAR}, 64, 179, 181}, - {42, 137, 2, true, SparseType::MIX, {KernelType::LINEAR}, 64, 179, 181}, - {42, 137, 2, false, SparseType::CSR, {KernelType::LINEAR}}, - {42, 137, 2, true, SparseType::CSR, {KernelType::LINEAR}}, - // CSR does not support ld_out - {42, 137, 2, false, SparseType::CSR, {KernelType::LINEAR}, 64, 179, 0}, - {42, 137, 2, true, SparseType::CSR, {KernelType::LINEAR}, 64, 179, 0}, - {137, 42, 2, false, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, - {137, 42, 2, true, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, - {137, 42, 2, false, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, - {137, 42, 2, true, SparseType::DENSE, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, - {137, 42, 2, false, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, - {137, 42, 2, true, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, - {137, 42, 2, false, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, - {137, 42, 2, true, SparseType::MIX, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 144}, - {137, 42, 2, false, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, - {137, 42, 2, true, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}}, - // CSR does not support ld_out - {137, 42, 2, false, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 0}, - {137, 42, 2, true, SparseType::CSR, {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, 159, 73, 0}, - {42, 137, 2, false, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}}, - {42, 137, 2, true, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}}, - {42, 137, 2, false, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 49}, - {42, 137, 2, true, SparseType::DENSE, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 143}, - {42, 137, 2, false, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}}, - {42, 137, 2, true, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}}, - {42, 137, 2, false, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 49}, - {42, 137, 2, true, SparseType::MIX, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 143}, - {42, 137, 2, false, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}}, - {42, 137, 2, true, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}}, - // CSR does not support ld_out - {42, 137, 2, false, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 0}, - {42, 137, 2, true, SparseType::CSR, {KernelType::TANH, 0, 0.5, 2.4}, 64, 155, 0}, - {3, 4, 2, false, SparseType::DENSE, {KernelType::RBF, 0, 0.5}}, - {42, 137, 2, false, SparseType::DENSE, {KernelType::RBF, 0, 0.5}}, - {42, 137, 2, true, SparseType::DENSE, {KernelType::RBF, 0, 0.5}}, - {3, 4, 2, false, SparseType::MIX, {KernelType::RBF, 0, 0.5}}, - {42, 137, 
2, false, SparseType::MIX, {KernelType::RBF, 0, 0.5}}, - {42, 137, 2, true, SparseType::MIX, {KernelType::RBF, 0, 0.5}}, - {3, 4, 2, false, SparseType::CSR, {KernelType::RBF, 0, 0.5}}, - {42, 137, 2, false, SparseType::CSR, {KernelType::RBF, 0, 0.5}}, - {42, 137, 2, true, SparseType::CSR, {KernelType::RBF, 0, 0.5}}, - // Distance kernel does not support LD parameter yet. - //{42, 137, 2, false, {KernelType::RBF, 0, 0.5}, 64, 155, 49}, - //{42, 137, 2, true, {KernelType::RBF, 0, 0.5}, 64, 155, 143}, -}; +/*struct KernelParams { + // Kernel function parameters + KernelType kernel; //!< Type of the kernel function + int degree; //!< Degree of polynomial kernel (ignored by others) + double gamma; //!< multiplier in the + double coef0; //!< additive constant in poly and tanh kernels +};*/ + +// const KernelParams linear_kernel_params{.kernel=KernelType::LINEAR}; + +// {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, {KernelType::TANH, 0, 0.5, 2.4}, {KernelType::RBF, 0, 0.5} +const std::vector inputs = raft::util::itertools::product( + {42}, + {137}, + {2}, + {true, false}, + {SparseType::DENSE, SparseType::MIX, SparseType::CSR}, + {KernelParams{KernelType::LINEAR}, + KernelParams{KernelType::POLYNOMIAL, 2, 0.5, 2.4}, + KernelParams{KernelType::TANH, 0, 0.5, 2.4}, + KernelParams{KernelType::RBF, 0, 0.5}}); + +// (ld_1, ld_2, ld_out) not supported by RBF and CSR +const std::vector inputs_ld = raft::util::itertools::product( + {137}, + {42}, + {2}, + {true, false}, + {SparseType::DENSE, SparseType::MIX}, + {KernelParams{KernelType::LINEAR}, + KernelParams{KernelType::POLYNOMIAL, 2, 0.5, 2.4}, + KernelParams{KernelType::TANH, 0, 0.5, 2.4}}, + {159}, + {73}, + {144}); + +// (ld_1, ld_2) are supported by CSR +const std::vector inputs_ld_csr = + raft::util::itertools::product( + {42}, + {137}, + {2}, + {true, false}, + {SparseType::CSR, SparseType::MIX}, + {KernelParams{KernelType::LINEAR}, + KernelParams{KernelType::POLYNOMIAL, 2, 0.5, 2.4}, + KernelParams{KernelType::TANH, 0, 0.5, 2.4}}, + {64}, + {155}, + {0}); template class GramMatrixTest : public ::testing::TestWithParam { @@ -164,45 +163,6 @@ class GramMatrixTest : public ::testing::TestWithParam { ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); } - // Calculate the Gram matrix on the host. 
- void naiveKernel() - { - std::vector x1_host(x1.size()); - raft::update_host(x1_host.data(), x1.data(), x1.size(), stream); - std::vector x2_host(x2.size()); - raft::update_host(x2_host.data(), x2.data(), x2.size(), stream); - handle.sync_stream(stream); - - for (int i = 0; i < params.n1; i++) { - for (int j = 0; j < params.n2; j++) { - float d = 0; - for (int k = 0; k < params.n_cols; k++) { - if (params.kernel.kernel == KernelType::RBF) { - math_t diff = x1_host[get_offset(i, k, params.ld1, params.is_row_major)] - - x2_host[get_offset(j, k, params.ld2, params.is_row_major)]; - d += diff * diff; - } else { - d += x1_host[get_offset(i, k, params.ld1, params.is_row_major)] * - x2_host[get_offset(j, k, params.ld2, params.is_row_major)]; - } - } - int idx = get_offset(i, j, params.ld_out, params.is_row_major); - math_t v = 0; - switch (params.kernel.kernel) { - case (KernelType::LINEAR): gram_host[idx] = d; break; - case (KernelType::POLYNOMIAL): - v = params.kernel.gamma * d + params.kernel.coef0; - gram_host[idx] = std::pow(v, params.kernel.degree); - break; - case (KernelType::TANH): - gram_host[idx] = std::tanh(params.kernel.gamma * d + params.kernel.coef0); - break; - case (KernelType::RBF): gram_host[idx] = exp(-params.kernel.gamma * d); break; - } - } - } - } - int prepareCsr(math_t* dense, int n_rows, int ld, int* indptr, int* indices, math_t* data) { int nnz = 0; @@ -303,7 +263,19 @@ class GramMatrixTest : public ::testing::TestWithParam { DenseMatrix gram_dense( gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); - naiveKernel(); + naiveGramMatrixKernel(params.n1, + params.n2, + params.n_cols, + x1, + x2, + gram_host.data(), + params.ld1, + params.ld2, + params.ld_out, + params.is_row_major, + params.kernel, + stream, + handle); (*kernel)(*x1_matrix, *x2_matrix, gram_dense, stream); handle.sync_stream(stream); @@ -333,10 +305,17 @@ class GramMatrixTest : public ::testing::TestWithParam { std::vector gram_host; }; -typedef GramMatrixTest GramMatrixTestFloat; -typedef GramMatrixTest GramMatrixTestDouble; +typedef GramMatrixTest GramMatrixTestFloatStandard; +typedef GramMatrixTest GramMatrixTestFloatLd; +typedef GramMatrixTest GramMatrixTestFloatLdCsr; -TEST_P(GramMatrixTestFloat, Gram) { runTest(); } +TEST_P(GramMatrixTestFloatStandard, Gram) { runTest(); } +TEST_P(GramMatrixTestFloatLd, Gram) { runTest(); } +TEST_P(GramMatrixTestFloatLdCsr, Gram) { runTest(); } -INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloat, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloatStandard, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloatLd, ::testing::ValuesIn(inputs_ld)); +INSTANTIATE_TEST_SUITE_P(GramMatrixTests, + GramMatrixTestFloatLdCsr, + ::testing::ValuesIn(inputs_ld_csr)); }; // end namespace raft::distance::kernels From 5bbcd0018372d112cfce2d3dbe13b421406c1732 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Tue, 14 Mar 2023 10:56:16 +0000 Subject: [PATCH 08/20] review comments norm --- cpp/include/raft/sparse/linalg/detail/norm.cuh | 16 ++++++++-------- cpp/include/raft/sparse/linalg/norm.cuh | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 7dbea8c76c..5af7749c39 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -187,7 +187,7 @@ template __global__ void 
__launch_bounds__(Policy::ThreadsPerBlock) - csrReductionKernel(Type* dots, + csrReductionKernel(Type* norm, const IdxType* ia, const Type* data, IdxType N, @@ -204,7 +204,7 @@ __global__ void __launch_bounds__(Policy::ThreadsPerBlock) acc = reduce_op(acc, main_op(data[j])); } acc = raft::logicalWarpReduce(acc, reduce_op); - if (threadIdx.x == 0) { dots[i] = final_op(acc); } + if (threadIdx.x == 0) { norm[i] = final_op(acc); } } template -void csrReduction(Type* dots, +void csrReduction(Type* norm, const IdxType* ia, const Type* data, IdxType N, @@ -228,12 +228,12 @@ void csrReduction(Type* dots, dim3 threads(Policy::LogicalWarpSize, Policy::RowsPerBlock, 1); dim3 blocks(ceildiv(N, Policy::RowsPerBlock), 1, 1); csrReductionKernel - <<>>(dots, ia, data, N, init, main_op, reduce_op, final_op); + <<>>(norm, ia, data, N, init, main_op, reduce_op, final_op); RAFT_CUDA_TRY(cudaPeekAtLastError()); } template -void rowNormCsrCaller(Type* dots, +void rowNormCsrCaller(Type* norm, const IdxType* ia, const Type* data, IdxType nnz, @@ -246,15 +246,15 @@ void rowNormCsrCaller(Type* dots, switch (type) { case raft::linalg::NormType::L1Norm: csrReduction>( - dots, ia, data, N, (Type)0, stream, raft::abs_op(), raft::add_op(), fin_op); + norm, ia, data, N, (Type)0, stream, raft::abs_op(), raft::add_op(), fin_op); break; case raft::linalg::NormType::L2Norm: csrReduction>( - dots, ia, data, N, (Type)0, stream, raft::sq_op(), raft::add_op(), fin_op); + norm, ia, data, N, (Type)0, stream, raft::sq_op(), raft::add_op(), fin_op); break; case raft::linalg::NormType::LinfNorm: csrReduction>( - dots, ia, data, N, (Type)0, stream, raft::abs_op(), raft::max_op(), fin_op); + norm, ia, data, N, (Type)0, stream, raft::abs_op(), raft::max_op(), fin_op); break; default: THROW("Unsupported norm type: %d", type); }; diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index d504e735fb..6f01569a98 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -77,8 +77,8 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) * @tparam Type the data type * @tparam Lambda device final lambda * @tparam IdxType Integer type used to for addressing - * @param dots the output vector of row-wise dot products - * @param ia the input matrix row pointers + * @param norm the output vector of row-wise norm, size [N] + * @param ia the input matrix row index array * @param data the input matrix nnz data * @param N number of rows of data * @param type the type of norm to be applied @@ -86,7 +86,7 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) * @param fin_op the final lambda op */ template -void rowNormCsr(Type* dots, +void rowNormCsr(Type* norm, const IdxType* ia, const Type* data, IdxType nnz, @@ -95,7 +95,7 @@ void rowNormCsr(Type* dots, cudaStream_t stream, Lambda fin_op = raft::identity_op()) { - detail::rowNormCsrCaller(dots, ia, data, nnz, N, type, stream, fin_op); + detail::rowNormCsrCaller(norm, ia, data, nnz, N, type, stream, fin_op); } }; // end NAMESPACE linalg From 86a03148b32f0977720ff134448612130b37041e Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Tue, 14 Mar 2023 17:51:50 +0000 Subject: [PATCH 09/20] removed handle member, but re-introduced old API to ensure backwards compatibility until cuml is updated --- .../distance/detail/kernels/gram_matrix.cuh | 118 ++++++++++--- .../detail/kernels/kernel_factory.cuh | 18 +- .../detail/kernels/kernel_matrices.cuh | 165 +++++++++++++++--- 
3 files changed, 259 insertions(+), 42 deletions(-) diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 65961e3089..14113bc2a7 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -42,10 +42,14 @@ namespace raft::distance::kernels::detail { */ template class GramMatrixBase { - const raft::device_resources& handle; + protected: + cublasHandle_t cublas_handle; + bool legacy_interface; public: - GramMatrixBase(const raft::device_resources& handle) : handle(handle){}; + GramMatrixBase() : legacy_interface(false){}; + [[deprecated]] GramMatrixBase(cublasHandle_t cublas_handle) + : cublas_handle(cublas_handle), legacy_interface(true){}; virtual ~GramMatrixBase(){}; /** Convenience function to evaluate the Gram matrix for two vector sets. @@ -54,14 +58,14 @@ class GramMatrixBase { * @param [in] x1 device matrix, size [n1*n_cols] * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out (dense) device matrix to store the Gram matrix, size [n1*n2] - * @param [in] stream cuda stream + * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ virtual void operator()(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, + const raft::device_resources& handle, math_t* norm_x1 = nullptr, math_t* norm_x2 = nullptr) { @@ -70,7 +74,7 @@ class GramMatrixBase { ASSERT(x2.n_rows == out.n_cols, "GramMatrix input matrix dimensions for x2 and out do not match"); ASSERT(x1.n_cols == x2.n_cols, "GramMatrix input matrix dimensions for x1 and x2 do not match"); - evaluate(x1, x2, out, stream, norm_x1, norm_x2); + evaluate(x1, x2, out, handle, norm_x1, norm_x2); } /** Evaluate the Gram matrix for two vector sets using simple dot product. @@ -78,18 +82,18 @@ class GramMatrixBase { * @param [in] x1 device matrix, size [n1*n_cols] * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] stream cuda stream + * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. 
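+ *
+ * The base-class implementation simply forwards to linear(), which dispatches
+ * at run time on whether x1 and x2 are dense or CSR; derived kernels override
+ * evaluate() to apply their element-wise epilogue on top of the resulting
+ * dot-product matrix.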
*/ virtual void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, + const raft::device_resources& handle, math_t* norm_x1, math_t* norm_x2) { - linear(x1, x2, out, stream); + linear(x1, x2, out, handle); } // private: @@ -107,11 +111,13 @@ class GramMatrixBase { * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] * @param [in] stream cuda stream + + @param [in] handle raft handle */ void linear(const raft::distance::matrix::detail::DenseMatrix& x1, const raft::distance::matrix::detail::DenseMatrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream) + cudaStream_t stream, + cublasHandle_t cublas_handle) { ASSERT(x1.is_row_major == x2.is_row_major, "GramMatrix leading dimensions for x1 and x2 do not match"); @@ -122,7 +128,7 @@ class GramMatrixBase { math_t beta = 0.0; if (out.is_row_major) { // #TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, out.n_cols, @@ -139,7 +145,7 @@ class GramMatrixBase { stream)); } else { // #TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T, out.n_rows, @@ -160,7 +166,8 @@ class GramMatrixBase { void linear(const raft::distance::matrix::detail::CsrMatrix& x1, const raft::distance::matrix::detail::DenseMatrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream) + cudaStream_t stream, + const cusparseHandle_t& cusparse_handle) { math_t alpha = 1.0; math_t beta = 0.0; @@ -194,7 +201,7 @@ class GramMatrixBase { auto opX2 = CUSPARSE_OPERATION_TRANSPOSE; size_t bufferSize; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(), + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(cusparse_handle, opX1, opX2, &alpha, @@ -210,7 +217,7 @@ class GramMatrixBase { rmm::device_uvector tmp(bufferSize, stream); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(), + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(cusparse_handle, opX1, opX2, &alpha, @@ -231,7 +238,7 @@ class GramMatrixBase { void linear(const raft::distance::matrix::detail::CsrMatrix& x1, const raft::distance::matrix::detail::CsrMatrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream) + const raft::device_resources& handle) { int minor_out = out.is_row_major ? 
out.n_cols : out.n_rows; ASSERT(out.ld == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); @@ -279,29 +286,100 @@ class GramMatrixBase { * @param [in] x1 device matrix, size [n1*n_cols] * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] stream cuda stream + * @param [in] handle raft handle */ void linear(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream) + const raft::device_resources& handle) { // dispatch if (x1.isDense()) { ASSERT(x2.isDense(), "GramMatrix input matrix does not allow Dense*Csr"); auto x1_dense = x1.asDense(); auto x2_dense = x2.asDense(); - linear(*x1_dense, *x2_dense, out, stream); + linear(*x1_dense, *x2_dense, out, handle.get_stream(), handle.get_cublas_handle()); } else { auto x1_csr = x1.asCsr(); if (x2.isDense()) { auto x2_dense = x2.asDense(); - linear(*x1_csr, *x2_dense, out, stream); + linear(*x1_csr, *x2_dense, out, handle.get_stream(), handle.get_cusparse_handle()); } else { auto x2_csr = x2.asCsr(); - linear(*x1_csr, *x2_csr, out, stream); + linear(*x1_csr, *x2_csr, out, handle); } } } + + /** Evaluate the Gram matrix for two vector sets using simple dot product. + * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 (usually it is n1) + * @param ld2 leading dimension of x2 (usually it is n2) + * @param ld_out leading dimension of out (usually it is n1) + */ + [[deprecated]] virtual void evaluate(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1, + int ld2, + int ld_out) + { + ASSERT(legacy_interface, "Legacy interface can only be used with legacy ctor."); + raft::distance::matrix::detail::DenseMatrix dense1( + const_cast(x1), n1, n_cols, is_row_major, ld1); + raft::distance::matrix::detail::DenseMatrix dense2( + const_cast(x2), n2, n_cols, is_row_major, ld2); + raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); + linear(dense1, dense2, dense_out, stream, cublas_handle); + } + + /** Convenience function to evaluate the Gram matrix for two vector sets. 
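+ * This pointer-based overload is kept only for backwards compatibility until
+ * cuML moves to the handle-based API. It requires the deprecated
+ * cublasHandle_t constructor, substitutes default leading dimensions when 0
+ * is passed, and then forwards to the deprecated evaluate() above.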
+ * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 + * @param ld2 leading dimension of x2 + * @param ld_out leading dimension of out + */ + [[deprecated]] void operator()(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1 = 0, + int ld2 = 0, + int ld_out = 0) + { + ASSERT(legacy_interface, "Legacy interface can only be used with legacy ctor."); + if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; } + if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } + if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } + evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + } }; + }; // end namespace raft::distance::kernels::detail diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh index ad4a81c55a..7c74e231d7 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh @@ -26,7 +26,23 @@ namespace raft::distance::kernels::detail { template class KernelFactory { public: - static GramMatrixBase* create(KernelParams params, const raft::device_resources& handle) + static GramMatrixBase* create(KernelParams params) + { + GramMatrixBase* res; + // KernelParams is not templated, we convert the parameters to math_t here: + math_t coef0 = params.coef0; + math_t gamma = params.gamma; + switch (params.kernel) { + case LINEAR: res = new GramMatrixBase(); break; + case POLYNOMIAL: res = new PolynomialKernel(params.degree, gamma, coef0); break; + case TANH: res = new TanhKernel(gamma, coef0); break; + case RBF: res = new RBFKernel(gamma); break; + default: throw raft::exception("Kernel not implemented"); + } + return res; + } + + [[deprecated]] static GramMatrixBase* create(KernelParams params, cublasHandle_t handle) { GramMatrixBase* res; // KernelParams is not templated, we convert the parameters to math_t here: diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 8b7954214c..8836a3605b 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -171,9 +171,13 @@ class PolynomialKernel : public GramMatrixBase { * @param exponent * @param gain * @param offset - * @param handle */ - PolynomialKernel(exp_t exponent, math_t gain, math_t offset, const raft::device_resources& handle) + PolynomialKernel(exp_t exponent, math_t gain, math_t offset) + : GramMatrixBase(), exponent(exponent), gain(gain), offset(offset) + { + } + + [[deprecated]] PolynomialKernel(exp_t exponent, math_t gain, math_t offset, cublasHandle_t handle) : GramMatrixBase(handle), exponent(exponent), gain(gain), offset(offset) { } @@ -187,19 +191,58 @@ class PolynomialKernel : public GramMatrixBase { * @param [in] x1 device matrix, size [n1*n_cols] * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, 
size [n1*n2] - * @param [in] stream cuda stream + * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, + const raft::device_resources& handle, math_t* norm_x1, math_t* norm_x2) { - GramMatrixBase::linear(x1, x2, out, stream); - applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, stream); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, handle.get_stream()); + } + + /** Evaluate the Gram matrix using the legacy interface. + * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 (usually it is n1) + * @param ld2 leading dimension of x2 (usually it is n2) + * @param ld_out leading dimension of out (usually it is n1) + */ + [[deprecated]] void evaluate(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1, + int ld2, + int ld_out) + { + ASSERT(GramMatrixBase::legacy_interface, + "Legacy interface can only be used with legacy ctor."); + raft::distance::matrix::detail::DenseMatrix dense1( + const_cast(x1), n1, n_cols, is_row_major, ld1); + raft::distance::matrix::detail::DenseMatrix dense2( + const_cast(x2), n2, n_cols, is_row_major, ld2); + raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); + GramMatrixBase::linear( + dense1, dense2, dense_out, stream, GramMatrixBase::cublas_handle); + applyKernel(out, ld_out, n1, n2, is_row_major, stream); } }; @@ -237,9 +280,10 @@ class TanhKernel : public GramMatrixBase { * @tparam math_t floating point type * @param gain * @param offset - * @param handle */ - TanhKernel(math_t gain, math_t offset, const raft::device_resources& handle) + TanhKernel(math_t gain, math_t offset) : GramMatrixBase(), gain(gain), offset(offset) {} + + [[deprecated]] TanhKernel(math_t gain, math_t offset, cublasHandle_t handle) : GramMatrixBase(handle), gain(gain), offset(offset) { } @@ -253,19 +297,58 @@ class TanhKernel : public GramMatrixBase { * @param [in] x1 device matrix, size [n1*n_cols] * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] stream cuda stream + * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. 
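+ *
+ * The computation has two steps: the dot-product matrix is formed by
+ * GramMatrixBase::linear(), and the element-wise tanh(gain * value + offset)
+ * epilogue is then applied by applyKernel().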
*/ void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, + const raft::device_resources& handle, math_t* norm_x1, math_t* norm_x2) { - GramMatrixBase::linear(x1, x2, out, stream); - applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, stream); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, handle.get_stream()); + } + + /** Evaluate the Gram matrix using the legacy interface. + * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 (usually it is n1) + * @param ld2 leading dimension of x2 (usually it is n2) + * @param ld_out leading dimension of out (usually it is n1) + */ + [[deprecated]] void evaluate(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1, + int ld2, + int ld_out) + { + ASSERT(GramMatrixBase::legacy_interface, + "Legacy interface can only be used with legacy ctor."); + raft::distance::matrix::detail::DenseMatrix dense1( + const_cast(x1), n1, n_cols, is_row_major, ld1); + raft::distance::matrix::detail::DenseMatrix dense2( + const_cast(x2), n2, n_cols, is_row_major, ld2); + raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); + GramMatrixBase::linear( + dense1, dense2, dense_out, stream, GramMatrixBase::cublas_handle); + applyKernel(out, ld_out, n1, n2, is_row_major, stream); } }; @@ -304,14 +387,16 @@ class RBFKernel : public GramMatrixBase { * @tparam math_t floating point type * @param gain */ - RBFKernel(math_t gain, const raft::device_resources& handle) + RBFKernel(math_t gain) : GramMatrixBase(), gain(gain) {} + + [[deprecated]] RBFKernel(math_t gain, cublasHandle_t handle) : GramMatrixBase(handle), gain(gain) { } - void matrixDot(const raft::distance::matrix::detail::Matrix& matrix, - math_t* target, - cudaStream_t stream) + void matrixRowNormL2(const raft::distance::matrix::detail::Matrix& matrix, + math_t* target, + cudaStream_t stream) { auto norm = raft::linalg::NormType::L2Norm; if (matrix.isDense()) { @@ -342,17 +427,18 @@ class RBFKernel : public GramMatrixBase { * @param [in] x1 device matrix, size [n1*n_cols] * @param [in] x2 device matrix, size [n2*n_cols] * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] stream cuda stream + * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. 
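+ *
+ * When the row norms are available (either passed in or computed on the fly
+ * via matrixRowNormL2), the kernel is evaluated in expanded form,
+ * out[i,k] = exp(-gain * (||x1_i||^2 + ||x2_k||^2 - 2 * <x1_i, x2_k>)),
+ * reusing the linear Gram matrix as the dot-product building block.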
*/ void evaluate(const raft::distance::matrix::detail::Matrix& x1, const raft::distance::matrix::detail::Matrix& x2, raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, + const raft::device_resources& handle, math_t* norm_x1, math_t* norm_x2) { + cudaStream_t stream = handle.get_stream(); if (x1.isDense() && x2.isDense() && (norm_x1 == nullptr || norm_x2 == nullptr)) { auto x1_dense = x1.asDense(); auto x2_dense = x2.asDense(); @@ -363,15 +449,15 @@ class RBFKernel : public GramMatrixBase { if (norm_x1 == nullptr) { tmp_norm_x1.reserve(x1.n_rows, stream); norm_x1 = tmp_norm_x1.data(); - matrixDot(x1, norm_x1, stream); + matrixRowNormL2(x1, norm_x1, stream); } if (norm_x2 == nullptr) { tmp_norm_x2.reserve(x2.n_rows, stream); norm_x2 = tmp_norm_x2.data(); - matrixDot(x2, norm_x2, stream); + matrixRowNormL2(x2, norm_x2, stream); } // compute L2expanded - GramMatrixBase::linear(x1, x2, out, stream); + GramMatrixBase::linear(x1, x2, out, handle); applyExpandedRbfKernel( out.data, out.ld, out.n_rows, out.n_cols, norm_x1, norm_x2, out.is_row_major, stream); } @@ -415,6 +501,43 @@ class RBFKernel : public GramMatrixBase { fin_op, out.is_row_major); } + + /** Evaluate the Gram matrix using the legacy interface. + * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 (usually it is n1) + * @param ld2 leading dimension of x2 (usually it is n2) + * @param ld_out leading dimension of out (usually it is n1) + */ + [[deprecated]] void evaluate(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1, + int ld2, + int ld_out) + { + ASSERT(GramMatrixBase::legacy_interface, + "Legacy interface can only be used with legacy ctor."); + raft::distance::matrix::detail::DenseMatrix dense1( + const_cast(x1), n1, n_cols, is_row_major, ld1); + raft::distance::matrix::detail::DenseMatrix dense2( + const_cast(x2), n2, n_cols, is_row_major, ld2); + raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); + distance_rbf(dense1, dense2, dense_out, stream); + } }; }; // end namespace raft::distance::kernels::detail From 591b77dcd75937f1cea0079d2304b47d2f9cadeb Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Mon, 20 Mar 2023 16:56:55 +0000 Subject: [PATCH 10/20] changed GramMatrix API to support device_mdspan/device_csr_matrix_view as input --- cpp/include/raft/core/device_mdspan.hpp | 30 + .../distance/detail/kernels/gram_matrix.cuh | 609 ++++++++++++------ .../detail/kernels/kernel_matrices.cuh | 440 +++++++++---- .../raft/distance/detail/matrix/matrix.hpp | 99 --- cpp/test/distance/gram.cu | 14 +- cpp/test/sparse/gram.cu | 95 ++- 6 files changed, 794 insertions(+), 493 deletions(-) delete mode 100644 cpp/include/raft/distance/detail/matrix/matrix.hpp diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp index f72ae36d64..ace7ea0f2c 100644 --- a/cpp/include/raft/core/device_mdspan.hpp +++ b/cpp/include/raft/core/device_mdspan.hpp @@ -259,6 +259,36 @@ auto make_device_matrix_view(ElementType* 
ptr, IndexType n_rows, IndexType n_col return device_matrix_view{ptr, extents}; } +/** + * @brief Create a 2-dim mdspan instance for device pointer with a strided layout + * that is restricted to stride 1 in the trailing dimension. It's + * expected that the given layout policy match the layout of the underlying + * pointer. + * @tparam ElementType the data type of the matrix elements + * @tparam IndexType the index type of the extents + * @param[in] ptr on device to wrap + * @param[in] n_rows number of rows in pointer + * @param[in] n_cols number of columns in pointer + * @param[in] is_row_major whether the data is in row major format (column major otherwise) + * @param[in] ld leading dimension / stride of data + */ +template +auto make_device_matrix_view( + ElementType* ptr, IndexType n_rows, IndexType n_cols, bool is_row_major, IndexType ld) +{ + IndexType stride0 = is_row_major ? (ld > 0 ? ld : n_cols) : 1; + IndexType stride1 = is_row_major ? 1 : (ld > 0 ? ld : n_rows); + + assert(is_row_major ? stride0 >= n_cols : stride1 >= n_rows); + + matrix_extent extents{n_rows, n_cols}; + std::array strides{stride0, stride1}; + using mapping_type = typename layout_stride::template mapping>; + mapping_type layout = {extents, strides}; + + return device_matrix_view{ptr, layout}; +} + /** * @brief Create a 1-dim mdspan instance for device pointer. * @tparam ElementType the data type of the vector elements diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 14113bc2a7..9cce6cf5ee 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -16,8 +16,8 @@ #pragma once +#include #include -#include #include #include #include @@ -28,6 +28,13 @@ namespace raft::distance::kernels::detail { +template +using dense_input_matrix_view_t = raft::device_matrix_view; +template +using dense_output_matrix_view_t = raft::device_matrix_view; +template +using csr_input_matrix_view_t = raft::device_csr_matrix_view; + /** * Base class for general Gram matrices * A Gram matrix is the Hermitian matrix of inner probucts G_ik = @@ -52,147 +59,410 @@ class GramMatrixBase { : cublas_handle(cublas_handle), legacy_interface(true){}; virtual ~GramMatrixBase(){}; + /** Convenience function to evaluate the Gram matrix for two vector sets. * Vector sets are provided in Matrix format * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] - * @param [out] out (dense) device matrix to store the Gram matrix, size [n1*n2] + * @param [in] x1 dense device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. 
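+ *
+ * A minimal calling sketch (buffer names are hypothetical; the data is
+ * assumed to be row-major, padding-free and already resident on the device):
+ *
+ *   raft::device_resources handle;
+ *   auto x1  = raft::make_device_matrix_view<const float, int>(x1_ptr, n1, n_cols, true, 0);
+ *   auto x2  = raft::make_device_matrix_view<const float, int>(x2_ptr, n2, n_cols, true, 0);
+ *   auto out = raft::make_device_matrix_view<float, int>(out_ptr, n1, n2, true, 0);
+ *   GramMatrixBase<float> kernel;
+ *   kernel(x1, x2, out, handle);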
*/ - virtual void operator()(const raft::distance::matrix::detail::Matrix& x1, - const raft::distance::matrix::detail::Matrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle, - math_t* norm_x1 = nullptr, - math_t* norm_x2 = nullptr) + void operator()(dense_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1 = nullptr, + math_t* norm_x2 = nullptr) + { + evaluate(x1, x2, out, handle, norm_x1, norm_x2); + } + + /** Convenience function to evaluate the Gram matrix for two vector sets. + * Vector sets are provided in Matrix format + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. + * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. + */ + void operator()(csr_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1 = nullptr, + math_t* norm_x2 = nullptr) + { + evaluate(x1, x2, out, handle, norm_x1, norm_x2); + } + + /** Convenience function to evaluate the Gram matrix for two vector sets. + * Vector sets are provided in Matrix format + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 csr device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. + * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. + */ + void operator()(csr_input_matrix_view_t x1, + csr_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1 = nullptr, + math_t* norm_x2 = nullptr) { - ASSERT(x1.n_rows == out.n_rows, - "GramMatrix input matrix dimensions for x1 and out do not match"); - ASSERT(x2.n_rows == out.n_cols, - "GramMatrix input matrix dimensions for x2 and out do not match"); - ASSERT(x1.n_cols == x2.n_cols, "GramMatrix input matrix dimensions for x1 and x2 do not match"); evaluate(x1, x2, out, handle, norm_x1, norm_x2); } + // unfortunately, 'evaluate' cannot be templatized as it needs to be virtual + /** Evaluate the Gram matrix for two vector sets using simple dot product. * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] x1 dense device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 unused. + * @param norm_x2 unused. + */ + virtual void evaluate(dense_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + linear(x1, x2, out, handle); + } + /** Evaluate the Gram matrix for two vector sets using simple dot product. 
+ * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 unused. + * @param norm_x2 unused. + */ + virtual void evaluate(csr_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + linear(x1, x2, out, handle); + } + /** Evaluate the Gram matrix for two vector sets using simple dot product. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 csr device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - virtual void evaluate(const raft::distance::matrix::detail::Matrix& x1, - const raft::distance::matrix::detail::Matrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle, + virtual void evaluate(csr_input_matrix_view_t x1, + csr_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { linear(x1, x2, out, handle); } - // private: - // The following methods should be private, they are kept public to avoid: - // "error: The enclosing parent function ("distance") for an extended - // __device__ lambda cannot have private or protected access within its class" + /** Evaluate the Gram matrix for two vector sets using simple dot product. + * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 (usually it is n1) + * @param ld2 leading dimension of x2 (usually it is n2) + * @param ld_out leading dimension of out (usually it is n1) + */ + [[deprecated]] virtual void evaluate(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1, + int ld2, + int ld_out) + { + linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + } + + /** Convenience function to evaluate the Gram matrix for two vector sets. 
+ * + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 + * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format + * @param [in] stream cuda stream + * @param ld1 leading dimension of x1 + * @param ld2 leading dimension of x2 + * @param ld_out leading dimension of out + */ + [[deprecated]] void operator()(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1 = 0, + int ld2 = 0, + int ld_out = 0) + { + ASSERT(legacy_interface, "Legacy interface can only be used with legacy ctor."); + if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; } + if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } + if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } + evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + } + protected: /** Calculates the Gram matrix using simple dot product between vector sets. * * out = x1 * x2 * * Can be used as a building block for more complex kernel functions. * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] + * @param [in] x1 device array of vectors, size [n1*n_cols] + * @param [in] n1 number vectors in x1 + * @param [in] n_cols number of columns (features) in x1 and x2 + * @param [in] x2 device array of vectors, size [n2*n_cols] + * @param [in] n2 number vectors in x2 * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] is_row_major whether the input and output matrices are in row + * major format * @param [in] stream cuda stream - + @param [in] handle raft handle + * @param ld1 leading dimension of x1 + * @param ld2 leading dimension of x2 + * @param ld_out leading dimension of out */ - void linear(const raft::distance::matrix::detail::DenseMatrix& x1, - const raft::distance::matrix::detail::DenseMatrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, - cublasHandle_t cublas_handle) + [[deprecated]] void linear(const math_t* x1, + int n1, + int n_cols, + const math_t* x2, + int n2, + math_t* out, + bool is_row_major, + cudaStream_t stream, + int ld1, + int ld2, + int ld_out) { - ASSERT(x1.is_row_major == x2.is_row_major, - "GramMatrix leading dimensions for x1 and x2 do not match"); - ASSERT(x2.is_row_major == out.is_row_major, - "GramMatrix leading dimensions for x2 and out do not match"); - math_t alpha = 1.0; math_t beta = 0.0; - if (out.is_row_major) { + if (is_row_major) { // #TODO: Call from public API when ready RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, - out.n_cols, - out.n_rows, - x1.n_cols, + n2, + n1, + n_cols, &alpha, - x2.data, - x2.ld, - x1.data, - x1.ld, + x2, + ld2, + x1, + ld1, &beta, - out.data, - out.ld, + out, + ld_out, stream)); } else { // #TODO: Call from public API when ready RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T, - out.n_rows, - out.n_cols, - x1.n_cols, + n1, + n2, + n_cols, &alpha, - x1.data, - x1.ld, - x2.data, - x2.ld, + x1, + ld1, + x2, + ld2, &beta, - out.data, - out.ld, + out, + ld_out, stream)); } } - void linear(const raft::distance::matrix::detail::CsrMatrix& x1, - const 
raft::distance::matrix::detail::DenseMatrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream, - const cusparseHandle_t& cusparse_handle) + protected: + bool get_is_row_major(dense_output_matrix_view_t matrix) + { + ASSERT(matrix.stride(0) == 1 || matrix.stride(1) == 1, + "GramMatrix matrix layout minor stride needs to be 1"); + return (matrix.stride(1) == 1); + } + + bool get_is_row_major(dense_input_matrix_view_t matrix) + { + ASSERT(matrix.stride(0) == 1 || matrix.stride(1) == 1, + "GramMatrix matrix layout minor stride needs to be 1"); + return (matrix.stride(1) == 1); + } + + /** Calculates the Gram matrix using simple dot product between vector sets. + * + * out = x1 * x2 + * + * Can be used as a building block for more complex kernel functions. + * + * @param [in] x1 dense device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + */ + void linear(dense_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle) { + // check is_row_major consistency + bool is_row_major = get_is_row_major(out); + ASSERT(is_row_major ? (x1.stride(1) == 1) : (x1.stride(0) == 1), + "GramMatrix leading dimensions for x1 and out do not match"); + ASSERT(is_row_major ? (x2.stride(1) == 1) : (x2.stride(0) == 1), + "GramMatrix leading dimensions for x2 and out do not match"); + + // check dimensions + int n1 = out.extent(0); + int n2 = out.extent(1); + int n_cols = x1.extent(1); + ASSERT(x1.extent(0) == n1, "GramMatrix input matrix dimensions for x1 and out do not match"); + ASSERT(x2.extent(0) == n2, "GramMatrix input matrix dimensions for x2 and out do not match"); + ASSERT(x2.extent(1) == n_cols, "GramMatrix input matrix dimensions for x1 and x2 do not match"); + + // extract major stride + int ld1 = is_row_major ? x1.stride(0) : x1.stride(1); + int ld2 = is_row_major ? x2.stride(0) : x2.stride(1); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + math_t alpha = 1.0; math_t beta = 0.0; + if (is_row_major) { + // #TODO: Call from public API when ready + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + n2, + n1, + n_cols, + &alpha, + x2.data_handle(), + ld2, + x1.data_handle(), + ld1, + &beta, + out.data_handle(), + ld_out, + handle.get_stream())); + } else { + // #TODO: Call from public API when ready + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), + CUBLAS_OP_N, + CUBLAS_OP_T, + n1, + n2, + n_cols, + &alpha, + x1.data_handle(), + ld1, + x2.data_handle(), + ld2, + &beta, + out.data_handle(), + ld_out, + handle.get_stream())); + } + } - ASSERT(x2.is_row_major == out.is_row_major, + /** Calculates the Gram matrix using simple dot product between vector sets. + * + * out = x1 * x2 + * + * Can be used as a building block for more complex kernel functions. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + */ + void linear(csr_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle) + { + // check is_row_major consistency + bool is_row_major = get_is_row_major(out); + ASSERT(is_row_major ? 
(x2.stride(1) == 1) : (x2.stride(0) == 1), "GramMatrix leading dimensions for x2 and out do not match"); + // check dimensions + auto x1_structure = x1.get_structure(); + ASSERT(x1_structure.get_n_rows() == out.extent(0), + "GramMatrix input matrix dimensions for x1 and out do not match"); + ASSERT(x2.extent(0) == out.extent(1), + "GramMatrix input matrix dimensions for x2 and out do not match"); + ASSERT(x2.extent(1) == x1_structure.get_n_cols(), + "GramMatrix input matrix dimensions for x1 and x2 do not match"); + + // extract major stride + int ld2 = is_row_major ? x2.stride(0) : x2.stride(1); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + + math_t alpha = 1.0; + math_t beta = 0.0; + cusparseSpMatDescr_t descrX1; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&descrX1, - x1.n_rows, - x1.n_cols, - x1.nnz, - const_cast(x1.indptr), - const_cast(x1.indices), - const_cast(x1.data))); + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatecsr(&descrX1, + x1_structure.get_n_rows(), + x1_structure.get_n_cols(), + x1_structure.get_nnz(), + const_cast(x1_structure.get_indptr().data()), + const_cast(x1_structure.get_indices().data()), + const_cast(x1.get_elements().data()))); - auto order = out.is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; cusparseDnMatDescr_t descrX2; RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( - &descrX2, x2.n_rows, x2.n_cols, x2.ld, const_cast(x2.data), order)); + &descrX2, x2.extent(0), x2.extent(1), ld2, const_cast(x2.data_handle()), order)); cusparseDnMatDescr_t descrOut; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( - &descrOut, out.n_rows, out.n_cols, out.ld, const_cast(out.data), order)); + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatednmat(&descrOut, + out.extent(0), + out.extent(1), + ld_out, + const_cast(out.data_handle()), + order)); auto alg = order == CUSPARSE_ORDER_COL ? CUSPARSE_SPMM_CSR_ALG1 : CUSPARSE_SPMM_CSR_ALG2; @@ -201,7 +471,7 @@ class GramMatrixBase { auto opX2 = CUSPARSE_OPERATION_TRANSPOSE; size_t bufferSize; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(cusparse_handle, + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(), opX1, opX2, &alpha, @@ -211,13 +481,13 @@ class GramMatrixBase { descrOut, alg, &bufferSize, - stream)); + handle.get_stream())); - raft::interruptible::synchronize(stream); + raft::interruptible::synchronize(handle.get_stream()); - rmm::device_uvector tmp(bufferSize, stream); + rmm::device_uvector tmp(bufferSize, handle.get_stream()); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(cusparse_handle, + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(), opX1, opX2, &alpha, @@ -227,7 +497,7 @@ class GramMatrixBase { descrOut, alg, tmp.data(), - stream)); + handle.get_stream())); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descrX1)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descrX2)); @@ -235,150 +505,63 @@ class GramMatrixBase { RAFT_CUDA_TRY(cudaPeekAtLastError()); } - void linear(const raft::distance::matrix::detail::CsrMatrix& x1, - const raft::distance::matrix::detail::CsrMatrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle) - { - int minor_out = out.is_row_major ? 
out.n_cols : out.n_rows; - ASSERT(out.ld == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); - raft::sparse::distance::distances_config_t dist_config(handle); - - // switch a,b based on is_row_major - if (!out.is_row_major) { - dist_config.a_nrows = x2.n_rows; - dist_config.a_ncols = x2.n_cols; - dist_config.a_nnz = x2.nnz; - dist_config.a_indptr = const_cast(x2.indptr); - dist_config.a_indices = const_cast(x2.indices); - dist_config.a_data = const_cast(x2.data); - dist_config.b_nrows = x1.n_rows; - dist_config.b_ncols = x1.n_cols; - dist_config.b_nnz = x1.nnz; - dist_config.b_indptr = const_cast(x1.indptr); - dist_config.b_indices = const_cast(x1.indices); - dist_config.b_data = const_cast(x1.data); - } else { - dist_config.a_nrows = x1.n_rows; - dist_config.a_ncols = x1.n_cols; - dist_config.a_nnz = x1.nnz; - dist_config.a_indptr = const_cast(x1.indptr); - dist_config.a_indices = const_cast(x1.indices); - dist_config.a_data = const_cast(x1.data); - dist_config.b_nrows = x2.n_rows; - dist_config.b_ncols = x2.n_cols; - dist_config.b_nnz = x2.nnz; - dist_config.b_indptr = const_cast(x2.indptr); - dist_config.b_indices = const_cast(x2.indices); - dist_config.b_data = const_cast(x2.data); - } - - raft::sparse::distance::pairwiseDistance( - out.data, dist_config, raft::distance::DistanceType::InnerProduct, 0.0); - } - /** Calculates the Gram matrix using simple dot product between vector sets. * * out = x1 * x2 * * Can be used as a building block for more complex kernel functions. * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 csr device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] * @param [in] handle raft handle */ - void linear(const raft::distance::matrix::detail::Matrix& x1, - const raft::distance::matrix::detail::Matrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle) + void linear(csr_input_matrix_view_t x1, + csr_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle) { - // dispatch - if (x1.isDense()) { - ASSERT(x2.isDense(), "GramMatrix input matrix does not allow Dense*Csr"); - auto x1_dense = x1.asDense(); - auto x2_dense = x2.asDense(); - linear(*x1_dense, *x2_dense, out, handle.get_stream(), handle.get_cublas_handle()); + // check is_row_major consistency + bool is_row_major = get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + int minor_out = is_row_major ? 
out.extent(1) : out.extent(0); + ASSERT(ld_out == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); + + auto x1_structure = x1.get_structure(); + auto x2_structure = x2.get_structure(); + raft::sparse::distance::distances_config_t dist_config(handle); + + // switch a,b based on is_row_major + if (!is_row_major) { + dist_config.a_nrows = x2_structure.get_n_rows(); + dist_config.a_ncols = x2_structure.get_n_cols(); + dist_config.a_nnz = x2_structure.get_nnz(); + dist_config.a_indptr = const_cast(x2_structure.get_indptr().data()); + dist_config.a_indices = const_cast(x2_structure.get_indices().data()); + dist_config.a_data = const_cast(x2.get_elements().data()); + dist_config.b_nrows = x1_structure.get_n_rows(); + dist_config.b_ncols = x1_structure.get_n_cols(); + dist_config.b_nnz = x1_structure.get_nnz(); + dist_config.b_indptr = const_cast(x1_structure.get_indptr().data()); + dist_config.b_indices = const_cast(x1_structure.get_indices().data()); + dist_config.b_data = const_cast(x1.get_elements().data()); } else { - auto x1_csr = x1.asCsr(); - if (x2.isDense()) { - auto x2_dense = x2.asDense(); - linear(*x1_csr, *x2_dense, out, handle.get_stream(), handle.get_cusparse_handle()); - } else { - auto x2_csr = x2.asCsr(); - linear(*x1_csr, *x2_csr, out, handle); - } + dist_config.a_nrows = x1_structure.get_n_rows(); + dist_config.a_ncols = x1_structure.get_n_cols(); + dist_config.a_nnz = x1_structure.get_nnz(); + dist_config.a_indptr = const_cast(x1_structure.get_indptr().data()); + dist_config.a_indices = const_cast(x1_structure.get_indices().data()); + dist_config.a_data = const_cast(x1.get_elements().data()); + dist_config.b_nrows = x2_structure.get_n_rows(); + dist_config.b_ncols = x2_structure.get_n_cols(); + dist_config.b_nnz = x2_structure.get_nnz(); + dist_config.b_indptr = const_cast(x2_structure.get_indptr().data()); + dist_config.b_indices = const_cast(x2_structure.get_indices().data()); + dist_config.b_data = const_cast(x2.get_elements().data()); } - } - - /** Evaluate the Gram matrix for two vector sets using simple dot product. - * - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of columns (features) in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format - * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 (usually it is n1) - * @param ld2 leading dimension of x2 (usually it is n2) - * @param ld_out leading dimension of out (usually it is n1) - */ - [[deprecated]] virtual void evaluate(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld1, - int ld2, - int ld_out) - { - ASSERT(legacy_interface, "Legacy interface can only be used with legacy ctor."); - raft::distance::matrix::detail::DenseMatrix dense1( - const_cast(x1), n1, n_cols, is_row_major, ld1); - raft::distance::matrix::detail::DenseMatrix dense2( - const_cast(x2), n2, n_cols, is_row_major, ld2); - raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); - linear(dense1, dense2, dense_out, stream, cublas_handle); - } - /** Convenience function to evaluate the Gram matrix for two vector sets. 
- * - * @param [in] x1 device array of vectors, size [n1*n_cols] - * @param [in] n1 number vectors in x1 - * @param [in] n_cols number of columns (features) in x1 and x2 - * @param [in] x2 device array of vectors, size [n2*n_cols] - * @param [in] n2 number vectors in x2 - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] - * @param [in] is_row_major whether the input and output matrices are in row - * major format - * @param [in] stream cuda stream - * @param ld1 leading dimension of x1 - * @param ld2 leading dimension of x2 - * @param ld_out leading dimension of out - */ - [[deprecated]] void operator()(const math_t* x1, - int n1, - int n_cols, - const math_t* x2, - int n2, - math_t* out, - bool is_row_major, - cudaStream_t stream, - int ld1 = 0, - int ld2 = 0, - int ld_out = 0) - { - ASSERT(legacy_interface, "Legacy interface can only be used with legacy ctor."); - if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; } - if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; } - if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; } - evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); + raft::sparse::distance::pairwiseDistance( + out.data_handle(), dist_config, raft::distance::DistanceType::InnerProduct, 0.0); } }; diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 8836a3605b..cb93ee3cf8 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -188,22 +188,79 @@ class PolynomialKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] x1 dense device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 unused. + * @param norm_x2 unused. + */ + void evaluate(dense_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel( + out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); + } + + /** Evaluate kernel matrix using polynomial kernel. + * + * output[i,k] = (gain* + offset)^exponent, + * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector + * in the x2 set, and < , > denotes dot product. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. 
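+   *
+   * A minimal usage sketch; the concrete view types, kernel template arguments and
+   * constructor arguments below are illustrative assumptions, not mandated by this overload:
+   * @code{.cpp}
+   *   // x1_csr : e.g. raft::device_csr_matrix_view<float, int, int, int>, n1 x n_cols
+   *   // x2     : e.g. raft::device_matrix_view<float, int, raft::layout_stride>, n2 x n_cols
+   *   // out    : dense n1 x n2 view with a layout matching x2
+   *   PolynomialKernel<float, int> poly(3, 1.0f, 1.0f);  // assumed (exponent, gain, offset) ctor
+   *   poly.evaluate(x1_csr, x2, out, handle, nullptr, nullptr);  // norm arguments unused here
+   * @endcode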
*/ - void evaluate(const raft::distance::matrix::detail::Matrix& x1, - const raft::distance::matrix::detail::Matrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle, + void evaluate(csr_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); GramMatrixBase::linear(x1, x2, out, handle); - applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, handle.get_stream()); + applyKernel( + out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); + } + + /** Evaluate kernel matrix using polynomial kernel. + * + * output[i,k] = (gain* + offset)^exponent, + * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector + * in the x2 set, and < , > denotes dot product. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 csr device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 unused. + * @param norm_x2 unused. + */ + void evaluate(csr_input_matrix_view_t x1, + csr_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel( + out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } /** Evaluate the Gram matrix using the legacy interface. @@ -235,13 +292,8 @@ class PolynomialKernel : public GramMatrixBase { { ASSERT(GramMatrixBase::legacy_interface, "Legacy interface can only be used with legacy ctor."); - raft::distance::matrix::detail::DenseMatrix dense1( - const_cast(x1), n1, n_cols, is_row_major, ld1); - raft::distance::matrix::detail::DenseMatrix dense2( - const_cast(x2), n2, n_cols, is_row_major, ld2); - raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); GramMatrixBase::linear( - dense1, dense2, dense_out, stream, GramMatrixBase::cublas_handle); + x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); applyKernel(out, ld_out, n1, n2, is_row_major, stream); } }; @@ -294,22 +346,79 @@ class TanhKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] x1 dense device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. 
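+   *
+   * For illustration only; the view types and the (gain, offset) constructor shown
+   * here are assumptions rather than part of this overload's contract:
+   * @code{.cpp}
+   *   // x1, x2 : e.g. raft::device_matrix_view<float, int, raft::layout_stride>
+   *   // out    : dense n1 x n2 view with the same layout as x1 and x2
+   *   TanhKernel<float> tanh_kernel(0.5f, 1.0f);  // assumed (gain, offset) ctor
+   *   tanh_kernel.evaluate(x1, x2, out, handle, nullptr, nullptr);
+   * @endcode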
*/ - void evaluate(const raft::distance::matrix::detail::Matrix& x1, - const raft::distance::matrix::detail::Matrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle, + void evaluate(dense_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); GramMatrixBase::linear(x1, x2, out, handle); - applyKernel(out.data, out.ld, out.n_rows, out.n_cols, out.is_row_major, handle.get_stream()); + applyKernel( + out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); + } + + /** Evaluate kernel matrix using tanh kernel. + * + * output_[i + k*n1] = (gain* + offset)^exponent, + * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector + * in the x2 set, and < , > denotes dot product. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 unused. + * @param norm_x2 unused. + */ + void evaluate(csr_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel( + out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); + } + + /** Evaluate kernel matrix using tanh kernel. + * + * output_[i + k*n1] = (gain* + offset)^exponent, + * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector + * in the x2 set, and < , > denotes dot product. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 csr device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 unused. + * @param norm_x2 unused. + */ + void evaluate(csr_input_matrix_view_t x1, + csr_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel( + out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } /** Evaluate the Gram matrix using the legacy interface. 
@@ -341,13 +450,8 @@ class TanhKernel : public GramMatrixBase { { ASSERT(GramMatrixBase::legacy_interface, "Legacy interface can only be used with legacy ctor."); - raft::distance::matrix::detail::DenseMatrix dense1( - const_cast(x1), n1, n_cols, is_row_major, ld1); - raft::distance::matrix::detail::DenseMatrix dense2( - const_cast(x2), n2, n_cols, is_row_major, ld2); - raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); GramMatrixBase::linear( - dense1, dense2, dense_out, stream, GramMatrixBase::cublas_handle); + x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out); applyKernel(out, ld_out, n1, n2, is_row_major, stream); } }; @@ -359,14 +463,14 @@ template class RBFKernel : public GramMatrixBase { math_t gain; - void applyExpandedRbfKernel(math_t* inout, - int ld, - int rows, - int cols, - math_t* norm_x1, - math_t* norm_x2, - bool is_row_major, - cudaStream_t stream) + void applyKernel(math_t* inout, + int ld, + int rows, + int cols, + math_t* norm_x1, + math_t* norm_x2, + bool is_row_major, + cudaStream_t stream) { int n1 = is_row_major ? cols : rows; int n2 = is_row_major ? rows : cols; @@ -394,28 +498,83 @@ class RBFKernel : public GramMatrixBase { { } - void matrixRowNormL2(const raft::distance::matrix::detail::Matrix& matrix, + void matrixRowNormL2(dense_input_matrix_view_t matrix, math_t* target, cudaStream_t stream) { - auto norm = raft::linalg::NormType::L2Norm; - if (matrix.isDense()) { - auto dense_matrix = matrix.asDense(); - int minor = dense_matrix->is_row_major ? matrix.n_cols : matrix.n_rows; - ASSERT(dense_matrix->ld == minor, - "RBF Kernel lazy rowNorm compute does not support ld parameter"); - raft::linalg::rowNorm(target, - dense_matrix->data, - matrix.n_cols, - matrix.n_rows, - norm, - dense_matrix->is_row_major, - stream); - } else { - auto csr_matrix = matrix.asCsr(); - raft::sparse::linalg::rowNormCsr( - target, csr_matrix->indptr, csr_matrix->data, csr_matrix->nnz, matrix.n_rows, norm, stream); + bool is_row_major = GramMatrixBase::get_is_row_major(matrix); + int minor = is_row_major ? matrix.extent(1) : matrix.extent(0); + int ld = is_row_major ? matrix.stride(0) : matrix.stride(1); + ASSERT(ld == minor, "RBF Kernel lazy rowNorm compute does not support ld parameter"); + raft::linalg::rowNorm(target, + matrix.data_handle(), + matrix.extent(1), + matrix.extent(0), + raft::linalg::NormType::L2Norm, + is_row_major, + stream); + } + + void matrixRowNormL2(csr_input_matrix_view_t matrix, math_t* target, cudaStream_t stream) + { + auto matrix_structure = matrix.get_structure(); + raft::sparse::linalg::rowNormCsr(target, + matrix_structure.get_indptr().data(), + matrix.get_elements().data(), + matrix_structure.get_nnz(), + matrix_structure.get_n_rows(), + raft::linalg::NormType::L2Norm, + stream); + } + + /** Evaluate kernel matrix using RBF kernel. + * + * output_[i + k*n1] = exp(-gain*|x1_i - x2_k|^2), + * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector + * in the x2 set, and | | euclidean distance. + * + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. + * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. 
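+   *
+   * Example call with assumed view types and a gain-only constructor; passing
+   * nullptr for both norms lets the kernel compute the row norms lazily:
+   * @code{.cpp}
+   *   // x1, x2 : e.g. raft::device_matrix_view<float, int, raft::layout_stride>
+   *   // out    : dense n1 x n2 view with the same layout as x1 and x2
+   *   RBFKernel<float> rbf(1.0f);  // assumed gain-only ctor
+   *   rbf.evaluate(x1, x2, out, handle, nullptr, nullptr);
+   * @endcode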
+ */ + void evaluate(dense_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) + { + cudaStream_t stream = handle.get_stream(); + + // lazy compute norms if not given + rmm::device_uvector tmp_norm_x1(0, stream); + rmm::device_uvector tmp_norm_x2(0, stream); + if (norm_x1 == nullptr) { + tmp_norm_x1.reserve(x1.extent(0), stream); + norm_x1 = tmp_norm_x1.data(); + matrixRowNormL2(x1, norm_x1, stream); } + if (norm_x2 == nullptr) { + tmp_norm_x2.reserve(x2.extent(0), stream); + norm_x2 = tmp_norm_x2.data(); + matrixRowNormL2(x2, norm_x2, stream); + } + + // compute L2expanded + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel(out.data_handle(), + ld_out, + out.extent(0), + out.extent(1), + norm_x1, + norm_x2, + is_row_major, + handle.get_stream()); } /** Evaluate kernel matrix using RBF kernel. @@ -424,82 +583,98 @@ class RBFKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and | | euclidean distance. * - * @param [in] x1 device matrix, size [n1*n_cols] - * @param [in] x2 device matrix, size [n2*n_cols] - * @param [out] out device buffer to store the Gram matrix, size [n1*n2] + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. 
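+   *
+   * Illustrative call (the CSR view type and constructor are assumptions); the row
+   * norms are again computed lazily when nullptr is passed:
+   * @code{.cpp}
+   *   // x1_csr : e.g. raft::device_csr_matrix_view<float, int, int, int>, n1 x n_cols
+   *   // x2     : e.g. raft::device_matrix_view<float, int, raft::layout_stride>, n2 x n_cols
+   *   RBFKernel<float> rbf(1.0f);
+   *   rbf.evaluate(x1_csr, x2, out, handle, nullptr, nullptr);
+   * @endcode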
*/ - void evaluate(const raft::distance::matrix::detail::Matrix& x1, - const raft::distance::matrix::detail::Matrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - const raft::device_resources& handle, + void evaluate(csr_input_matrix_view_t x1, + dense_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { cudaStream_t stream = handle.get_stream(); - if (x1.isDense() && x2.isDense() && (norm_x1 == nullptr || norm_x2 == nullptr)) { - auto x1_dense = x1.asDense(); - auto x2_dense = x2.asDense(); - distance_rbf(*x1_dense, *x2_dense, out, stream); - } else { - rmm::device_uvector tmp_norm_x1(0, stream); - rmm::device_uvector tmp_norm_x2(0, stream); - if (norm_x1 == nullptr) { - tmp_norm_x1.reserve(x1.n_rows, stream); - norm_x1 = tmp_norm_x1.data(); - matrixRowNormL2(x1, norm_x1, stream); - } - if (norm_x2 == nullptr) { - tmp_norm_x2.reserve(x2.n_rows, stream); - norm_x2 = tmp_norm_x2.data(); - matrixRowNormL2(x2, norm_x2, stream); - } - // compute L2expanded - GramMatrixBase::linear(x1, x2, out, handle); - applyExpandedRbfKernel( - out.data, out.ld, out.n_rows, out.n_cols, norm_x1, norm_x2, out.is_row_major, stream); + + // lazy compute norms if not given + rmm::device_uvector tmp_norm_x1(0, stream); + rmm::device_uvector tmp_norm_x2(0, stream); + if (norm_x1 == nullptr) { + tmp_norm_x1.reserve(x1.get_structure().get_n_rows(), stream); + norm_x1 = tmp_norm_x1.data(); + matrixRowNormL2(x1, norm_x1, stream); + } + if (norm_x2 == nullptr) { + tmp_norm_x2.reserve(x2.extent(0), stream); + norm_x2 = tmp_norm_x2.data(); + matrixRowNormL2(x2, norm_x2, stream); } + + // compute L2expanded + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel(out.data_handle(), + ld_out, + out.extent(0), + out.extent(1), + norm_x1, + norm_x2, + is_row_major, + handle.get_stream()); } - /** Customize distance function withe RBF epilogue */ - void distance_rbf(const raft::distance::matrix::detail::DenseMatrix& x1, - const raft::distance::matrix::detail::DenseMatrix& x2, - raft::distance::matrix::detail::DenseMatrix& out, - cudaStream_t stream) + /** Evaluate kernel matrix using RBF kernel. + * + * output_[i + k*n1] = exp(-gain*|x1_i - x2_k|^2), + * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector + * in the x2 set, and | | euclidean distance. + * + * @param [in] x1 csr device matrix view, size [n1*n_cols] + * @param [in] x2 csr device matrix view, size [n2*n_cols] + * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] + * @param [in] handle raft handle + * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. + * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. + */ + void evaluate(csr_input_matrix_view_t x1, + csr_input_matrix_view_t x2, + dense_output_matrix_view_t out, + raft::device_resources const& handle, + math_t* norm_x1, + math_t* norm_x2) { - int minor1 = x1.is_row_major ? x1.n_cols : x1.n_rows; - int minor2 = x2.is_row_major ? x2.n_cols : x2.n_rows; - int minor_out = out.is_row_major ? 
out.n_cols : out.n_rows; - ASSERT(x1.ld == minor1, "RBF Kernel distance does not support ld1 parameter"); - ASSERT(x2.ld == minor2, "RBF Kernel distance does not support ld2 parameter"); - ASSERT(out.ld == minor_out, "RBF Kernel distance does not support ld_out parameter"); - ASSERT(x1.is_row_major == x2.is_row_major, - "GramMatrix leading dimensions for x1 and x2 do not match"); - ASSERT(x2.is_row_major == out.is_row_major, - "GramMatrix leading dimensions for x2 and out do not match"); + cudaStream_t stream = handle.get_stream(); - math_t gain = this->gain; - using index_t = int64_t; + // lazy compute norms if not given + rmm::device_uvector tmp_norm_x1(0, stream); + rmm::device_uvector tmp_norm_x2(0, stream); + if (norm_x1 == nullptr) { + tmp_norm_x1.reserve(x1.get_structure().get_n_rows(), stream); + norm_x1 = tmp_norm_x1.data(); + matrixRowNormL2(x1, norm_x1, stream); + } + if (norm_x2 == nullptr) { + tmp_norm_x2.reserve(x2.get_structure().get_n_rows(), stream); + norm_x2 = tmp_norm_x2.data(); + matrixRowNormL2(x2, norm_x2, stream); + } - auto fin_op = [gain] __device__(math_t d_val, index_t idx) { return exp(-gain * d_val); }; - raft::distance::distance(device_resources(stream), - const_cast(x1.data), - const_cast(x2.data), - out.data, - out.n_rows, - out.n_cols, - x1.n_cols, - NULL, - 0, - fin_op, - out.is_row_major); + // compute L2expanded + bool is_row_major = GramMatrixBase::get_is_row_major(out); + int ld_out = is_row_major ? out.stride(0) : out.stride(1); + GramMatrixBase::linear(x1, x2, out, handle); + applyKernel(out.data_handle(), + ld_out, + out.extent(0), + out.extent(1), + norm_x1, + norm_x2, + is_row_major, + handle.get_stream()); } /** Evaluate the Gram matrix using the legacy interface. @@ -531,12 +706,33 @@ class RBFKernel : public GramMatrixBase { { ASSERT(GramMatrixBase::legacy_interface, "Legacy interface can only be used with legacy ctor."); - raft::distance::matrix::detail::DenseMatrix dense1( - const_cast(x1), n1, n_cols, is_row_major, ld1); - raft::distance::matrix::detail::DenseMatrix dense2( - const_cast(x2), n2, n_cols, is_row_major, ld2); - raft::distance::matrix::detail::DenseMatrix dense_out(out, n1, n2, is_row_major, ld_out); - distance_rbf(dense1, dense2, dense_out, stream); + int minor1 = is_row_major ? n_cols : n1; + int minor2 = is_row_major ? n_cols : n2; + int minor_out = is_row_major ? n2 : n1; + ASSERT(ld1 == minor1, "RBF Kernel distance does not support ld1 parameter"); + ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter"); + ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter"); + + math_t gain = this->gain; + using index_t = int64_t; + + auto fin_op = [gain] __device__(math_t d_val, index_t idx) { return exp(-gain * d_val); }; + raft::distance::distance(device_resources(stream), + const_cast(x1), + const_cast(x2), + out, + n1, + n2, + n_cols, + NULL, + 0, + fin_op, + is_row_major); } }; diff --git a/cpp/include/raft/distance/detail/matrix/matrix.hpp b/cpp/include/raft/distance/detail/matrix/matrix.hpp deleted file mode 100644 index d4a0dda691..0000000000 --- a/cpp/include/raft/distance/detail/matrix/matrix.hpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace raft::distance::matrix::detail { - -template -class DenseMatrix; -template -class CsrMatrix; - -/* - * Thin matrix wrapper to allow single API for different matrix representations - */ -template -class Matrix { - public: - Matrix(int rows, int cols) : n_rows(rows), n_cols(cols){}; - virtual bool isDense() const = 0; - virtual ~Matrix(){}; - - DenseMatrix* asDense() - { - DenseMatrix* cast = dynamic_cast*>(this); - ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); - return cast; - }; - - CsrMatrix* asCsr() - { - CsrMatrix* cast = dynamic_cast*>(this); - ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); - return cast; - }; - - const DenseMatrix* asDense() const - { - const DenseMatrix* cast = dynamic_cast*>(this); - ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); - return cast; - }; - - const CsrMatrix* asCsr() const - { - const CsrMatrix* cast = dynamic_cast*>(this); - ASSERT(cast != nullptr, "Invalid cast! Please check for isDense() before casting."); - return cast; - }; - - int n_rows; - int n_cols; -}; - -template -class DenseMatrix : public Matrix { - public: - DenseMatrix(math_t* data, int rows, int cols, bool row_major = false, int ld_in = 0) - : Matrix(rows, cols), data(data), is_row_major(row_major), ld(ld_in) - { - if (ld <= 0) ld = is_row_major ? 
cols : rows; - } - bool isDense() const { return true; } - math_t* data; - bool is_row_major; - int ld; -}; - -template -class CsrMatrix : public Matrix { - public: - CsrMatrix(int* indptr, int* indices, math_t* data, int nnz, int rows, int cols) - : Matrix(rows, cols), indptr(indptr), indices(indices), data(data), nnz(nnz) - { - } - bool isDense() const { return false; } - - int nnz; - int* indptr; - int* indices; - math_t* data; -}; - -} // namespace raft::distance::matrix::detail \ No newline at end of file diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index 6a93fed0ad..4adc07b240 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -33,8 +32,6 @@ namespace raft::distance::kernels { -using namespace raft::distance::matrix::detail; - struct GramMatrixInputs { int n1; // feature vectors in matrix 1 int n2; // featuer vectors in matrix 2 @@ -111,16 +108,15 @@ class GramMatrixTest : public ::testing::TestWithParam { void runTest() { std::unique_ptr> kernel = - std::unique_ptr>(KernelFactory::create(params.kernel, handle)); + std::unique_ptr>(KernelFactory::create(params.kernel)); - DenseMatrix x1_dense( + auto x1_span = raft::make_device_matrix_view( x1.data(), params.n1, params.n_cols, params.is_row_major, params.ld1); - DenseMatrix x2_dense( + auto x2_span = raft::make_device_matrix_view( x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); - DenseMatrix gram_dense( + auto out_span = raft::make_device_matrix_view( gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); - - (*kernel)(x1_dense, x2_dense, gram_dense, stream); + (*kernel)(x1_span, x2_span, out_span, handle); naiveGramMatrixKernel(params.n1, params.n2, diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu index bd714d25b3..22f5e3b991 100644 --- a/cpp/test/sparse/gram.cu +++ b/cpp/test/sparse/gram.cu @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -35,8 +34,6 @@ namespace raft::distance::kernels { -using namespace raft::distance::matrix::detail; - /** * Structure to describe structure of the input matrices: * - DENSE: dense, dense @@ -213,56 +210,58 @@ class GramMatrixTest : public ::testing::TestWithParam { void runTest() { std::unique_ptr> kernel = - std::unique_ptr>(KernelFactory::create(params.kernel, handle)); + std::unique_ptr>(KernelFactory::create(params.kernel)); - Matrix* x1_matrix = nullptr; - Matrix* x2_matrix = nullptr; + auto x1_span = raft::make_device_matrix_view( + x1.data(), params.n1, params.n_cols, params.is_row_major, params.ld1); + auto x2_span = raft::make_device_matrix_view( + x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); + auto out_span = raft::make_device_matrix_view( + gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); - if (params.sparse_input != SparseType::DENSE) { + if (params.sparse_input == SparseType::DENSE) { + (*kernel)(x1_span, x2_span, out_span, handle); + } else { x1_csr_indptr.reserve(params.n1 + 1, stream); x1_csr_indices.reserve(params.n1 * params.n_cols, stream); x1_csr_data.reserve(params.n1 * params.n_cols, stream); - int nnz = prepareCsr(x1.data(), - params.n1, - params.ld1, - x1_csr_indptr.data(), - x1_csr_indices.data(), - x1_csr_data.data()); - x1_matrix = new CsrMatrix(x1_csr_indptr.data(), - x1_csr_indices.data(), - x1_csr_data.data(), - nnz, - params.n1, - params.n_cols); - } else { - x1_matrix = new DenseMatrix( - x1.data(), 
params.n1, params.n_cols, params.is_row_major, params.ld1); - } - - if (params.sparse_input == SparseType::CSR) { - x2_csr_indptr.reserve(params.n2 + 1, stream); - x2_csr_indices.reserve(params.n2 * params.n_cols, stream); - x2_csr_data.reserve(params.n2 * params.n_cols, stream); - int nnz = prepareCsr(x2.data(), - params.n2, - params.ld2, - x2_csr_indptr.data(), - x2_csr_indices.data(), - x2_csr_data.data()); - x2_matrix = new CsrMatrix(x2_csr_indptr.data(), - x2_csr_indices.data(), - x2_csr_data.data(), - nnz, - params.n2, - params.n_cols); - } else { - x2_matrix = new DenseMatrix( - x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); + int x1_nnz = prepareCsr(x1.data(), + params.n1, + params.ld1, + x1_csr_indptr.data(), + x1_csr_indices.data(), + x1_csr_data.data()); + + auto x1_csr_structure = raft::make_device_csr_structure_view( + x1_csr_indptr.data(), x1_csr_indices.data(), params.n1, params.n_cols, x1_nnz); + + auto x1_csr = raft::device_csr_matrix_view( + raft::device_span(x1_csr_data.data(), x1_csr_structure.get_nnz()), + x1_csr_structure); + + if (params.sparse_input == SparseType::MIX) { + (*kernel)(x1_csr, x2_span, out_span, handle); + } else { + x2_csr_indptr.reserve(params.n2 + 1, stream); + x2_csr_indices.reserve(params.n2 * params.n_cols, stream); + x2_csr_data.reserve(params.n2 * params.n_cols, stream); + int x2_nnz = prepareCsr(x2.data(), + params.n2, + params.ld2, + x2_csr_indptr.data(), + x2_csr_indices.data(), + x2_csr_data.data()); + + auto x2_csr_structure = raft::make_device_csr_structure_view( + x2_csr_indptr.data(), x2_csr_indices.data(), params.n2, params.n_cols, x2_nnz); + auto x2_csr = raft::device_csr_matrix_view( + raft::device_span(x2_csr_data.data(), x2_csr_structure.get_nnz()), + x2_csr_structure); + + (*kernel)(x1_csr, x2_csr, out_span, handle); + } } - DenseMatrix gram_dense( - gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); - naiveGramMatrixKernel(params.n1, params.n2, params.n_cols, @@ -277,14 +276,10 @@ class GramMatrixTest : public ::testing::TestWithParam { stream, handle); - (*kernel)(*x1_matrix, *x2_matrix, gram_dense, stream); handle.sync_stream(stream); ASSERT_TRUE(raft::devArrMatchHost( gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f))); - - delete x1_matrix; - delete x2_matrix; } raft::device_resources handle; From 2403b2d73f5040e7b15204047861ae7e5443bcc4 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Thu, 30 Mar 2023 18:39:50 +0000 Subject: [PATCH 11/20] utilize public API for spmm, gemm --- cpp/include/raft/core/device_mdspan.hpp | 21 +-- .../distance/detail/kernels/gram_matrix.cuh | 166 ++++++------------ .../raft/sparse/linalg/detail/spmm.hpp | 147 ++++++++++++++++ cpp/include/raft/sparse/linalg/spmm.cuh | 76 ++++++++ cpp/test/distance/gram.cu | 25 ++- cpp/test/sparse/gram.cu | 24 ++- 6 files changed, 325 insertions(+), 134 deletions(-) create mode 100644 cpp/include/raft/sparse/linalg/detail/spmm.hpp create mode 100644 cpp/include/raft/sparse/linalg/spmm.cuh diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp index ace7ea0f2c..c4a493503e 100644 --- a/cpp/include/raft/core/device_mdspan.hpp +++ b/cpp/include/raft/core/device_mdspan.hpp @@ -266,26 +266,27 @@ auto make_device_matrix_view(ElementType* ptr, IndexType n_rows, IndexType n_col * pointer. 
* @tparam ElementType the data type of the matrix elements * @tparam IndexType the index type of the extents + * @tparam LayoutPolicy policy for strides and layout ordering * @param[in] ptr on device to wrap * @param[in] n_rows number of rows in pointer * @param[in] n_cols number of columns in pointer * @param[in] is_row_major whether the data is in row major format (column major otherwise) - * @param[in] ld leading dimension / stride of data + * @param[in] stride leading dimension / stride of data */ -template -auto make_device_matrix_view( - ElementType* ptr, IndexType n_rows, IndexType n_cols, bool is_row_major, IndexType ld) +template +auto make_device_strided_matrix_view(ElementType* ptr, + IndexType n_rows, + IndexType n_cols, + IndexType stride) { - IndexType stride0 = is_row_major ? (ld > 0 ? ld : n_cols) : 1; - IndexType stride1 = is_row_major ? 1 : (ld > 0 ? ld : n_rows); + constexpr auto is_row_major = std::is_same_v; + IndexType stride0 = is_row_major ? (stride > 0 ? stride : n_cols) : 1; + IndexType stride1 = is_row_major ? 1 : (stride > 0 ? stride : n_rows); assert(is_row_major ? stride0 >= n_cols : stride1 >= n_rows); - matrix_extent extents{n_rows, n_cols}; - std::array strides{stride0, stride1}; - using mapping_type = typename layout_stride::template mapping>; - mapping_type layout = {extents, strides}; + auto layout = make_strided_layout(extents, std::array{stride0, stride1}); return device_matrix_view{ptr, layout}; } diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 9cce6cf5ee..31feb75e05 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -20,8 +20,9 @@ #include #include #include -#include +//#include #include +#include #include #include @@ -315,18 +316,24 @@ class GramMatrixBase { protected: bool get_is_row_major(dense_output_matrix_view_t matrix) { - ASSERT(matrix.stride(0) == 1 || matrix.stride(1) == 1, - "GramMatrix matrix layout minor stride needs to be 1"); return (matrix.stride(1) == 1); } bool get_is_row_major(dense_input_matrix_view_t matrix) { - ASSERT(matrix.stride(0) == 1 || matrix.stride(1) == 1, - "GramMatrix matrix layout minor stride needs to be 1"); return (matrix.stride(1) == 1); } + bool get_is_col_major(dense_output_matrix_view_t matrix) + { + return (matrix.stride(0) == 1); + } + + bool get_is_col_major(dense_input_matrix_view_t matrix) + { + return (matrix.stride(0) == 1); + } + /** Calculates the Gram matrix using simple dot product between vector sets. * * out = x1 * x2 @@ -344,11 +351,10 @@ class GramMatrixBase { raft::device_resources const& handle) { // check is_row_major consistency - bool is_row_major = get_is_row_major(out); - ASSERT(is_row_major ? (x1.stride(1) == 1) : (x1.stride(0) == 1), - "GramMatrix leading dimensions for x1 and out do not match"); - ASSERT(is_row_major ? 
(x2.stride(1) == 1) : (x2.stride(0) == 1), - "GramMatrix leading dimensions for x2 and out do not match"); + bool is_row_major = get_is_row_major(x1) && get_is_row_major(x2) && get_is_row_major(out); + bool is_col_major = get_is_col_major(x1) && get_is_col_major(x2) && get_is_col_major(out); + ASSERT(is_row_major || is_col_major, + "GramMatrix leading dimensions for x1, x2 and out do not match"); // check dimensions int n1 = out.extent(0); @@ -366,39 +372,41 @@ class GramMatrixBase { math_t alpha = 1.0; math_t beta = 0.0; if (is_row_major) { - // #TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - n2, - n1, - n_cols, - &alpha, - x2.data_handle(), - ld2, - x1.data_handle(), - ld1, - &beta, - out.data_handle(), - ld_out, - handle.get_stream())); + // #TODO: Use mdspan-based API when stride-capable + // https://github.com/rapidsai/raft/issues/875 + raft::linalg::gemm(handle, + true, + false, + n2, + n1, + n_cols, + &alpha, + x2.data_handle(), + ld2, + x1.data_handle(), + ld1, + &beta, + out.data_handle(), + ld_out, + handle.get_stream()); } else { - // #TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), - CUBLAS_OP_N, - CUBLAS_OP_T, - n1, - n2, - n_cols, - &alpha, - x1.data_handle(), - ld1, - x2.data_handle(), - ld2, - &beta, - out.data_handle(), - ld_out, - handle.get_stream())); + // #TODO: Use mdspan-based API when stride-capable + // https://github.com/rapidsai/raft/issues/875 + raft::linalg::gemm(handle, + false, + true, + n1, + n2, + n_cols, + &alpha, + x1.data_handle(), + ld1, + x2.data_handle(), + ld2, + &beta, + out.data_handle(), + ld_out, + handle.get_stream()); } } @@ -419,8 +427,9 @@ class GramMatrixBase { raft::device_resources const& handle) { // check is_row_major consistency - bool is_row_major = get_is_row_major(out); - ASSERT(is_row_major ? (x2.stride(1) == 1) : (x2.stride(0) == 1), + bool is_row_major = get_is_row_major(x2) && get_is_row_major(out); + bool is_col_major = get_is_col_major(x2) && get_is_col_major(out); + ASSERT(is_row_major || is_col_major, "GramMatrix leading dimensions for x2 and out do not match"); // check dimensions @@ -432,77 +441,10 @@ class GramMatrixBase { ASSERT(x2.extent(1) == x1_structure.get_n_cols(), "GramMatrix input matrix dimensions for x1 and x2 do not match"); - // extract major stride - int ld2 = is_row_major ? x2.stride(0) : x2.stride(1); - int ld_out = is_row_major ? out.stride(0) : out.stride(1); - math_t alpha = 1.0; math_t beta = 0.0; - cusparseSpMatDescr_t descrX1; - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsecreatecsr(&descrX1, - x1_structure.get_n_rows(), - x1_structure.get_n_cols(), - x1_structure.get_nnz(), - const_cast(x1_structure.get_indptr().data()), - const_cast(x1_structure.get_indices().data()), - const_cast(x1.get_elements().data()))); - - auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; - - cusparseDnMatDescr_t descrX2; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( - &descrX2, x2.extent(0), x2.extent(1), ld2, const_cast(x2.data_handle()), order)); - - cusparseDnMatDescr_t descrOut; - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsecreatednmat(&descrOut, - out.extent(0), - out.extent(1), - ld_out, - const_cast(out.data_handle()), - order)); - - auto alg = order == CUSPARSE_ORDER_COL ? 
CUSPARSE_SPMM_CSR_ALG1 : CUSPARSE_SPMM_CSR_ALG2; - - // compute X1*X2^T - auto opX1 = CUSPARSE_OPERATION_NON_TRANSPOSE; - auto opX2 = CUSPARSE_OPERATION_TRANSPOSE; - - size_t bufferSize; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(), - opX1, - opX2, - &alpha, - descrX1, - descrX2, - &beta, - descrOut, - alg, - &bufferSize, - handle.get_stream())); - - raft::interruptible::synchronize(handle.get_stream()); - - rmm::device_uvector tmp(bufferSize, handle.get_stream()); - - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(), - opX1, - opX2, - &alpha, - descrX1, - descrX2, - &beta, - descrOut, - alg, - tmp.data(), - handle.get_stream())); - - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descrX1)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descrX2)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descrOut)); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + raft::sparse::linalg::spmm(handle, false, true, &alpha, x1, x2, &beta, out); } /** Calculates the Gram matrix using simple dot product between vector sets. diff --git a/cpp/include/raft/sparse/linalg/detail/spmm.hpp b/cpp/include/raft/sparse/linalg/detail/spmm.hpp new file mode 100644 index 0000000000..ec5328f72e --- /dev/null +++ b/cpp/include/raft/sparse/linalg/detail/spmm.hpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace raft { +namespace sparse { +namespace linalg { +namespace detail { + +/** + * @brief create a cuSparse dense descriptor + * @tparam ValueType Data type of dense_view (float/double) + * @tparam IndexType Type of dense_view + * @tparam LayoutPolicy layout of dense_view + * @param[in] handle raft handle + * @param[in] dense_view input raft::device_matrix_view + * @returns dense matrix descriptor to be used by cuSparse API + */ +template +cusparseDnMatDescr_t create_descriptor( + raft::device_matrix_view& dense_view) +{ + ASSERT(dense_view.stride(0) == 1 || dense_view.stride(1) == 1, "Smallest stride needs to be 1"); + bool is_row_major = dense_view.stride(1) == 1; + auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + IndexType ld = is_row_major ? 
dense_view.stride(0) : dense_view.stride(1); + cusparseDnMatDescr_t descr; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( + &descr, + dense_view.extent(0), + dense_view.extent(1), + ld, + const_cast*>(dense_view.data_handle()), + order)); + return descr; +} + +/** + * @brief create a cuSparse sparse descriptor + * @tparam ValueType Data type of sparse_view (float/double) + * @tparam NZType Type of sparse_view + * @param[in] handle raft handle + * @param[in] sparse_view input raft::device_csr_matrix_view of size M rows x K columns + * @returns sparse matrix descriptor to be used by cuSparse API + */ +template +cusparseSpMatDescr_t create_descriptor( + raft::device_csr_matrix_view& sparse_view) +{ + cusparseSpMatDescr_t descr; + auto csr_structure = sparse_view.get_structure(); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr( + &descr, + csr_structure.get_n_rows(), + csr_structure.get_n_cols(), + csr_structure.get_nnz(), + const_cast(csr_structure.get_indptr().data()), + const_cast(csr_structure.get_indices().data()), + const_cast*>(sparse_view.get_elements().data()))); + return descr; +} + +/** + * @brief SPMM function designed for handling all CSR * DENSE + * combinations of operand layouts for cuSparse. + * It computes the following equation: Z = alpha . X * Y + beta . Z + * where X is a CSR device matrix view and Y,Z are device matrix views + * @tparam ValueType Data type of input/output matrices (float/double) + * @tparam IndexType Type of Y and Z + * @tparam NZType Type of X + * @tparam LayoutPolicyY layout of Y + * @tparam LayoutPolicyZ layout of Z + * @param[in] handle raft handle + * @param[in] trans_x transpose operation for X + * @param[in] trans_y transpose operation for Y + * @param[in] alpha scalar + * @param[in] descr_x input sparse descriptor + * @param[in] descr_y input dense descriptor + * @param[in] beta scalar + * @param[out] descr_z output dense descriptor + */ +template +void spmm(raft::device_resources const& handle, + const bool trans_x, + const bool trans_y, + const ValueType* alpha, + cusparseSpMatDescr_t& descr_x, + cusparseDnMatDescr_t& descr_y, + const ValueType* beta, + cusparseDnMatDescr_t& descr_z) +{ + auto opX = trans_x ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE; + auto opY = trans_y ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE; + auto alg = CUSPARSE_SPMM_CSR_ALG1; + size_t bufferSize; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(), + opX, + opY, + alpha, + descr_x, + descr_y, + beta, + descr_z, + alg, + &bufferSize, + handle.get_stream())); + + raft::interruptible::synchronize(handle.get_stream()); + + rmm::device_uvector tmp(bufferSize, handle.get_stream()); + + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(), + opX, + opY, + alpha, + descr_x, + descr_y, + beta, + descr_z, + alg, + tmp.data(), + handle.get_stream())); +} + +} // end namespace detail +} // end namespace linalg +} // end namespace sparse +} // end namespace raft diff --git a/cpp/include/raft/sparse/linalg/spmm.cuh b/cpp/include/raft/sparse/linalg/spmm.cuh new file mode 100644 index 0000000000..95396309bc --- /dev/null +++ b/cpp/include/raft/sparse/linalg/spmm.cuh @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __SPMM_H +#define __SPMM_H + +#pragma once + +#include "detail/spmm.hpp" + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @brief SPMM function designed for handling all CSR * DENSE + * combinations of operand layouts for cuSparse. + * It computes the following equation: Z = alpha . X * Y + beta . Z + * where X is a CSR device matrix view and Y,Z are device matrix views + * @tparam ValueType Data type of input/output matrices (float/double) + * @tparam IndexType Type of Y and Z + * @tparam NZType Type of X + * @tparam LayoutPolicyY layout of Y + * @tparam LayoutPolicyZ layout of Z + * @param[in] handle raft handle + * @param[in] trans_x transpose operation for X + * @param[in] trans_y transpose operation for Y + * @param[in] alpha scalar + * @param[in] x input raft::device_csr_matrix_view + * @param[in] y input raft::device_matrix_view + * @param[in] beta scalar + * @param[out] z output raft::device_matrix_view + */ +template +void spmm(raft::device_resources const& handle, + const bool trans_x, + const bool trans_y, + const ValueType* alpha, + raft::device_csr_matrix_view x, + raft::device_matrix_view y, + const ValueType* beta, + raft::device_matrix_view z) +{ + auto descr_x = detail::create_descriptor(x); + auto descr_y = detail::create_descriptor(y); + auto descr_z = detail::create_descriptor(z); + + detail::spmm(handle, trans_x, trans_y, alpha, descr_x, descr_y, beta, descr_z); + + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr_x)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr_y)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr_z)); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +} // end namespace linalg +} // end namespace sparse +} // end namespace raft + +#endif diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index 810dbbc45b..c4277c7c98 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -110,12 +110,25 @@ class GramMatrixTest : public ::testing::TestWithParam { std::unique_ptr> kernel = std::unique_ptr>(KernelFactory::create(params.kernel)); - auto x1_span = raft::make_device_matrix_view( - x1.data(), params.n1, params.n_cols, params.is_row_major, params.ld1); - auto x2_span = raft::make_device_matrix_view( - x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); - auto out_span = raft::make_device_matrix_view( - gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); + auto x1_span = + params.is_row_major + ? raft::make_device_strided_matrix_view( + x1.data(), params.n1, params.n_cols, params.ld1) + : raft::make_device_strided_matrix_view( + x1.data(), params.n1, params.n_cols, params.ld1); + auto x2_span = + params.is_row_major + ? raft::make_device_strided_matrix_view( + x2.data(), params.n2, params.n_cols, params.ld2) + : raft::make_device_strided_matrix_view( + x2.data(), params.n2, params.n_cols, params.ld2); + auto out_span = + params.is_row_major + ? 
raft::make_device_strided_matrix_view( + gram.data(), params.n1, params.n2, params.ld_out) + : raft::make_device_strided_matrix_view( + gram.data(), params.n1, params.n2, params.ld_out); + (*kernel)(x1_span, x2_span, out_span, handle); naiveGramMatrixKernel(params.n1, diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu index 22f5e3b991..cf0ddfc921 100644 --- a/cpp/test/sparse/gram.cu +++ b/cpp/test/sparse/gram.cu @@ -212,12 +212,24 @@ class GramMatrixTest : public ::testing::TestWithParam { std::unique_ptr> kernel = std::unique_ptr>(KernelFactory::create(params.kernel)); - auto x1_span = raft::make_device_matrix_view( - x1.data(), params.n1, params.n_cols, params.is_row_major, params.ld1); - auto x2_span = raft::make_device_matrix_view( - x2.data(), params.n2, params.n_cols, params.is_row_major, params.ld2); - auto out_span = raft::make_device_matrix_view( - gram.data(), params.n1, params.n2, params.is_row_major, params.ld_out); + auto x1_span = + params.is_row_major + ? raft::make_device_strided_matrix_view( + x1.data(), params.n1, params.n_cols, params.ld1) + : raft::make_device_strided_matrix_view( + x1.data(), params.n1, params.n_cols, params.ld1); + auto x2_span = + params.is_row_major + ? raft::make_device_strided_matrix_view( + x2.data(), params.n2, params.n_cols, params.ld2) + : raft::make_device_strided_matrix_view( + x2.data(), params.n2, params.n_cols, params.ld2); + auto out_span = + params.is_row_major + ? raft::make_device_strided_matrix_view( + gram.data(), params.n1, params.n2, params.ld_out) + : raft::make_device_strided_matrix_view( + gram.data(), params.n1, params.n2, params.ld_out); if (params.sparse_input == SparseType::DENSE) { (*kernel)(x1_span, x2_span, out_span, handle); From f57be138f72361e9bf49b5f0c2133620cf9d47ed Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Thu, 30 Mar 2023 20:08:59 +0000 Subject: [PATCH 12/20] refactored rowNormCsr to utilize csr_row_op --- .../detail/kernels/kernel_matrices.cuh | 30 +++--- .../raft/sparse/linalg/detail/norm.cuh | 97 +++++++------------ cpp/include/raft/sparse/linalg/norm.cuh | 18 ++-- cpp/test/sparse/norm.cu | 2 +- 4 files changed, 60 insertions(+), 87 deletions(-) diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index cb93ee3cf8..1117165c76 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -498,9 +498,9 @@ class RBFKernel : public GramMatrixBase { { } - void matrixRowNormL2(dense_input_matrix_view_t matrix, - math_t* target, - cudaStream_t stream) + void matrixRowNormL2(raft::device_resources const& handle, + dense_input_matrix_view_t matrix, + math_t* target) { bool is_row_major = GramMatrixBase::get_is_row_major(matrix); int minor = is_row_major ? 
matrix.extent(1) : matrix.extent(0); @@ -512,19 +512,21 @@ class RBFKernel : public GramMatrixBase { matrix.extent(0), raft::linalg::NormType::L2Norm, is_row_major, - stream); + handle.get_stream()); } - void matrixRowNormL2(csr_input_matrix_view_t matrix, math_t* target, cudaStream_t stream) + void matrixRowNormL2(raft::device_resources const& handle, + csr_input_matrix_view_t matrix, + math_t* target) { auto matrix_structure = matrix.get_structure(); - raft::sparse::linalg::rowNormCsr(target, + raft::sparse::linalg::rowNormCsr(handle, matrix_structure.get_indptr().data(), matrix.get_elements().data(), matrix_structure.get_nnz(), matrix_structure.get_n_rows(), - raft::linalg::NormType::L2Norm, - stream); + target, + raft::linalg::NormType::L2Norm); } /** Evaluate kernel matrix using RBF kernel. @@ -555,12 +557,12 @@ class RBFKernel : public GramMatrixBase { if (norm_x1 == nullptr) { tmp_norm_x1.reserve(x1.extent(0), stream); norm_x1 = tmp_norm_x1.data(); - matrixRowNormL2(x1, norm_x1, stream); + matrixRowNormL2(handle, x1, norm_x1); } if (norm_x2 == nullptr) { tmp_norm_x2.reserve(x2.extent(0), stream); norm_x2 = tmp_norm_x2.data(); - matrixRowNormL2(x2, norm_x2, stream); + matrixRowNormL2(handle, x2, norm_x2); } // compute L2expanded @@ -605,12 +607,12 @@ class RBFKernel : public GramMatrixBase { if (norm_x1 == nullptr) { tmp_norm_x1.reserve(x1.get_structure().get_n_rows(), stream); norm_x1 = tmp_norm_x1.data(); - matrixRowNormL2(x1, norm_x1, stream); + matrixRowNormL2(handle, x1, norm_x1); } if (norm_x2 == nullptr) { tmp_norm_x2.reserve(x2.extent(0), stream); norm_x2 = tmp_norm_x2.data(); - matrixRowNormL2(x2, norm_x2, stream); + matrixRowNormL2(handle, x2, norm_x2); } // compute L2expanded @@ -655,12 +657,12 @@ class RBFKernel : public GramMatrixBase { if (norm_x1 == nullptr) { tmp_norm_x1.reserve(x1.get_structure().get_n_rows(), stream); norm_x1 = tmp_norm_x1.data(); - matrixRowNormL2(x1, norm_x1, stream); + matrixRowNormL2(handle, x1, norm_x1); } if (norm_x2 == nullptr) { tmp_norm_x2.reserve(x2.get_structure().get_n_rows(), stream); norm_x2 = tmp_norm_x2.data(); - matrixRowNormL2(x2, norm_x2, stream); + matrixRowNormL2(handle, x2, norm_x2); } // compute L2expanded diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 5af7749c39..1e66af3d10 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -24,6 +24,8 @@ #include #include +#include + #include #include @@ -173,88 +175,57 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) RAFT_CUDA_TRY(cudaGetLastError()); } -template -struct CsrReductionPolicy { - static constexpr int LogicalWarpSize = warpSize; - static constexpr int RowsPerBlock = rpb; - static constexpr int ThreadsPerBlock = LogicalWarpSize * RowsPerBlock; -}; - -template -__global__ void __launch_bounds__(Policy::ThreadsPerBlock) - csrReductionKernel(Type* norm, - const IdxType* ia, - const Type* data, - IdxType N, - Type init, - MainLambda main_op, - ReduceLambda reduce_op, - FinalLambda final_op) -{ - IdxType i = threadIdx.y + (Policy::RowsPerBlock * static_cast(blockIdx.x)); - if (i >= N) return; - - Type acc = init; - for (IdxType j = ia[i] + threadIdx.x; j < ia[i + 1]; j += Policy::LogicalWarpSize) { - acc = reduce_op(acc, main_op(data[j])); - } - acc = raft::logicalWarpReduce(acc, reduce_op); - if (threadIdx.x == 0) { norm[i] = final_op(acc); } -} - -template -void csrReduction(Type* norm, - const IdxType* ia, - const 
Type* data, - IdxType N, - Type init, - cudaStream_t stream, - MainLambda main_op = raft::identity_op(), - ReduceLambda reduce_op = raft::add_op(), - FinalLambda final_op = raft::identity_op()) +void csr_row_op_wrapper(const IdxType* ia, + const Type* data, + IdxType nnz, + IdxType N, + Type init, + Type* norm, + cudaStream_t stream, + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { - common::nvtx::range fun_scope( - "csrReduction<%d,%d>", Policy::LogicalWarpSize, Policy::RowsPerBlock); - dim3 threads(Policy::LogicalWarpSize, Policy::RowsPerBlock, 1); - dim3 blocks(ceildiv(N, Policy::RowsPerBlock), 1, 1); - csrReductionKernel - <<>>(norm, ia, data, N, init, main_op, reduce_op, final_op); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + op::csr_row_op( + ia, + N, + nnz, + [data, init, norm, main_op, reduce_op, final_op] __device__( + IdxType row, IdxType start_idx, IdxType stop_idx) { + norm[row] = init; + for (IdxType i = start_idx; i < stop_idx; i++) + norm[row] = final_op(reduce_op(norm[row], main_op(data[i]))); + }, + stream); } template -void rowNormCsrCaller(Type* norm, - const IdxType* ia, +void rowNormCsrCaller(const IdxType* ia, const Type* data, IdxType nnz, IdxType N, + Type* norm, raft::linalg::NormType type, - cudaStream_t stream, - Lambda fin_op) + Lambda fin_op, + cudaStream_t stream) { - // TODO: dispatch nnz to Policy? switch (type) { case raft::linalg::NormType::L1Norm: - csrReduction>( - norm, ia, data, N, (Type)0, stream, raft::abs_op(), raft::add_op(), fin_op); + csr_row_op_wrapper( + ia, data, nnz, N, (Type)0, norm, stream, raft::abs_op(), raft::add_op(), fin_op); break; case raft::linalg::NormType::L2Norm: - csrReduction>( - norm, ia, data, N, (Type)0, stream, raft::sq_op(), raft::add_op(), fin_op); + csr_row_op_wrapper( + ia, data, nnz, N, (Type)0, norm, stream, raft::sq_op(), raft::add_op(), fin_op); break; case raft::linalg::NormType::LinfNorm: - csrReduction>( - norm, ia, data, N, (Type)0, stream, raft::abs_op(), raft::max_op(), fin_op); + csr_row_op_wrapper( + ia, data, nnz, N, (Type)0, norm, stream, raft::abs_op(), raft::max_op(), fin_op); break; default: THROW("Unsupported norm type: %d", type); }; diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 6f01569a98..6ddaca0cd6 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -77,25 +77,25 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) * @tparam Type the data type * @tparam Lambda device final lambda * @tparam IdxType Integer type used to for addressing - * @param norm the output vector of row-wise norm, size [N] + * @param stream cuda stream where to launch work * @param ia the input matrix row index array * @param data the input matrix nnz data - * @param N number of rows of data + * @param nnz number of elements in data + * @param N number of rows + * @param norm the output vector of row-wise norm, size [N] * @param type the type of norm to be applied - * @param stream cuda stream where to launch work - * @param fin_op the final lambda op */ template -void rowNormCsr(Type* norm, +void rowNormCsr(raft::device_resources const& handle, const IdxType* ia, const Type* data, - IdxType nnz, - IdxType N, + const IdxType nnz, + const IdxType N, + Type* norm, raft::linalg::NormType type, - cudaStream_t stream, Lambda fin_op = raft::identity_op()) { - detail::rowNormCsrCaller(norm, ia, data, nnz, N, type, stream, 
fin_op); + detail::rowNormCsrCaller(ia, data, nnz, N, norm, type, fin_op, handle.get_stream()); } }; // end NAMESPACE linalg diff --git a/cpp/test/sparse/norm.cu b/cpp/test/sparse/norm.cu index f1328fa52d..65d857652c 100644 --- a/cpp/test/sparse/norm.cu +++ b/cpp/test/sparse/norm.cu @@ -62,7 +62,7 @@ class CSRRowNormTest : public ::testing::TestWithParam Date: Thu, 30 Mar 2023 20:47:29 +0000 Subject: [PATCH 13/20] changed order of arguments according to best practice --- .../distance/detail/kernels/gram_matrix.cuh | 72 +++++++++--------- .../detail/kernels/kernel_matrices.cuh | 74 +++++++++---------- cpp/test/distance/gram.cu | 2 +- cpp/test/sparse/gram.cu | 6 +- 4 files changed, 77 insertions(+), 77 deletions(-) diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index 31feb75e05..f03f746161 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -64,118 +64,118 @@ class GramMatrixBase { /** Convenience function to evaluate the Gram matrix for two vector sets. * Vector sets are provided in Matrix format * + * @param [in] handle raft handle * @param [in] x1 dense device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ - void operator()(dense_input_matrix_view_t x1, + void operator()(raft::device_resources const& handle, + dense_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1 = nullptr, math_t* norm_x2 = nullptr) { - evaluate(x1, x2, out, handle, norm_x1, norm_x2); + evaluate(handle, x1, x2, out, norm_x1, norm_x2); } /** Convenience function to evaluate the Gram matrix for two vector sets. * Vector sets are provided in Matrix format * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ - void operator()(csr_input_matrix_view_t x1, + void operator()(raft::device_resources const& handle, + csr_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1 = nullptr, math_t* norm_x2 = nullptr) { - evaluate(x1, x2, out, handle, norm_x1, norm_x2); + evaluate(handle, x1, x2, out, norm_x1, norm_x2); } /** Convenience function to evaluate the Gram matrix for two vector sets. * Vector sets are provided in Matrix format * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 csr device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. 
*/ - void operator()(csr_input_matrix_view_t x1, + void operator()(raft::device_resources const& handle, + csr_input_matrix_view_t x1, csr_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1 = nullptr, math_t* norm_x2 = nullptr) { - evaluate(x1, x2, out, handle, norm_x1, norm_x2); + evaluate(handle, x1, x2, out, norm_x1, norm_x2); } // unfortunately, 'evaluate' cannot be templatized as it needs to be virtual /** Evaluate the Gram matrix for two vector sets using simple dot product. * + * @param [in] handle raft handle * @param [in] x1 dense device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - virtual void evaluate(dense_input_matrix_view_t x1, + virtual void evaluate(raft::device_resources const& handle, + dense_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { - linear(x1, x2, out, handle); + linear(handle, x1, x2, out); } /** Evaluate the Gram matrix for two vector sets using simple dot product. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - virtual void evaluate(csr_input_matrix_view_t x1, + virtual void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { - linear(x1, x2, out, handle); + linear(handle, x1, x2, out); } /** Evaluate the Gram matrix for two vector sets using simple dot product. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 csr device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - virtual void evaluate(csr_input_matrix_view_t x1, + virtual void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, csr_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { - linear(x1, x2, out, handle); + linear(handle, x1, x2, out); } /** Evaluate the Gram matrix for two vector sets using simple dot product. @@ -340,15 +340,15 @@ class GramMatrixBase { * * Can be used as a building block for more complex kernel functions. 
* + * @param [in] handle raft handle * @param [in] x1 dense device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle */ - void linear(dense_input_matrix_view_t x1, + void linear(raft::device_resources const& handle, + dense_input_matrix_view_t x1, dense_input_matrix_view_t x2, - dense_output_matrix_view_t out, - raft::device_resources const& handle) + dense_output_matrix_view_t out) { // check is_row_major consistency bool is_row_major = get_is_row_major(x1) && get_is_row_major(x2) && get_is_row_major(out); @@ -416,15 +416,15 @@ class GramMatrixBase { * * Can be used as a building block for more complex kernel functions. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle */ - void linear(csr_input_matrix_view_t x1, + void linear(raft::device_resources const& handle, + csr_input_matrix_view_t x1, dense_input_matrix_view_t x2, - dense_output_matrix_view_t out, - raft::device_resources const& handle) + dense_output_matrix_view_t out) { // check is_row_major consistency bool is_row_major = get_is_row_major(x2) && get_is_row_major(out); @@ -453,15 +453,15 @@ class GramMatrixBase { * * Can be used as a building block for more complex kernel functions. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 csr device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle */ - void linear(csr_input_matrix_view_t x1, + void linear(raft::device_resources const& handle, + csr_input_matrix_view_t x1, csr_input_matrix_view_t x2, - dense_output_matrix_view_t out, - raft::device_resources const& handle) + dense_output_matrix_view_t out) { // check is_row_major consistency bool is_row_major = get_is_row_major(out); diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 1117165c76..785c66a3a2 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -188,23 +188,23 @@ class PolynomialKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * + * @param [in] handle raft handle * @param [in] x1 dense device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - void evaluate(dense_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + dense_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? 
out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel( out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } @@ -215,23 +215,23 @@ class PolynomialKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - void evaluate(csr_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel( out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } @@ -242,23 +242,23 @@ class PolynomialKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 csr device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - void evaluate(csr_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, csr_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel( out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } @@ -346,23 +346,23 @@ class TanhKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * + * @param [in] handle raft handle * @param [in] x1 dense device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - void evaluate(dense_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + dense_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? 
out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel( out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } @@ -373,23 +373,23 @@ class TanhKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - void evaluate(csr_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel( out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } @@ -400,23 +400,23 @@ class TanhKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and < , > denotes dot product. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 csr device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 unused. * @param norm_x2 unused. */ - void evaluate(csr_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, csr_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel( out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream()); } @@ -535,17 +535,17 @@ class RBFKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and | | euclidean distance. * - * @param [in] x2 dense device matrix view, size [n2*n_cols] + * @param [in] handle raft handle + * @param [in] x1 dense device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ - void evaluate(dense_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + dense_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { @@ -568,7 +568,7 @@ class RBFKernel : public GramMatrixBase { // compute L2expanded bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? 
out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel(out.data_handle(), ld_out, out.extent(0), @@ -585,17 +585,17 @@ class RBFKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and | | euclidean distance. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 dense device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ - void evaluate(csr_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, dense_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { @@ -618,7 +618,7 @@ class RBFKernel : public GramMatrixBase { // compute L2expanded bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel(out.data_handle(), ld_out, out.extent(0), @@ -635,17 +635,17 @@ class RBFKernel : public GramMatrixBase { * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector * in the x2 set, and | | euclidean distance. * + * @param [in] handle raft handle * @param [in] x1 csr device matrix view, size [n1*n_cols] * @param [in] x2 csr device matrix view, size [n2*n_cols] * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2] - * @param [in] handle raft handle * @param norm_x1 optional L2-norm of x1's rows for computation within RBF. * @param norm_x2 optional L2-norm of x2's rows for computation within RBF. */ - void evaluate(csr_input_matrix_view_t x1, + void evaluate(raft::device_resources const& handle, + csr_input_matrix_view_t x1, csr_input_matrix_view_t x2, dense_output_matrix_view_t out, - raft::device_resources const& handle, math_t* norm_x1, math_t* norm_x2) { @@ -668,7 +668,7 @@ class RBFKernel : public GramMatrixBase { // compute L2expanded bool is_row_major = GramMatrixBase::get_is_row_major(out); int ld_out = is_row_major ? 
out.stride(0) : out.stride(1); - GramMatrixBase::linear(x1, x2, out, handle); + GramMatrixBase::linear(handle, x1, x2, out); applyKernel(out.data_handle(), ld_out, out.extent(0), diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index c4277c7c98..47da201465 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -129,7 +129,7 @@ class GramMatrixTest : public ::testing::TestWithParam { : raft::make_device_strided_matrix_view( gram.data(), params.n1, params.n2, params.ld_out); - (*kernel)(x1_span, x2_span, out_span, handle); + (*kernel)(handle, x1_span, x2_span, out_span); naiveGramMatrixKernel(params.n1, params.n2, diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu index cf0ddfc921..e0bfb94f94 100644 --- a/cpp/test/sparse/gram.cu +++ b/cpp/test/sparse/gram.cu @@ -232,7 +232,7 @@ class GramMatrixTest : public ::testing::TestWithParam { gram.data(), params.n1, params.n2, params.ld_out); if (params.sparse_input == SparseType::DENSE) { - (*kernel)(x1_span, x2_span, out_span, handle); + (*kernel)(handle, x1_span, x2_span, out_span); } else { x1_csr_indptr.reserve(params.n1 + 1, stream); x1_csr_indices.reserve(params.n1 * params.n_cols, stream); @@ -252,7 +252,7 @@ class GramMatrixTest : public ::testing::TestWithParam { x1_csr_structure); if (params.sparse_input == SparseType::MIX) { - (*kernel)(x1_csr, x2_span, out_span, handle); + (*kernel)(handle, x1_csr, x2_span, out_span); } else { x2_csr_indptr.reserve(params.n2 + 1, stream); x2_csr_indices.reserve(params.n2 * params.n_cols, stream); @@ -270,7 +270,7 @@ class GramMatrixTest : public ::testing::TestWithParam { raft::device_span(x2_csr_data.data(), x2_csr_structure.get_nnz()), x2_csr_structure); - (*kernel)(x1_csr, x2_csr, out_span, handle); + (*kernel)(handle, x1_csr, x2_csr, out_span); } } From 2b6090a860e6fe36c6c63beb50939bceca13d6f2 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Thu, 30 Mar 2023 22:18:31 +0000 Subject: [PATCH 14/20] moved kernel computation to public section --- cpp/CMakeLists.txt | 17 +++++----- cpp/include/raft/distance/kernels.cuh | 12 ++----- .../{detail => }/kernels/gram_matrix.cuh | 10 +++--- .../{detail => }/kernels/kernel_factory.cuh | 4 +-- .../{detail => }/kernels/kernel_matrices.cuh | 4 +-- .../specializations/detail/kernels.cuh | 31 ------------------- .../distance/specializations/distance.cuh | 2 +- .../raft/distance/specializations/kernels.cuh | 30 ++++++++++++++++++ .../kernels/gram_matrix_base_double.cu | 4 +-- .../kernels/gram_matrix_base_float.cu | 4 +-- .../kernels/polynomial_kernel_double_int.cu | 4 +-- .../kernels/polynomial_kernel_float_int.cu | 4 +-- .../{detail => }/kernels/rbf_kernel_double.cu | 4 +-- .../{detail => }/kernels/rbf_kernel_float.cu | 4 +-- .../kernels/tanh_kernel_double.cu | 4 +-- .../{detail => }/kernels/tanh_kernel_float.cu | 4 +-- 16 files changed, 65 insertions(+), 77 deletions(-) rename cpp/include/raft/distance/{detail => }/kernels/gram_matrix.cuh (99%) rename cpp/include/raft/distance/{detail => }/kernels/kernel_factory.cuh (95%) rename cpp/include/raft/distance/{detail => }/kernels/kernel_matrices.cuh (99%) delete mode 100644 cpp/include/raft/distance/specializations/detail/kernels.cuh create mode 100644 cpp/include/raft/distance/specializations/kernels.cuh rename cpp/src/distance/specializations/{detail => }/kernels/gram_matrix_base_double.cu (83%) rename cpp/src/distance/specializations/{detail => }/kernels/gram_matrix_base_float.cu (83%) rename cpp/src/distance/specializations/{detail => 
}/kernels/polynomial_kernel_double_int.cu (82%) rename cpp/src/distance/specializations/{detail => }/kernels/polynomial_kernel_float_int.cu (82%) rename cpp/src/distance/specializations/{detail => }/kernels/rbf_kernel_double.cu (83%) rename cpp/src/distance/specializations/{detail => }/kernels/rbf_kernel_float.cu (84%) rename cpp/src/distance/specializations/{detail => }/kernels/tanh_kernel_double.cu (83%) rename cpp/src/distance/specializations/{detail => }/kernels/tanh_kernel_float.cu (83%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7bb458c44a..bb771d5e26 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -304,16 +304,7 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/detail/inner_product_double_double_double_int.cu src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu - src/distance/specializations/detail/kernels/gram_matrix_base_double.cu - src/distance/specializations/detail/kernels/gram_matrix_base_float.cu - src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu - src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu - # These are somehow missing a kernel definition which is causing a compile error. - # src/distance/specializations/detail/kernels/rbf_kernel_double.cu - # src/distance/specializations/detail/kernels/rbf_kernel_float.cu src/neighbors/brute_force_knn_int64_t_float.cu - src/distance/specializations/detail/kernels/tanh_kernel_double.cu - src/distance/specializations/detail/kernels/tanh_kernel_float.cu src/distance/specializations/detail/kl_divergence_float_float_float_int.cu src/distance/specializations/detail/kl_divergence_double_double_double_int.cu src/distance/specializations/detail/l1_float_float_float_int.cu @@ -332,6 +323,14 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/fused_l2_nn_double_int64.cu src/distance/specializations/fused_l2_nn_float_int.cu src/distance/specializations/fused_l2_nn_float_int64.cu + src/distance/specializations/kernels/gram_matrix_base_double.cu + src/distance/specializations/kernels/gram_matrix_base_float.cu + src/distance/specializations/kernels/polynomial_kernel_double_int.cu + src/distance/specializations/kernels/polynomial_kernel_float_int.cu + src/distance/specializations/kernels/rbf_kernel_double.cu + src/distance/specializations/kernels/rbf_kernel_float.cu + src/distance/specializations/kernels/tanh_kernel_double.cu + src/distance/specializations/kernels/tanh_kernel_float.cu src/matrix/specializations/detail/select_k_float_uint32_t.cu src/matrix/specializations/detail/select_k_float_int64_t.cu src/matrix/specializations/detail/select_k_half_uint32_t.cu diff --git a/cpp/include/raft/distance/kernels.cuh b/cpp/include/raft/distance/kernels.cuh index 86f9f82406..86a2107f82 100644 --- a/cpp/include/raft/distance/kernels.cuh +++ b/cpp/include/raft/distance/kernels.cuh @@ -16,17 +16,9 @@ #pragma once -#include -#include +#include +#include #include #include #include - -namespace raft::distance::kernels { - -// TODO: Need to expose formal APIs for this that are more consistent w/ other APIs in RAFT -using raft::distance::kernels::detail::GramMatrixBase; -using raft::distance::kernels::detail::KernelFactory; - -}; // end namespace raft::distance::kernels diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/kernels/gram_matrix.cuh similarity index 99% rename from 
cpp/include/raft/distance/detail/kernels/gram_matrix.cuh rename to cpp/include/raft/distance/kernels/gram_matrix.cuh index f03f746161..bdd02be1b1 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/kernels/gram_matrix.cuh @@ -20,14 +20,12 @@ #include #include #include -//#include -#include -#include - #include #include +#include +#include -namespace raft::distance::kernels::detail { +namespace raft::distance::kernels { template using dense_input_matrix_view_t = raft::device_matrix_view; @@ -507,4 +505,4 @@ class GramMatrixBase { } }; -}; // end namespace raft::distance::kernels::detail +}; // end namespace raft::distance::kernels diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/kernels/kernel_factory.cuh similarity index 95% rename from cpp/include/raft/distance/detail/kernels/kernel_factory.cuh rename to cpp/include/raft/distance/kernels/kernel_factory.cuh index 7c74e231d7..9999b29d85 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/kernels/kernel_factory.cuh @@ -21,7 +21,7 @@ #include #include -namespace raft::distance::kernels::detail { +namespace raft::distance::kernels { template class KernelFactory { @@ -61,4 +61,4 @@ class KernelFactory { } }; -}; // end namespace raft::distance::kernels::detail +}; // end namespace raft::distance::kernels diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/kernels/kernel_matrices.cuh similarity index 99% rename from cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh rename to cpp/include/raft/distance/kernels/kernel_matrices.cuh index 785c66a3a2..5bf011bd7a 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/kernels/kernel_matrices.cuh @@ -23,7 +23,7 @@ #include #include -namespace raft::distance::kernels::detail { +namespace raft::distance::kernels { /** Epiloge function for polynomial kernel without padding. * Calculates output = (gain*in + offset)^exponent @@ -738,4 +738,4 @@ class RBFKernel : public GramMatrixBase { } }; -}; // end namespace raft::distance::kernels::detail +}; // end namespace raft::distance::kernels diff --git a/cpp/include/raft/distance/specializations/detail/kernels.cuh b/cpp/include/raft/distance/specializations/detail/kernels.cuh deleted file mode 100644 index 75c9c023e8..0000000000 --- a/cpp/include/raft/distance/specializations/detail/kernels.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -extern template class raft::distance::kernels::detail::GramMatrixBase; -extern template class raft::distance::kernels::detail::GramMatrixBase; - -extern template class raft::distance::kernels::detail::PolynomialKernel; -extern template class raft::distance::kernels::detail::PolynomialKernel; - -extern template class raft::distance::kernels::detail::TanhKernel; -extern template class raft::distance::kernels::detail::TanhKernel; - -// These are somehow missing a kernel definition which is causing a compile error -// extern template class raft::distance::kernels::detail::RBFKernel; -// extern template class raft::distance::kernels::detail::RBFKernel; \ No newline at end of file diff --git a/cpp/include/raft/distance/specializations/distance.cuh b/cpp/include/raft/distance/specializations/distance.cuh index a34f696e9e..c2324a24cd 100644 --- a/cpp/include/raft/distance/specializations/distance.cuh +++ b/cpp/include/raft/distance/specializations/distance.cuh @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -32,3 +31,4 @@ #include #include #include +#include diff --git a/cpp/include/raft/distance/specializations/kernels.cuh b/cpp/include/raft/distance/specializations/kernels.cuh new file mode 100644 index 0000000000..f213aeaf9a --- /dev/null +++ b/cpp/include/raft/distance/specializations/kernels.cuh @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +extern template class raft::distance::kernels::GramMatrixBase; +extern template class raft::distance::kernels::GramMatrixBase; + +extern template class raft::distance::kernels::PolynomialKernel; +extern template class raft::distance::kernels::PolynomialKernel; + +extern template class raft::distance::kernels::TanhKernel; +extern template class raft::distance::kernels::TanhKernel; + +extern template class raft::distance::kernels::RBFKernel; +extern template class raft::distance::kernels::RBFKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu b/cpp/src/distance/specializations/kernels/gram_matrix_base_double.cu similarity index 83% rename from cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu rename to cpp/src/distance/specializations/kernels/gram_matrix_base_double.cu index 7c80eb29d0..c86bb2796f 100644 --- a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu +++ b/cpp/src/distance/specializations/kernels/gram_matrix_base_double.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include #include -template class raft::distance::kernels::detail::GramMatrixBase; \ No newline at end of file +template class raft::distance::kernels::GramMatrixBase; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu b/cpp/src/distance/specializations/kernels/gram_matrix_base_float.cu similarity index 83% rename from cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu rename to cpp/src/distance/specializations/kernels/gram_matrix_base_float.cu index d777e73dc9..6c160f7e9a 100644 --- a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu +++ b/cpp/src/distance/specializations/kernels/gram_matrix_base_float.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::detail::GramMatrixBase; \ No newline at end of file +template class raft::distance::kernels::GramMatrixBase; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu b/cpp/src/distance/specializations/kernels/polynomial_kernel_double_int.cu similarity index 82% rename from cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu rename to cpp/src/distance/specializations/kernels/polynomial_kernel_double_int.cu index 28306d0c21..ae08ae9fef 100644 --- a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu +++ b/cpp/src/distance/specializations/kernels/polynomial_kernel_double_int.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::detail::PolynomialKernel; \ No newline at end of file +template class raft::distance::kernels::PolynomialKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu b/cpp/src/distance/specializations/kernels/polynomial_kernel_float_int.cu similarity index 82% rename from cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu rename to cpp/src/distance/specializations/kernels/polynomial_kernel_float_int.cu index 6609de69ac..7bcbe645e9 100644 --- a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu +++ b/cpp/src/distance/specializations/kernels/polynomial_kernel_float_int.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::detail::PolynomialKernel; \ No newline at end of file +template class raft::distance::kernels::PolynomialKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu b/cpp/src/distance/specializations/kernels/rbf_kernel_double.cu similarity index 83% rename from cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu rename to cpp/src/distance/specializations/kernels/rbf_kernel_double.cu index 7ea4b60e09..411c4b879f 100644 --- a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu +++ b/cpp/src/distance/specializations/kernels/rbf_kernel_double.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include #include -template class raft::distance::kernels::detail::RBFKernel; \ No newline at end of file +template class raft::distance::kernels::RBFKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu b/cpp/src/distance/specializations/kernels/rbf_kernel_float.cu similarity index 84% rename from cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu rename to cpp/src/distance/specializations/kernels/rbf_kernel_float.cu index 423613dcd1..0a1ed92f4e 100644 --- a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu +++ b/cpp/src/distance/specializations/kernels/rbf_kernel_float.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::detail::RBFKernel; \ No newline at end of file +template class raft::distance::kernels::RBFKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu b/cpp/src/distance/specializations/kernels/tanh_kernel_double.cu similarity index 83% rename from cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu rename to cpp/src/distance/specializations/kernels/tanh_kernel_double.cu index ab818db73b..7b58343367 100644 --- a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu +++ b/cpp/src/distance/specializations/kernels/tanh_kernel_double.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::detail::TanhKernel; \ No newline at end of file +template class raft::distance::kernels::TanhKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu b/cpp/src/distance/specializations/kernels/tanh_kernel_float.cu similarity index 83% rename from cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu rename to cpp/src/distance/specializations/kernels/tanh_kernel_float.cu index f7825e577a..8cc73bb81f 100644 --- a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu +++ b/cpp/src/distance/specializations/kernels/tanh_kernel_float.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include #include -template class raft::distance::kernels::detail::TanhKernel; \ No newline at end of file +template class raft::distance::kernels::TanhKernel; \ No newline at end of file From 563032c7f193e3e29906accb58c76658d33eaab4 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Wed, 5 Apr 2023 06:41:18 -0700 Subject: [PATCH 15/20] removed outdated docstring --- cpp/include/raft/core/device_mdspan.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp index c4a493503e..1b9992212e 100644 --- a/cpp/include/raft/core/device_mdspan.hpp +++ b/cpp/include/raft/core/device_mdspan.hpp @@ -270,7 +270,6 @@ auto make_device_matrix_view(ElementType* ptr, IndexType n_rows, IndexType n_col * @param[in] ptr on device to wrap * @param[in] n_rows number of rows in pointer * @param[in] n_cols number of columns in pointer - * @param[in] is_row_major whether the data is in row major format (column major otherwise) * @param[in] stride leading dimension / stride of data */ template From 23e308da27eebc3a6a5c073ee9c4b99c6e41714c Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Wed, 5 Apr 2023 15:07:10 +0000 Subject: [PATCH 16/20] fix row-major algorithm selection for cusparse spmm --- .../raft/distance/kernels/kernel_matrices.cuh | 2 +- .../raft/sparse/linalg/detail/spmm.hpp | 35 ++++++++++++++----- cpp/include/raft/sparse/linalg/spmm.cuh | 8 +++-- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/distance/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/kernels/kernel_matrices.cuh index 5bf011bd7a..592406876d 100644 --- a/cpp/include/raft/distance/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/kernels/kernel_matrices.cuh @@ -106,7 +106,7 @@ __global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t ga * Calculates output_ij = exp(-gain * (norm_x_i + norm_y_j - 2*input_ij)); * * Intended usage - * - input is the product of two matrices X and Y input_ij = \sum_k X_ik * Y_jk + * - input is the product of two matrices X and Y input_ij = sum_k X_ik * Y_jk * - norm_x_i = l2_norm(x_i), where x_i is the i-th row of matrix X * - norm_y_j = l2_norm(y_j), where y_j is the j-th row of matrix Y * diff --git a/cpp/include/raft/sparse/linalg/detail/spmm.hpp b/cpp/include/raft/sparse/linalg/detail/spmm.hpp index ec5328f72e..75ed3d135b 100644 --- a/cpp/include/raft/sparse/linalg/detail/spmm.hpp +++ b/cpp/include/raft/sparse/linalg/detail/spmm.hpp @@ -26,23 +26,41 @@ namespace sparse { namespace linalg { namespace detail { +/** + * @brief determine common data layout for both dense matrices + * @tparam ValueType Data type of Y,Z (float/double) + * @tparam IndexType Type of Y,Z + * @tparam LayoutPolicyY layout of Y + * @tparam LayoutPolicyZ layout of Z + * @param[in] x input raft::device_matrix_view + * @param[in] y input raft::device_matrix_view + * @returns dense matrix descriptor to be used by cuSparse API + */ +template +bool is_row_major(raft::device_matrix_view& y, + raft::device_matrix_view& z) +{ + bool is_row_major = z.stride(1) == 1 && y.stride(1) == 1; + bool is_col_major = z.stride(0) == 1 && y.stride(0) == 1; + ASSERT(is_row_major || is_col_major, "Both matrices need to be either row or col major"); + return is_row_major; +} + /** * @brief create a cuSparse dense descriptor * @tparam ValueType Data type of dense_view (float/double) * @tparam IndexType Type of dense_view * @tparam LayoutPolicy layout of dense_view - * @param[in] handle raft handle * 
@param[in] dense_view input raft::device_matrix_view + * @param[in] is_row_major data layout of raft::device_matrix_view * @returns dense matrix descriptor to be used by cuSparse API */ template cusparseDnMatDescr_t create_descriptor( - raft::device_matrix_view& dense_view) + raft::device_matrix_view& dense_view, const bool is_row_major) { - ASSERT(dense_view.stride(0) == 1 || dense_view.stride(1) == 1, "Smallest stride needs to be 1"); - bool is_row_major = dense_view.stride(1) == 1; - auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; - IndexType ld = is_row_major ? dense_view.stride(0) : dense_view.stride(1); + auto order = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL; + IndexType ld = is_row_major ? dense_view.stride(0) : dense_view.stride(1); cusparseDnMatDescr_t descr; RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat( &descr, @@ -58,7 +76,6 @@ cusparseDnMatDescr_t create_descriptor( * @brief create a cuSparse sparse descriptor * @tparam ValueType Data type of sparse_view (float/double) * @tparam NZType Type of sparse_view - * @param[in] handle raft handle * @param[in] sparse_view input raft::device_csr_matrix_view of size M rows x K columns * @returns sparse matrix descriptor to be used by cuSparse API */ @@ -92,6 +109,7 @@ cusparseSpMatDescr_t create_descriptor( * @param[in] handle raft handle * @param[in] trans_x transpose operation for X * @param[in] trans_y transpose operation for Y + * @param[in] is_row_major data layout of Y,Z * @param[in] alpha scalar * @param[in] descr_x input sparse descriptor * @param[in] descr_y input dense descriptor @@ -102,6 +120,7 @@ template void spmm(raft::device_resources const& handle, const bool trans_x, const bool trans_y, + const bool is_row_major, const ValueType* alpha, cusparseSpMatDescr_t& descr_x, cusparseDnMatDescr_t& descr_y, @@ -110,7 +129,7 @@ void spmm(raft::device_resources const& handle, { auto opX = trans_x ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE; auto opY = trans_y ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE; - auto alg = CUSPARSE_SPMM_CSR_ALG1; + auto alg = is_row_major ? 
CUSPARSE_SPMM_CSR_ALG2 : CUSPARSE_SPMM_CSR_ALG1; size_t bufferSize; RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(), opX, diff --git a/cpp/include/raft/sparse/linalg/spmm.cuh b/cpp/include/raft/sparse/linalg/spmm.cuh index 95396309bc..73170cfc70 100644 --- a/cpp/include/raft/sparse/linalg/spmm.cuh +++ b/cpp/include/raft/sparse/linalg/spmm.cuh @@ -57,11 +57,13 @@ void spmm(raft::device_resources const& handle, const ValueType* beta, raft::device_matrix_view z) { + bool is_row_major = detail::is_row_major(y, z); + auto descr_x = detail::create_descriptor(x); - auto descr_y = detail::create_descriptor(y); - auto descr_z = detail::create_descriptor(z); + auto descr_y = detail::create_descriptor(y, is_row_major); + auto descr_z = detail::create_descriptor(z, is_row_major); - detail::spmm(handle, trans_x, trans_y, alpha, descr_x, descr_y, beta, descr_z); + detail::spmm(handle, trans_x, trans_y, is_row_major, alpha, descr_x, descr_y, beta, descr_z); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr_x)); RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr_y)); From a5ee783341160e663cac502824ecb0014558051c Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Wed, 5 Apr 2023 19:48:29 +0000 Subject: [PATCH 17/20] fixed doc build --- cpp/include/raft/sparse/linalg/norm.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 6ddaca0cd6..95831f395e 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -77,13 +77,14 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) * @tparam Type the data type * @tparam Lambda device final lambda * @tparam IdxType Integer type used to for addressing - * @param stream cuda stream where to launch work + * @param handle raft handle * @param ia the input matrix row index array * @param data the input matrix nnz data * @param nnz number of elements in data * @param N number of rows * @param norm the output vector of row-wise norm, size [N] * @param type the type of norm to be applied + * @param fin_op the final lambda op */ template void rowNormCsr(raft::device_resources const& handle, From d7d2f5b7c24bfbcd84d8ed7373adc20a3fb824fe Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Tue, 18 Apr 2023 14:56:37 -0700 Subject: [PATCH 18/20] reverted changeset 2b6090a860e6fe36c6c63beb50939bceca13d6f2 --- cpp/CMakeLists.txt | 17 +++++----- .../{ => detail}/kernels/gram_matrix.cuh | 10 +++--- .../{ => detail}/kernels/kernel_factory.cuh | 4 +-- .../{ => detail}/kernels/kernel_matrices.cuh | 4 +-- cpp/include/raft/distance/kernels.cuh | 12 +++++-- .../specializations/detail/kernels.cuh | 31 +++++++++++++++++++ .../distance/specializations/distance.cuh | 2 +- .../raft/distance/specializations/kernels.cuh | 30 ------------------ .../kernels/gram_matrix_base_double.cu | 4 +-- .../kernels/gram_matrix_base_float.cu | 4 +-- .../kernels/polynomial_kernel_double_int.cu | 4 +-- .../kernels/polynomial_kernel_float_int.cu | 4 +-- .../{ => detail}/kernels/rbf_kernel_double.cu | 4 +-- .../{ => detail}/kernels/rbf_kernel_float.cu | 4 +-- .../kernels/tanh_kernel_double.cu | 4 +-- .../{ => detail}/kernels/tanh_kernel_float.cu | 4 +-- 16 files changed, 77 insertions(+), 65 deletions(-) rename cpp/include/raft/distance/{ => detail}/kernels/gram_matrix.cuh (99%) rename cpp/include/raft/distance/{ => detail}/kernels/kernel_factory.cuh (95%) rename cpp/include/raft/distance/{ 
=> detail}/kernels/kernel_matrices.cuh (99%) create mode 100644 cpp/include/raft/distance/specializations/detail/kernels.cuh delete mode 100644 cpp/include/raft/distance/specializations/kernels.cuh rename cpp/src/distance/specializations/{ => detail}/kernels/gram_matrix_base_double.cu (83%) rename cpp/src/distance/specializations/{ => detail}/kernels/gram_matrix_base_float.cu (83%) rename cpp/src/distance/specializations/{ => detail}/kernels/polynomial_kernel_double_int.cu (82%) rename cpp/src/distance/specializations/{ => detail}/kernels/polynomial_kernel_float_int.cu (82%) rename cpp/src/distance/specializations/{ => detail}/kernels/rbf_kernel_double.cu (83%) rename cpp/src/distance/specializations/{ => detail}/kernels/rbf_kernel_float.cu (84%) rename cpp/src/distance/specializations/{ => detail}/kernels/tanh_kernel_double.cu (83%) rename cpp/src/distance/specializations/{ => detail}/kernels/tanh_kernel_float.cu (83%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cafa981ad6..144f58c4d6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -302,7 +302,16 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/detail/inner_product_double_double_double_int.cu src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu + src/distance/specializations/detail/kernels/gram_matrix_base_double.cu + src/distance/specializations/detail/kernels/gram_matrix_base_float.cu + src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu + src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu + # These are somehow missing a kernel definition which is causing a compile error. + # src/distance/specializations/detail/kernels/rbf_kernel_double.cu + # src/distance/specializations/detail/kernels/rbf_kernel_float.cu src/neighbors/brute_force_knn_int64_t_float.cu + src/distance/specializations/detail/kernels/tanh_kernel_double.cu + src/distance/specializations/detail/kernels/tanh_kernel_float.cu src/distance/specializations/detail/kl_divergence_float_float_float_int.cu src/distance/specializations/detail/kl_divergence_double_double_double_int.cu src/distance/specializations/detail/l1_float_float_float_int.cu @@ -321,14 +330,6 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/fused_l2_nn_double_int64.cu src/distance/specializations/fused_l2_nn_float_int.cu src/distance/specializations/fused_l2_nn_float_int64.cu - src/distance/specializations/kernels/gram_matrix_base_double.cu - src/distance/specializations/kernels/gram_matrix_base_float.cu - src/distance/specializations/kernels/polynomial_kernel_double_int.cu - src/distance/specializations/kernels/polynomial_kernel_float_int.cu - src/distance/specializations/kernels/rbf_kernel_double.cu - src/distance/specializations/kernels/rbf_kernel_float.cu - src/distance/specializations/kernels/tanh_kernel_double.cu - src/distance/specializations/kernels/tanh_kernel_float.cu src/matrix/specializations/detail/select_k_float_uint32_t.cu src/matrix/specializations/detail/select_k_float_int64_t.cu src/matrix/specializations/detail/select_k_half_uint32_t.cu diff --git a/cpp/include/raft/distance/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh similarity index 99% rename from cpp/include/raft/distance/kernels/gram_matrix.cuh rename to cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index bdd02be1b1..f03f746161 100644 --- a/cpp/include/raft/distance/kernels/gram_matrix.cuh +++ 
b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -20,12 +20,14 @@ #include #include #include -#include -#include +//#include #include #include -namespace raft::distance::kernels { +#include +#include + +namespace raft::distance::kernels::detail { template using dense_input_matrix_view_t = raft::device_matrix_view; @@ -505,4 +507,4 @@ class GramMatrixBase { } }; -}; // end namespace raft::distance::kernels +}; // end namespace raft::distance::kernels::detail diff --git a/cpp/include/raft/distance/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh similarity index 95% rename from cpp/include/raft/distance/kernels/kernel_factory.cuh rename to cpp/include/raft/distance/detail/kernels/kernel_factory.cuh index 9999b29d85..7c74e231d7 100644 --- a/cpp/include/raft/distance/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh @@ -21,7 +21,7 @@ #include #include -namespace raft::distance::kernels { +namespace raft::distance::kernels::detail { template class KernelFactory { @@ -61,4 +61,4 @@ class KernelFactory { } }; -}; // end namespace raft::distance::kernels +}; // end namespace raft::distance::kernels::detail diff --git a/cpp/include/raft/distance/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh similarity index 99% rename from cpp/include/raft/distance/kernels/kernel_matrices.cuh rename to cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 592406876d..20893dfce9 100644 --- a/cpp/include/raft/distance/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -23,7 +23,7 @@ #include #include -namespace raft::distance::kernels { +namespace raft::distance::kernels::detail { /** Epiloge function for polynomial kernel without padding. * Calculates output = (gain*in + offset)^exponent @@ -738,4 +738,4 @@ class RBFKernel : public GramMatrixBase { } }; -}; // end namespace raft::distance::kernels +}; // end namespace raft::distance::kernels::detail diff --git a/cpp/include/raft/distance/kernels.cuh b/cpp/include/raft/distance/kernels.cuh index 86a2107f82..86f9f82406 100644 --- a/cpp/include/raft/distance/kernels.cuh +++ b/cpp/include/raft/distance/kernels.cuh @@ -16,9 +16,17 @@ #pragma once -#include -#include +#include +#include #include #include #include + +namespace raft::distance::kernels { + +// TODO: Need to expose formal APIs for this that are more consistent w/ other APIs in RAFT +using raft::distance::kernels::detail::GramMatrixBase; +using raft::distance::kernels::detail::KernelFactory; + +}; // end namespace raft::distance::kernels diff --git a/cpp/include/raft/distance/specializations/detail/kernels.cuh b/cpp/include/raft/distance/specializations/detail/kernels.cuh new file mode 100644 index 0000000000..75c9c023e8 --- /dev/null +++ b/cpp/include/raft/distance/specializations/detail/kernels.cuh @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +extern template class raft::distance::kernels::detail::GramMatrixBase; +extern template class raft::distance::kernels::detail::GramMatrixBase; + +extern template class raft::distance::kernels::detail::PolynomialKernel; +extern template class raft::distance::kernels::detail::PolynomialKernel; + +extern template class raft::distance::kernels::detail::TanhKernel; +extern template class raft::distance::kernels::detail::TanhKernel; + +// These are somehow missing a kernel definition which is causing a compile error +// extern template class raft::distance::kernels::detail::RBFKernel; +// extern template class raft::distance::kernels::detail::RBFKernel; \ No newline at end of file diff --git a/cpp/include/raft/distance/specializations/distance.cuh b/cpp/include/raft/distance/specializations/distance.cuh index c2324a24cd..a34f696e9e 100644 --- a/cpp/include/raft/distance/specializations/distance.cuh +++ b/cpp/include/raft/distance/specializations/distance.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -31,4 +32,3 @@ #include #include #include -#include diff --git a/cpp/include/raft/distance/specializations/kernels.cuh b/cpp/include/raft/distance/specializations/kernels.cuh deleted file mode 100644 index f213aeaf9a..0000000000 --- a/cpp/include/raft/distance/specializations/kernels.cuh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -extern template class raft::distance::kernels::GramMatrixBase; -extern template class raft::distance::kernels::GramMatrixBase; - -extern template class raft::distance::kernels::PolynomialKernel; -extern template class raft::distance::kernels::PolynomialKernel; - -extern template class raft::distance::kernels::TanhKernel; -extern template class raft::distance::kernels::TanhKernel; - -extern template class raft::distance::kernels::RBFKernel; -extern template class raft::distance::kernels::RBFKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/kernels/gram_matrix_base_double.cu b/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu similarity index 83% rename from cpp/src/distance/specializations/kernels/gram_matrix_base_double.cu rename to cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu index c86bb2796f..7c80eb29d0 100644 --- a/cpp/src/distance/specializations/kernels/gram_matrix_base_double.cu +++ b/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include #include -template class raft::distance::kernels::GramMatrixBase; \ No newline at end of file +template class raft::distance::kernels::detail::GramMatrixBase; \ No newline at end of file diff --git a/cpp/src/distance/specializations/kernels/gram_matrix_base_float.cu b/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu similarity index 83% rename from cpp/src/distance/specializations/kernels/gram_matrix_base_float.cu rename to cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu index 6c160f7e9a..d777e73dc9 100644 --- a/cpp/src/distance/specializations/kernels/gram_matrix_base_float.cu +++ b/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::GramMatrixBase; \ No newline at end of file +template class raft::distance::kernels::detail::GramMatrixBase; \ No newline at end of file diff --git a/cpp/src/distance/specializations/kernels/polynomial_kernel_double_int.cu b/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu similarity index 82% rename from cpp/src/distance/specializations/kernels/polynomial_kernel_double_int.cu rename to cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu index ae08ae9fef..28306d0c21 100644 --- a/cpp/src/distance/specializations/kernels/polynomial_kernel_double_int.cu +++ b/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::PolynomialKernel; \ No newline at end of file +template class raft::distance::kernels::detail::PolynomialKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/kernels/polynomial_kernel_float_int.cu b/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu similarity index 82% rename from cpp/src/distance/specializations/kernels/polynomial_kernel_float_int.cu rename to cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu index 7bcbe645e9..6609de69ac 100644 --- a/cpp/src/distance/specializations/kernels/polynomial_kernel_float_int.cu +++ b/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include -template class raft::distance::kernels::PolynomialKernel; \ No newline at end of file +template class raft::distance::kernels::detail::PolynomialKernel; \ No newline at end of file diff --git a/cpp/src/distance/specializations/kernels/rbf_kernel_double.cu b/cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu similarity index 83% rename from cpp/src/distance/specializations/kernels/rbf_kernel_double.cu rename to cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu index 411c4b879f..7ea4b60e09 100644 --- a/cpp/src/distance/specializations/kernels/rbf_kernel_double.cu +++ b/cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu @@ -14,7 +14,7 @@ * limitations under the License. 
 */
-#include
+#include
-template class raft::distance::kernels::RBFKernel<double>;
\ No newline at end of file
+template class raft::distance::kernels::detail::RBFKernel<double>;
\ No newline at end of file
diff --git a/cpp/src/distance/specializations/kernels/rbf_kernel_float.cu b/cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu
similarity index 84%
rename from cpp/src/distance/specializations/kernels/rbf_kernel_float.cu
rename to cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu
index 0a1ed92f4e..423613dcd1 100644
--- a/cpp/src/distance/specializations/kernels/rbf_kernel_float.cu
+++ b/cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu
@@ -14,7 +14,7 @@
 * limitations under the License.
 */
-#include
+#include
-template class raft::distance::kernels::RBFKernel<float>;
\ No newline at end of file
+template class raft::distance::kernels::detail::RBFKernel<float>;
\ No newline at end of file
diff --git a/cpp/src/distance/specializations/kernels/tanh_kernel_double.cu b/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu
similarity index 83%
rename from cpp/src/distance/specializations/kernels/tanh_kernel_double.cu
rename to cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu
index 7b58343367..ab818db73b 100644
--- a/cpp/src/distance/specializations/kernels/tanh_kernel_double.cu
+++ b/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu
@@ -14,7 +14,7 @@
 * limitations under the License.
 */
-#include
+#include
-template class raft::distance::kernels::TanhKernel<double>;
\ No newline at end of file
+template class raft::distance::kernels::detail::TanhKernel<double>;
\ No newline at end of file
diff --git a/cpp/src/distance/specializations/kernels/tanh_kernel_float.cu b/cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu
similarity index 83%
rename from cpp/src/distance/specializations/kernels/tanh_kernel_float.cu
rename to cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu
index 8cc73bb81f..f7825e577a 100644
--- a/cpp/src/distance/specializations/kernels/tanh_kernel_float.cu
+++ b/cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu
@@ -14,7 +14,7 @@
 * limitations under the License.
*/ -#include +#include #include -template class raft::distance::kernels::TanhKernel; \ No newline at end of file +template class raft::distance::kernels::detail::TanhKernel; \ No newline at end of file From f2ebd76dc53b508e96550e069f85791f6ef6bdd6 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Thu, 20 Apr 2023 05:14:43 -0700 Subject: [PATCH 19/20] merge API conflicts with recent updates to sparse structures --- cpp/include/raft/distance/detail/kernels/gram_matrix.cuh | 6 +++--- .../raft/distance/detail/kernels/kernel_matrices.cuh | 8 ++++---- cpp/include/raft/sparse/linalg/detail/spmm.hpp | 2 +- cpp/test/sparse/gram.cu | 5 ++--- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh index f03f746161..a68b904470 100644 --- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh +++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh @@ -433,7 +433,7 @@ class GramMatrixBase { "GramMatrix leading dimensions for x2 and out do not match"); // check dimensions - auto x1_structure = x1.get_structure(); + auto x1_structure = x1.structure_view(); ASSERT(x1_structure.get_n_rows() == out.extent(0), "GramMatrix input matrix dimensions for x1 and out do not match"); ASSERT(x2.extent(0) == out.extent(1), @@ -469,8 +469,8 @@ class GramMatrixBase { int minor_out = is_row_major ? out.extent(1) : out.extent(0); ASSERT(ld_out == minor_out, "Sparse linear Kernel distance does not support ld_out parameter"); - auto x1_structure = x1.get_structure(); - auto x2_structure = x2.get_structure(); + auto x1_structure = x1.structure_view(); + auto x2_structure = x2.structure_view(); raft::sparse::distance::distances_config_t dist_config(handle); // switch a,b based on is_row_major diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh index 20893dfce9..4b000add21 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh @@ -519,7 +519,7 @@ class RBFKernel : public GramMatrixBase { csr_input_matrix_view_t matrix, math_t* target) { - auto matrix_structure = matrix.get_structure(); + auto matrix_structure = matrix.structure_view(); raft::sparse::linalg::rowNormCsr(handle, matrix_structure.get_indptr().data(), matrix.get_elements().data(), @@ -605,7 +605,7 @@ class RBFKernel : public GramMatrixBase { rmm::device_uvector tmp_norm_x1(0, stream); rmm::device_uvector tmp_norm_x2(0, stream); if (norm_x1 == nullptr) { - tmp_norm_x1.reserve(x1.get_structure().get_n_rows(), stream); + tmp_norm_x1.reserve(x1.structure_view().get_n_rows(), stream); norm_x1 = tmp_norm_x1.data(); matrixRowNormL2(handle, x1, norm_x1); } @@ -655,12 +655,12 @@ class RBFKernel : public GramMatrixBase { rmm::device_uvector tmp_norm_x1(0, stream); rmm::device_uvector tmp_norm_x2(0, stream); if (norm_x1 == nullptr) { - tmp_norm_x1.reserve(x1.get_structure().get_n_rows(), stream); + tmp_norm_x1.reserve(x1.structure_view().get_n_rows(), stream); norm_x1 = tmp_norm_x1.data(); matrixRowNormL2(handle, x1, norm_x1); } if (norm_x2 == nullptr) { - tmp_norm_x2.reserve(x2.get_structure().get_n_rows(), stream); + tmp_norm_x2.reserve(x2.structure_view().get_n_rows(), stream); norm_x2 = tmp_norm_x2.data(); matrixRowNormL2(handle, x2, norm_x2); } diff --git a/cpp/include/raft/sparse/linalg/detail/spmm.hpp b/cpp/include/raft/sparse/linalg/detail/spmm.hpp index 
75ed3d135b..b61b561a12 100644 --- a/cpp/include/raft/sparse/linalg/detail/spmm.hpp +++ b/cpp/include/raft/sparse/linalg/detail/spmm.hpp @@ -84,7 +84,7 @@ cusparseSpMatDescr_t create_descriptor( raft::device_csr_matrix_view& sparse_view) { cusparseSpMatDescr_t descr; - auto csr_structure = sparse_view.get_structure(); + auto csr_structure = sparse_view.structure_view(); RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr( &descr, csr_structure.get_n_rows(), diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu index e0bfb94f94..86a2e0cf43 100644 --- a/cpp/test/sparse/gram.cu +++ b/cpp/test/sparse/gram.cu @@ -244,9 +244,8 @@ class GramMatrixTest : public ::testing::TestWithParam { x1_csr_indices.data(), x1_csr_data.data()); - auto x1_csr_structure = raft::make_device_csr_structure_view( + auto x1_csr_structure = raft::make_device_compressed_structure_view( x1_csr_indptr.data(), x1_csr_indices.data(), params.n1, params.n_cols, x1_nnz); - auto x1_csr = raft::device_csr_matrix_view( raft::device_span(x1_csr_data.data(), x1_csr_structure.get_nnz()), x1_csr_structure); @@ -264,7 +263,7 @@ class GramMatrixTest : public ::testing::TestWithParam { x2_csr_indices.data(), x2_csr_data.data()); - auto x2_csr_structure = raft::make_device_csr_structure_view( + auto x2_csr_structure = raft::make_device_compressed_structure_view( x2_csr_indptr.data(), x2_csr_indices.data(), params.n2, params.n_cols, x2_nnz); auto x2_csr = raft::device_csr_matrix_view( raft::device_span(x2_csr_data.data(), x2_csr_structure.get_nnz()), From ae8fbb515835e5a36fb52cf421b0e1687928dd65 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 24 Apr 2023 18:29:29 -0400 Subject: [PATCH 20/20] Fixing build --- cpp/bench/prims/distance/fused_l2_nn.cu | 1 + cpp/include/raft/core/detail/nvtx.hpp | 2 +- cpp/include/raft/distance/detail/kernels/kernel_factory.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/norm.cuh | 2 +- cpp/include/raft/sparse/linalg/norm.cuh | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/prims/distance/fused_l2_nn.cu index 1c45572782..a5115407dd 100644 --- a/cpp/bench/prims/distance/fused_l2_nn.cu +++ b/cpp/bench/prims/distance/fused_l2_nn.cu @@ -16,6 +16,7 @@ #include #include +#include #include #if defined RAFT_COMPILED #include diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index adbf3a3666..e0f985cb73 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh index 7c74e231d7..bb3ff1c2f5 100644 --- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh +++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 1e66af3d10..56ca2ebfa7 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 95831f395e..2bd48c6dc6 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
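[Editor's note, not part of the patch] For context on the sparse path touched above, the sketch below shows how the CSR views consumed by the gram-matrix kernels might be built from raw device pointers, following the pattern of the updated test in cpp/test/sparse/gram.cu. The explicit template arguments, header names, and the helper function itself are illustrative assumptions rather than part of this change.

// Sketch only: assumes float values, int offsets/indices, and valid device
// pointers csr_indptr (n_rows + 1), csr_indices (nnz) and csr_data (nnz).
#include <raft/core/device_csr_matrix.hpp>  // header names assumed
#include <raft/core/device_span.hpp>

auto make_csr_view(int* csr_indptr, int* csr_indices, float* csr_data,
                   int n_rows, int n_cols, int nnz)
{
  // Structural (indptr/indices) view, mirroring the construction in gram.cu.
  auto csr_structure = raft::make_device_compressed_structure_view<int, int, int>(
    csr_indptr, csr_indices, n_rows, n_cols, nnz);

  // Value view over the same structure; the kernels in
  // raft::distance::kernels::detail obtain indptr/indices from it via
  // structure_view(), as the gram_matrix.cuh hunks above show.
  return raft::device_csr_matrix_view<float, int, int, int>(
    raft::device_span<float>(csr_data, csr_structure.get_nnz()), csr_structure);
}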