[REVIEW] Hiding implementation details for sparse API #381

Merged
merged 24 commits into from
Dec 7, 2021
Changes from 15 commits
Commits
24 commits
de6743c
Exposing public API for sparse/convert and sparse/hierarchy
cjnolet Nov 16, 2021
83427bd
Fixing style
cjnolet Nov 16, 2021
2c7c0e5
hiding impl details for sparse
cjnolet Nov 16, 2021
0061388
Merge branch 'branch-21.12' into imp-2202-sparse_public_api
cjnolet Nov 16, 2021
a970b64
Moving remaining sparse stuff (minus spectral and mst)
cjnolet Nov 16, 2021
5142ad0
Fixing style
cjnolet Nov 16, 2021
b13bb56
Fixing typo in sort
cjnolet Nov 16, 2021
ccedf94
Fixing typo in spectral (need tests for this!)
cjnolet Nov 16, 2021
6d4fb99
Changing include
cjnolet Nov 16, 2021
94fa86c
Updating degree
cjnolet Nov 16, 2021
d28fc76
Fixing style
cjnolet Nov 16, 2021
9cfc89e
Merge branch 'branch-21.12' into imp-2202-sparse_public_api
cjnolet Nov 17, 2021
7559164
Merge branch 'branch-21.12' into imp-2202-sparse_public_api
cjnolet Nov 17, 2021
49b1775
Merge branch 'branch-21.12-merge-22.02' into imp-2202-sparse_public_api
cjnolet Nov 17, 2021
6b47b92
Merge branch 'branch-22.02' into imp-2202-sparse_public_api
cjnolet Nov 17, 2021
c046582
Removing unecessary includes from coo
cjnolet Nov 23, 2021
1ffecc8
Hiding leaked includes
cjnolet Nov 23, 2021
742333a
More updates
cjnolet Nov 23, 2021
1dedb48
Merge branch 'branch-22.02' into imp-2202-sparse_public_api
cjnolet Nov 24, 2021
a0344cc
Style
cjnolet Nov 24, 2021
f37059b
Updating
cjnolet Dec 6, 2021
71ea006
clang tidy
cjnolet Dec 6, 2021
fbaf66d
Merge branch 'branch-22.02' into imp-2202-sparse_public_api
cjnolet Dec 6, 2021
5224ee7
Merge branch 'branch-22.02' into imp-2202-sparse_public_api
cjnolet Dec 6, 2021
41 changes: 41 additions & 0 deletions cpp/include/raft/sparse/convert/coo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/sparse/convert/detail/coo.cuh>

namespace raft {
namespace sparse {
namespace convert {

/**
* @brief Convert a CSR row_ind array to a COO rows array
* @param row_ind: Input CSR row_ind array
* @param m: size of row_ind array
* @param coo_rows: Output COO row array
* @param nnz: size of output COO row array
* @param stream: cuda stream to use
*/
template <typename value_idx = int>
void csr_to_coo(const value_idx *row_ind, value_idx m, value_idx *coo_rows,
value_idx nnz, cudaStream_t stream) {
detail::csr_to_coo<value_idx, 32>(row_ind, m, coo_rows, nnz, stream);
}

}; // end NAMESPACE convert
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
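As a host-side illustration (not raft code) of the expansion `csr_to_coo` performs — assuming, as the wrapper's doc comment states, that `row_ind` holds `m` row offsets and `nnz` closes the final row:

```cpp
#include <vector>

// Host-side sketch of the csr_to_coo expansion: row i of the CSR matrix
// spans [row_ind[i], row_ind[i+1]) in the COO rows output, with nnz
// closing the last row.
std::vector<int> csr_to_coo_host(const std::vector<int>& row_ind, int nnz)
{
  const int m = static_cast<int>(row_ind.size());
  std::vector<int> coo_rows(nnz);
  for (int i = 0; i < m; ++i) {
    const int start = row_ind[i];
    const int stop  = (i + 1 < m) ? row_ind[i + 1] : nnz;
    for (int j = start; j < stop; ++j) coo_rows[j] = i;  // repeat row id per entry
  }
  return coo_rows;
}
```

For example, offsets `{0, 2, 3}` with `nnz = 6` expand to rows `{0, 0, 1, 2, 2, 2}`; the device kernel parallelizes this loop over rows.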
117 changes: 117 additions & 0 deletions cpp/include/raft/sparse/convert/csr.hpp
@@ -0,0 +1,117 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/sparse/convert/detail/csr.cuh>
#include <raft/sparse/csr.hpp>

namespace raft {
namespace sparse {
namespace convert {

template <typename value_t>
void coo_to_csr(const raft::handle_t &handle, const int *srcRows,
const int *srcCols, const value_t *srcVals, int nnz, int m,
int *dst_offsets, int *dstCols, value_t *dstVals) {
detail::coo_to_csr(handle, srcRows, srcCols, srcVals, nnz, m, dst_offsets,
dstCols, dstVals);
}

/**
* @brief Constructs an adjacency graph CSR row_ind_ptr array from
* a row_ind array and adjacency array.
 * @tparam Index_ the numeric type of the index arrays
* @tparam Lambda function for fused operation in the adj_graph construction
* @param row_ind the input CSR row_ind array
* @param total_rows number of vertices in graph
* @param nnz number of non-zeros
* @param batchSize number of vertices in current batch
* @param adj an adjacency array (size batchSize x total_rows)
* @param row_ind_ptr output CSR row_ind_ptr for adjacency graph
* @param stream cuda stream to use
* @param fused_op: the fused operation
*/
template <typename Index_, typename Lambda = auto(Index_, Index_, Index_)->void>
void csr_adj_graph_batched(const Index_ *row_ind, Index_ total_rows, Index_ nnz,
Index_ batchSize, const bool *adj,
Index_ *row_ind_ptr, cudaStream_t stream,
Lambda fused_op) {
detail::csr_adj_graph_batched<Index_, 32, Lambda>(
row_ind, total_rows, nnz, batchSize, adj, row_ind_ptr, stream, fused_op);
}

template <typename Index_, typename Lambda = auto(Index_, Index_, Index_)->void>
void csr_adj_graph_batched(const Index_ *row_ind, Index_ total_rows, Index_ nnz,
Index_ batchSize, const bool *adj,
Index_ *row_ind_ptr, cudaStream_t stream) {
detail::csr_adj_graph_batched<Index_, 32, Lambda>(
row_ind, total_rows, nnz, batchSize, adj, row_ind_ptr, stream);
}

/**
 * @brief Constructs an adjacency graph CSR row_ind_ptr array from a
 * row_ind array and adjacency array.
 * @tparam Index_ the numeric type of the index arrays
* @param row_ind the input CSR row_ind array
* @param total_rows number of total vertices in graph
* @param nnz number of non-zeros
* @param adj an adjacency array
* @param row_ind_ptr output CSR row_ind_ptr for adjacency graph
* @param stream cuda stream to use
* @param fused_op the fused operation
*/
template <typename Index_, typename Lambda = auto(Index_, Index_, Index_)->void>
void csr_adj_graph(const Index_ *row_ind, Index_ total_rows, Index_ nnz,
const bool *adj, Index_ *row_ind_ptr, cudaStream_t stream,
Lambda fused_op) {
detail::csr_adj_graph<Index_, 32, Lambda>(row_ind, total_rows, nnz, adj,
row_ind_ptr, stream, fused_op);
}
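A host-side sketch of what `csr_adj_graph` computes, under an assumed reading of its semantics (each row's offset comes from `row_ind`, and the neighbor ids where the boolean adjacency row is set are written out in column order):

```cpp
#include <cstddef>
#include <vector>

// Host-side sketch (assumed semantics, not raft code): for each vertex i,
// write into row_ind_ptr the neighbor ids j where adj[i * total_rows + j]
// is true, starting at the row's CSR offset row_ind[i].
void csr_adj_graph_host(const std::vector<int>& row_ind, int total_rows,
                        const std::vector<bool>& adj,
                        std::vector<int>& row_ind_ptr)
{
  const int n_rows = static_cast<int>(row_ind.size());
  for (int i = 0; i < n_rows; ++i) {
    int k = row_ind[i];  // where row i's neighbor list begins
    for (int j = 0; j < total_rows; ++j)
      if (adj[static_cast<std::size_t>(i) * total_rows + j]) row_ind_ptr[k++] = j;
  }
}
```

The device version fuses an extra user-supplied `fused_op` into the same per-row pass, which is why the public wrapper threads a `Lambda` through to the detail namespace.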

/**
* @brief Generate the row indices array for a sorted COO matrix
*
* @param rows: COO rows array
* @param nnz: size of COO rows array
* @param row_ind: output row indices array
* @param m: number of rows in dense matrix
* @param stream: cuda stream to use
*/
template <typename T>
void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m,
cudaStream_t stream) {
detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream);
}

/**
* @brief Generate the row indices array for a sorted COO matrix
*
* @param coo: Input COO matrix
* @param row_ind: output row indices array
* @param stream: cuda stream to use
*/
template <typename T>
void sorted_coo_to_csr(COO<T> *coo, int *row_ind, cudaStream_t stream) {
detail::sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows,
stream);
}

}; // end NAMESPACE convert
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
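The `sorted_coo_to_csr` conversion is a two-step count-and-scan; a minimal host-side sketch (not raft code) of the same steps the device path takes with `linalg::coo_degree` followed by an exclusive scan:

```cpp
#include <numeric>
#include <vector>

// Host-side sketch of sorted_coo_to_csr: count entries per row (the row
// "degree"), then exclusive-scan the counts into CSR row offsets.
// Requires C++17 for std::exclusive_scan.
std::vector<int> sorted_coo_to_csr_host(const std::vector<int>& coo_rows, int m)
{
  std::vector<int> counts(m, 0);
  for (int r : coo_rows) ++counts[r];  // degree of each row
  std::vector<int> row_ind(m, 0);
  std::exclusive_scan(counts.begin(), counts.end(), row_ind.begin(), 0);
  return row_ind;
}
```

This is the inverse of the CSR-to-COO expansion: sorted COO rows `{0, 0, 1, 2, 2, 2}` yield degrees `{2, 1, 3}` and offsets `{0, 2, 3}`.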
55 changes: 55 additions & 0 deletions cpp/include/raft/sparse/convert/dense.hpp
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/sparse/convert/detail/dense.cuh>

namespace raft {
namespace sparse {
namespace convert {

/**
* Convert CSR arrays to a dense matrix in either row-
* or column-major format. A custom kernel is used when
* row-major output is desired since cusparse does not
* output row-major.
* @tparam value_idx : data type of the CSR index arrays
* @tparam value_t : data type of the CSR value array
* @param[in] handle : cusparse handle for conversion
* @param[in] nrows : number of rows in CSR
* @param[in] ncols : number of columns in CSR
* @param[in] csr_indptr : CSR row index pointer array
* @param[in] csr_indices : CSR column indices array
* @param[in] csr_data : CSR data array
* @param[in] lda : Leading dimension (used for col-major only)
* @param[out] out : Dense output array of size nrows * ncols
* @param[in] stream : Cuda stream for ordering events
* @param[in] row_major : Is row-major output desired?
*/
template <typename value_idx, typename value_t>
void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols,
const value_idx *csr_indptr, const value_idx *csr_indices,
const value_t *csr_data, value_idx lda, value_t *out,
cudaStream_t stream, bool row_major = true) {
detail::csr_to_dense<value_idx, value_t>(handle, nrows, ncols, csr_indptr,
csr_indices, csr_data, lda, out,
stream, row_major);
}

}; // end NAMESPACE convert
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
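The row-major path that the custom kernel covers is a simple scatter; a host-side sketch (not raft code, and assuming a standard `indptr` of `nrows + 1` offsets) of the same transformation:

```cpp
#include <cstddef>
#include <vector>

// Host-side sketch of the row-major csr_to_dense path: zero-fill the
// output, then scatter each CSR entry to out[row * ncols + col].
std::vector<double> csr_to_dense_host(int nrows, int ncols,
                                      const std::vector<int>& indptr,
                                      const std::vector<int>& indices,
                                      const std::vector<double>& data)
{
  std::vector<double> out(static_cast<std::size_t>(nrows) * ncols, 0.0);
  for (int i = 0; i < nrows; ++i)
    for (int j = indptr[i]; j < indptr[i + 1]; ++j)
      out[static_cast<std::size_t>(i) * ncols + indices[j]] = data[j];
  return out;
}
```

The device code assigns a warp per row instead of the outer loop; the column-major branch defers to cusparse, which only emits column-major output.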
@@ -29,12 +29,13 @@
#include <algorithm>
#include <iostream>

#include <raft/sparse/utils.h>
#include <raft/sparse/coo.cuh>
#include <raft/sparse/detail/utils.h>
#include <raft/sparse/coo.hpp>

namespace raft {
namespace sparse {
namespace convert {
namespace detail {

template <typename value_idx = int, int TPB_X = 32>
__global__ void csr_to_coo_kernel(const value_idx *row_ind, value_idx m,
@@ -69,6 +70,7 @@ void csr_to_coo(const value_idx *row_ind, value_idx m, value_idx *coo_rows,
CUDA_CHECK(cudaGetLastError());
}

}; // end NAMESPACE detail
}; // end NAMESPACE convert
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
@@ -33,14 +33,15 @@
#include <algorithm>
#include <iostream>

#include <raft/sparse/utils.h>
#include <raft/sparse/coo.cuh>
#include <raft/sparse/linalg/degree.cuh>
#include <raft/sparse/op/row_op.cuh>
#include <raft/sparse/detail/utils.h>
#include <raft/sparse/coo.hpp>
#include <raft/sparse/linalg/degree.hpp>
#include <raft/sparse/op/row_op.hpp>

namespace raft {
namespace sparse {
namespace convert {
namespace detail {

template <typename value_t>
void coo_to_csr(const raft::handle_t &handle, const int *srcRows,
@@ -89,7 +90,7 @@ void csr_adj_graph_batched(const Index_ *row_ind, Index_ total_rows, Index_ nnz,
Index_ batchSize, const bool *adj,
Index_ *row_ind_ptr, cudaStream_t stream,
Lambda fused_op) {
op::csr_row_op<Index_, TPB_X>(
op::csr_row_op<Index_>(
row_ind, batchSize, nnz,
[fused_op, adj, total_rows, row_ind_ptr, batchSize, nnz] __device__(
Index_ row, Index_ start_idx, Index_ stop_idx) {
@@ -154,7 +155,7 @@ void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m,

CUDA_CHECK(cudaMemsetAsync(row_counts.data(), 0, m * sizeof(T), stream));

linalg::coo_degree<32>(rows, nnz, row_counts.data(), stream);
linalg::coo_degree(rows, nnz, row_counts.data(), stream);

// create csr compressed row index from row counts
thrust::device_ptr<T> row_counts_d =
@@ -164,18 +165,7 @@ void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m,
c_ind_d);
}

/**
* @brief Generate the row indices array for a sorted COO matrix
*
* @param coo: Input COO matrix
* @param row_ind: output row indices array
* @param stream: cuda stream to use
*/
template <typename T>
void sorted_coo_to_csr(COO<T> *coo, int *row_ind, cudaStream_t stream) {
sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows, stream);
}

}; // end NAMESPACE detail
}; // end NAMESPACE convert
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
@@ -30,11 +30,12 @@
#include <algorithm>
#include <iostream>

#include <raft/sparse/utils.h>
#include <raft/sparse/detail/utils.h>

namespace raft {
namespace sparse {
namespace convert {
namespace detail {

template <typename value_t>
__global__ void csr_to_dense_warp_per_row_kernel(int n_cols,
@@ -83,8 +84,8 @@ void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols,
cudaStream_t stream, bool row_major = true) {
if (!row_major) {
/**
 * If we need col-major, use cusparse.
 */
cusparseMatDescr_t out_mat;
CUSPARSE_CHECK(cusparseCreateMatDescr(&out_mat));
CUSPARSE_CHECK(cusparseSetMatIndexBase(out_mat, CUSPARSE_INDEX_BASE_ZERO));
@@ -105,6 +106,7 @@ void csr_to_dense(cusparseHandle_t handle, value_idx nrows, value_idx ncols,
}
}

}; // namespace detail
}; // end NAMESPACE convert
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
46 changes: 46 additions & 0 deletions cpp/include/raft/sparse/coo.hpp
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/sparse/detail/coo.cuh>

namespace raft {
namespace sparse {

/** @brief A container object for sparse coordinate (COO) format. There are two motivations
* behind using a container for COO arrays.
*
* The first motivation is that it simplifies code, rather than always having
* to pass three arrays as function arguments.
*
* The second is more subtle, but much more important. The size
* of the resulting COO from a sparse operation is often not known ahead of time,
* since it depends on the contents of the underlying graph. The COO object can
* allocate the underlying arrays lazily so that the object can be created by the
* user and passed as an output argument in a sparse primitive. The sparse primitive
* would have the responsibility for allocating and populating the output arrays,
* while the original caller still maintains ownership of the underlying memory.
*
* @tparam value_t: the type of the value array.
* @tparam value_idx: the type of index array
*
*/
template <typename value_t, typename value_idx = int>
using COO = detail::COO<value_t, value_idx>;

}; // namespace sparse
}; // namespace raft
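The lazy-allocation pattern described in the doc comment above can be sketched with a minimal, hypothetical container (not raft's implementation): the caller constructs an empty object and a primitive allocates and fills it once the output size is known.

```cpp
#include <vector>

// Minimal sketch of the lazy-allocation pattern: the container starts
// empty; a primitive sizes it when nnz becomes known, while the caller
// retains ownership of the object (and thus the memory).
template <typename value_t, typename value_idx = int>
struct SimpleCOO {
  std::vector<value_idx> rows, cols;
  std::vector<value_t> vals;
  value_idx nnz = 0, n_rows = 0, n_cols = 0;

  void allocate(value_idx nnz_, value_idx m, value_idx n)
  {
    nnz = nnz_; n_rows = m; n_cols = n;
    rows.resize(nnz); cols.resize(nnz); vals.resize(nnz);
  }
};

// A toy "primitive": only it knows the output size, yet the caller keeps
// ownership through the container it passed in as an output argument.
template <typename value_t>
void identity_coo(int n, SimpleCOO<value_t>* out)
{
  out->allocate(n, n, n);  // size discovered inside the primitive
  for (int i = 0; i < n; ++i) {
    out->rows[i] = i; out->cols[i] = i; out->vals[i] = value_t{1};
  }
}
```

raft's actual `COO` (aliased here from `detail::COO`) follows this shape with device memory and a CUDA stream instead of host vectors.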