Skip to content

Commit

Permalink
Graph primitives API updates (#2220)
Browse files Browse the repository at this point in the history
Partially address #2003.

1. Rename VertexFrontier & SortedUniqueKeyBucket to vertex_frontier_t & sorted_unique_key_bucket_t to be consistent with the naming schemes for graph_t & graph_view_t.
2. vertex_frontier_t to take number of buckets as an input parameter (instead of non-type template parameter) and rename `get_bucket` to `bucket`.
3. Use `constexpr size_t` instead of `enum class` for bucket indices to avoid unnecessary type casting.
4. Update `fill()` of `edge_partition_src|dst_property_t` to take `handle` instead of `stream` to be consistent with other member functions (e.g. `clear()`)
5. Remove `..._v` primitives that work on a subset of local vertices.
6. Update `v_op` to take vertex ID (to be consistent with `e_op` which takes source & destination IDs).
7. Other miscellaneous clean-ups.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Kumar Aatish (https://github.com/kaatish)

URL: #2220
  • Loading branch information
seunghwak authored Apr 19, 2022
1 parent a6dc6b7 commit 886c724
Show file tree
Hide file tree
Showing 20 changed files with 331 additions and 440 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,

auto execution_policy = handle.get_thrust_policy();
if constexpr (GraphViewType::is_multi_gpu) {
minor_tmp_buffer.fill(minor_init, handle.get_stream());
minor_tmp_buffer.fill(handle, minor_init);
} else {
thrust::fill(execution_policy,
vertex_value_output_first,
Expand Down
71 changes: 29 additions & 42 deletions cpp/include/cugraph/prims/count_if_v.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,24 @@

namespace cugraph {

namespace detail {

template <typename vertex_t, typename VertexValueInputIterator, typename VertexOp>
struct count_if_call_v_op_t {
  // First vertex ID of this process's local vertex partition; added to the local
  // offset to recover the global vertex ID expected by v_op.
  vertex_t local_vertex_partition_range_first{};
  VertexValueInputIterator vertex_value_input_first{};
  VertexOp v_op{};

  // Returns 1 if the vertex at local offset i satisfies the predicate, 0 otherwise,
  // so that the per-vertex results can be summed by thrust::reduce.  The return type
  // is vertex_t (not bool) to match the vertex_t{0} init value of the reduction and
  // the vertex_t{1}/vertex_t{0} values produced below, avoiding a narrowing of each
  // element to bool before it is accumulated.
  __device__ vertex_t operator()(vertex_t i)
  {
    return v_op(local_vertex_partition_range_first + i, *(vertex_value_input_first + i))
             ? vertex_t{1}
             : vertex_t{0};
  }
};

} // namespace detail

/**
* @brief Count the number of vertices that satisfies the given predicate.
*
Expand All @@ -42,8 +60,8 @@ namespace cugraph {
* @param vertex_value_input_first Iterator pointing to the vertex properties for the first
* (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive)
* is deduced as @p vertex_value_input_first + @p graph_view.local_vertex_partition_range_size().
* @param v_op Unary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p
* graph_view.local_vertex_partition_range_size())) and returns true if this vertex should be
* @param v_op Binary operator takes vertex ID and *(@p vertex_value_input_first + i) (where i is
* [0, @p graph_view.local_vertex_partition_range_size())) and returns true if this vertex should be
* included in the returned count.
* @return GraphViewType::vertex_type Number of times @p v_op returned true.
*/
Expand All @@ -53,47 +71,16 @@ typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle,
VertexValueInputIterator vertex_value_input_first,
VertexOp v_op)
{
auto count =
thrust::count_if(handle.get_thrust_policy(),
vertex_value_input_first,
vertex_value_input_first + graph_view.local_vertex_partition_range_size(),
v_op);
if (GraphViewType::is_multi_gpu) {
count =
host_scalar_allreduce(handle.get_comms(), count, raft::comms::op_t::SUM, handle.get_stream());
}
return count;
}
using vertex_t = typename GraphViewType::vertex_type;

/**
* @brief Count the number of vertices that satisfies the given predicate.
*
* This version (conceptually) iterates over only a subset of the graph vertices. This function
* actually works as thrust::count_if() on [@p input_first, @p input_last) (followed by
* inter-process reduction in multi-GPU). @p input_last - @p input_first (or the sum of @p
* input_last - @p input_first values in multi-GPU) should not overflow GraphViewType::vertex_type.
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam InputIterator Type of the iterator for input values.
* @tparam VertexOp VertexOp Type of the unary predicate operator.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param input_first Iterator pointing to the beginning (inclusive) of the values to be passed to
* @p v_op.
* @param input_last Iterator pointing to the end (exclusive) of the values to be passed to @p v_op.
* @param v_op Unary operator takes *(@p input_first + i) (where i is [0, @p input_last - @p
* input_first)) and returns true if this vertex should be included in the returned count.
* @return GraphViewType::vertex_type Number of times @p v_op returned true.
*/
template <typename GraphViewType, typename InputIterator, typename VertexOp>
typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle,
GraphViewType const& graph_view,
InputIterator input_first,
InputIterator input_last,
VertexOp v_op)
{
auto count = thrust::count_if(handle.get_thrust_policy(), input_first, input_last, v_op);
auto it = thrust::make_transform_iterator(
thrust::make_counting_iterator(vertex_t{0}),
detail::count_if_call_v_op_t<vertex_t, VertexValueInputIterator, VertexOp>{
graph_view.local_vertex_partition_range_first(), vertex_value_input_first, v_op});
auto count = thrust::reduce(handle.get_thrust_policy(),
it,
it + graph_view.local_vertex_partition_range_size(),
vertex_t{0});
if (GraphViewType::is_multi_gpu) {
count =
host_scalar_allreduce(handle.get_comms(), count, raft::comms::op_t::SUM, handle.get_stream());
Expand Down
22 changes: 14 additions & 8 deletions cpp/include/cugraph/prims/edge_partition_src_dst_property.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,12 @@ class edge_partition_major_property_t {
edge_partition_major_value_start_offsets_ = std::nullopt;
}

void fill(T value, rmm::cuda_stream_view stream)
void fill(raft::handle_t const& handle, T value)
{
thrust::fill(
rmm::exec_policy(stream), value_data(), value_data() + size_dataframe_buffer(buffer_), value);
thrust::fill(handle.get_thrust_policy(),
value_data(),
value_data() + size_dataframe_buffer(buffer_),
value);
}

auto key_first() { return key_first_; }
Expand All @@ -267,6 +269,7 @@ class edge_partition_major_property_t {
(*edge_partition_key_offsets_).back())
: std::nullopt;
}

auto value_data() { return get_dataframe_buffer_begin(buffer_); }

auto device_view() const
Expand Down Expand Up @@ -351,14 +354,17 @@ class edge_partition_minor_property_t {
shrink_to_fit_dataframe_buffer(buffer_, handle.get_stream());
}

void fill(T value, rmm::cuda_stream_view stream)
void fill(raft::handle_t const& handle, T value)
{
thrust::fill(
rmm::exec_policy(stream), value_data(), value_data() + size_dataframe_buffer(buffer_), value);
thrust::fill(handle.get_thrust_policy(),
value_data(),
value_data() + size_dataframe_buffer(buffer_),
value);
}

auto key_first() { return key_first_; }
auto key_last() { return key_last_; }

auto value_data() { return get_dataframe_buffer_begin(buffer_); }

auto device_view() const
Expand Down Expand Up @@ -480,7 +486,7 @@ class edge_partition_src_property_t {

void clear(raft::handle_t const& handle) { property_.clear(handle); }

void fill(T value, rmm::cuda_stream_view stream) { property_.fill(value, stream); }
void fill(raft::handle_t const& handle, T value) { property_.fill(handle, value); }

auto key_first() { return property_.key_first(); }
auto key_last() { return property_.key_last(); }
Expand Down Expand Up @@ -561,7 +567,7 @@ class edge_partition_dst_property_t {

void clear(raft::handle_t const& handle) { property_.clear(handle); }

void fill(T value, rmm::cuda_stream_view stream) { property_.fill(value, stream); }
void fill(raft::handle_t const& handle, T value) { property_.fill(handle, value); }

auto key_first() { return property_.key_first(); }
auto key_last() { return property_.key_last(); }
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cugraph/prims/property_op_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ struct property_op<thrust::tuple<Args...>, Op>

private:
template <typename T, std::size_t... Is>
__host__ __device__ constexpr auto sum_impl(T& t1, T& t2, std::index_sequence<Is...>)
__host__ __device__ constexpr auto binary_op_impl(T& t1, T& t2, std::index_sequence<Is...>)
{
return thrust::make_tuple((Op<typename thrust::tuple_element<Is, Type>::type>()(
thrust::get<Is>(t1), thrust::get<Is>(t2)))...);
Expand All @@ -200,7 +200,7 @@ struct property_op<thrust::tuple<Args...>, Op>
public:
__host__ __device__ constexpr auto operator()(const Type& t1, const Type& t2)
{
return sum_impl(t1, t2, std::make_index_sequence<thrust::tuple_size<Type>::value>());
return binary_op_impl(t1, t2, std::make_index_sequence<thrust::tuple_size<Type>::value>());
}
};

Expand Down
40 changes: 0 additions & 40 deletions cpp/include/cugraph/prims/reduce_v.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,44 +68,4 @@ T reduce_v(raft::handle_t const& handle,
return ret;
}

/**
* @brief Reduce the vertex properties.
*
* This version (conceptually) iterates over only a subset of the graph vertices. This function
* actually works as thrust::reduce() on [@p input_first, @p input_last) (followed by
* inter-process reduction in multi-GPU).
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam InputIterator Type of the iterator for input values.
* @tparam T Type of the initial value.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param input_first Iterator pointing to the beginning (inclusive) of the values to be reduced.
* @param input_last Iterator pointing to the end (exclusive) of the values to be reduced.
* @param init Initial value to be added to the reduced input vertex properties.
* @return T Reduction of the input vertex properties.
*/
template <typename GraphViewType, typename InputIterator, typename T>
T reduce_v(raft::handle_t const& handle,
GraphViewType const& graph_view,
InputIterator input_first,
InputIterator input_last,
T init = T{},
raft::comms::op_t op = raft::comms::op_t::SUM)
{
auto ret = op_dispatch<T>(op, [&handle, &graph_view, input_first, input_last, init](auto op) {
return thrust::reduce(
handle.get_thrust_policy(),
input_first,
input_last,
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? T{} : init,
op);
});
if constexpr (GraphViewType::is_multi_gpu) {
ret = host_scalar_allreduce(handle.get_comms(), ret, op, handle.get_stream());
}
return ret;
}

} // namespace cugraph
85 changes: 30 additions & 55 deletions cpp/include/cugraph/prims/transform_reduce_v.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@

namespace cugraph {

namespace detail {

template <typename vertex_t, typename VertexValueInputIterator, typename VertexOp, typename T>
struct transform_reduce_call_v_op_t {
  // First vertex ID of this process's local vertex partition.
  vertex_t local_vertex_partition_range_first{};
  VertexValueInputIterator vertex_value_input_first{};
  VertexOp v_op{};

  // Applies v_op to (global vertex ID, vertex property value) for the vertex at local
  // offset v_offset and returns the transformed value to be reduced.
  __device__ T operator()(vertex_t v_offset)
  {
    auto const v = local_vertex_partition_range_first + v_offset;
    return v_op(v, *(vertex_value_input_first + v_offset));
  }
};

} // namespace detail

/**
* @brief Apply an operator to the vertex properties and reduce.
*
Expand All @@ -43,8 +59,9 @@ namespace cugraph {
* @param vertex_value_input_first Iterator pointing to the vertex properties for the first
* (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive)
* is deduced as @p vertex_value_input_first + @p graph_view.local_vertex_partition_range_size().
* @param v_op Unary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p
* graph_view.local_vertex_partition_range_size())) and returns a transformed value to be reduced.
* @param v_op Binary operator takes vertex ID and *(@p vertex_value_input_first + i) (where i is
* [0, @p graph_view.local_vertex_partition_range_size())) and returns a transformed value to be
* reduced.
* @param init Initial value to be added to the transform-reduced input vertex properties.
* @return T Reduction of the @p v_op outputs.
*/
Expand All @@ -56,61 +73,19 @@ T transform_reduce_v(raft::handle_t const& handle,
T init,
raft::comms::op_t op = raft::comms::op_t::SUM)
{
auto id = identity_element<T>(op);
auto ret =
op_dispatch<T>(op, [&handle, &graph_view, vertex_value_input_first, v_op, id, init](auto op) {
return thrust::transform_reduce(
handle.get_thrust_policy(),
vertex_value_input_first,
vertex_value_input_first + graph_view.local_vertex_partition_range_size(),
v_op,
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? id : init,
op);
});
if (GraphViewType::is_multi_gpu) {
ret = host_scalar_allreduce(handle.get_comms(), ret, op, handle.get_stream());
}
return ret;
}
using vertex_t = typename GraphViewType::vertex_type;

/**
* @brief Apply an operator to the vertex properties and reduce.
*
* This version (conceptually) iterates over only a subset of the graph vertices. This function
* actually works as thrust::transform_reduce() on [@p input_first, @p input_last) (followed by
* inter-process reduction in multi-GPU).
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam InputIterator Type of the iterator for input values.
* @tparam VertexOp
* @tparam T Type of the initial value.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param input_first Iterator pointing to the beginning (inclusive) of the values to be passed to
* @p v_op.
* @param input_last Iterator pointing to the end (exclusive) of the values to be passed to @p v_op.
* @param v_op Unary operator takes *(@p input_first + i) (where i is [0, @p input_last - @p
* input_first)) and returns a transformed value to be reduced.
* @param init Initial value to be added to the transform-reduced input vertex properties.
* @return T Reduction of the @p v_op outputs.
*/
template <typename GraphViewType, typename InputIterator, typename VertexOp, typename T>
T transform_reduce_v(raft::handle_t const& handle,
GraphViewType const& graph_view,
InputIterator input_first,
InputIterator input_last,
VertexOp v_op,
T init = T{},
raft::comms::op_t op = raft::comms::op_t::SUM)
{
auto ret = op_dispatch<T>(op, [&handle, input_first, input_last, v_op, init](auto op) {
return thrust::transform_reduce(
auto id = identity_element<T>(op);
auto it = thrust::make_transform_iterator(
thrust::make_counting_iterator(vertex_t{0}),
detail::transform_reduce_call_v_op_t<vertex_t, VertexValueInputIterator, VertexOp, T>{
graph_view.local_vertex_partition_range_first(), vertex_value_input_first, v_op});
auto ret = op_dispatch<T>(op, [&handle, &graph_view, it, id, init](auto op) {
return thrust::reduce(
handle.get_thrust_policy(),
input_first,
input_last,
v_op,
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? T{} : init,
it,
it + graph_view.local_vertex_partition_range_size(),
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? id : init,
op);
});
if (GraphViewType::is_multi_gpu) {
Expand Down
Loading

0 comments on commit 886c724

Please sign in to comment.