ReduceV test (#1710)
Added test for reduce_v primitive.

Authors:
  - Kumar Aatish (https://github.com/kaatish)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: #1710
kaatish authored Jul 28, 2021
1 parent e5b3599 commit e074f82
Showing 10 changed files with 445 additions and 76 deletions.
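The new MG test itself (cpp/tests/prims/mg_reduce_v.cu, registered in CMakeLists.txt at the bottom of this diff) is not rendered on this page. Based on the reduce_v overloads changed below, which take (handle, graph_view, vertex_value_input_first, init), a test of this primitive would reduce the per-vertex values on the device and compare the result against a host-side reference. A minimal single-GPU sketch of that check follows; the helper name, the namespace qualification, and the cudaMemcpy-based host copy are illustrative assumptions, not code from this commit:

```cpp
// Hypothetical validation helper (NOT the actual mg_reduce_v.cu): reduce the
// per-vertex values with reduce_v, then compare against std::accumulate on host.
#include <cugraph/prims/reduce_v.cuh>

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <cuda_runtime.h>

#include <cassert>
#include <numeric>
#include <vector>

template <typename GraphViewType, typename T>
void check_reduce_v(raft::handle_t const& handle,
                    GraphViewType const& graph_view,
                    rmm::device_uvector<T> const& vertex_values,  // one value per local vertex
                    T init)
{
  // result from the primitive under test
  auto device_result =
    cugraph::experimental::reduce_v(handle, graph_view, vertex_values.data(), init);

  // host-side reference over the same values
  std::vector<T> h_values(vertex_values.size());
  cudaMemcpyAsync(h_values.data(),
                  vertex_values.data(),
                  vertex_values.size() * sizeof(T),
                  cudaMemcpyDeviceToHost,
                  handle.get_stream());
  cudaStreamSynchronize(handle.get_stream());
  auto host_result = std::accumulate(h_values.begin(), h_values.end(), init);

  // exact match is reasonable for integral T; use a tolerance for floating point.
  // (In the MG case reduce_v allreduces, so ranks would compare global sums instead.)
  assert(device_result == host_result);
}
```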
38 changes: 21 additions & 17 deletions cpp/include/cugraph/prims/copy_v_transform_reduce_in_out_nbr.cuh
@@ -17,7 +17,7 @@

#include <cugraph/experimental/graph_view.hpp>
#include <cugraph/matrix_partition_device_view.cuh>
-#include <cugraph/prims/edge_op_utils.cuh>
+#include <cugraph/prims/property_op_utils.cuh>
#include <cugraph/prims/reduce_op.cuh>
#include <cugraph/utilities/dataframe_buffer.cuh>
#include <cugraph/utilities/device_comm.cuh>
@@ -75,6 +75,7 @@ __global__ void for_all_major_for_all_nbr_hypersparse(

auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count());

+property_add<T> edge_property_add{};
while (idx < static_cast<size_t>(dcs_nzd_vertex_count)) {
auto major =
*(matrix_partition.get_major_from_major_hypersparse_idx_nocheck(static_cast<vertex_t>(idx)));
@@ -118,13 +119,13 @@
};

if (update_major) {
-*(result_value_output_first + (major - major_hypersparse_first)) = thrust::transform_reduce(
-thrust::seq,
-thrust::make_counting_iterator(edge_t{0}),
-thrust::make_counting_iterator(local_degree),
-transform_op,
-init,
-[] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); });
+*(result_value_output_first + (major - major_hypersparse_first)) =
+thrust::transform_reduce(thrust::seq,
+thrust::make_counting_iterator(edge_t{0}),
+thrust::make_counting_iterator(local_degree),
+transform_op,
+init,
+edge_property_add);
} else {
thrust::for_each(
thrust::seq,
@@ -169,6 +170,7 @@ __global__ void for_all_major_for_all_nbr_low_degree(
auto major_start_offset = static_cast<size_t>(major_first - matrix_partition.get_major_first());
auto idx = static_cast<size_t>(tid);

+property_add<T> edge_property_add{};
while (idx < static_cast<size_t>(major_last - major_first)) {
auto major_offset = major_start_offset + idx;
vertex_t const* indices{nullptr};
@@ -212,13 +214,13 @@
};

if (update_major) {
-*(result_value_output_first + idx) = thrust::transform_reduce(
-thrust::seq,
-thrust::make_counting_iterator(edge_t{0}),
-thrust::make_counting_iterator(local_degree),
-transform_op,
-init,
-[] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); });
+*(result_value_output_first + idx) =
+thrust::transform_reduce(thrust::seq,
+thrust::make_counting_iterator(edge_t{0}),
+thrust::make_counting_iterator(local_degree),
+transform_op,
+init,
+edge_property_add);
} else {
thrust::for_each(
thrust::seq,
@@ -266,6 +268,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree(
auto major_start_offset = static_cast<size_t>(major_first - matrix_partition.get_major_first());
auto idx = static_cast<size_t>(tid / raft::warp_size());

+property_add<e_op_result_t> edge_property_add{};
while (idx < static_cast<size_t>(major_last - major_first)) {
auto major_offset = major_start_offset + idx;
vertex_t const* indices{nullptr};
@@ -302,7 +305,7 @@
*(adj_matrix_col_value_input_first + col_offset),
e_op);
if (update_major) {
-e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result);
+e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result);
} else {
atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result);
}
@@ -344,6 +347,7 @@ __global__ void for_all_major_for_all_nbr_high_degree(
auto major_start_offset = static_cast<size_t>(major_first - matrix_partition.get_major_first());
auto idx = static_cast<size_t>(blockIdx.x);

+property_add<e_op_result_t> edge_property_add{};
while (idx < static_cast<size_t>(major_last - major_first)) {
auto major_offset = major_start_offset + idx;
vertex_t const* indices{nullptr};
@@ -380,7 +384,7 @@
*(adj_matrix_col_value_input_first + col_offset),
e_op);
if (update_major) {
-e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result);
+e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result);
} else {
atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result);
}
2 changes: 1 addition & 1 deletion cpp/include/cugraph/prims/count_if_e.cuh
@@ -16,7 +16,7 @@
#pragma once

#include <cugraph/experimental/graph_view.hpp>
-#include <cugraph/prims/edge_op_utils.cuh>
+#include <cugraph/prims/property_op_utils.cuh>
#include <cugraph/prims/transform_reduce_e.cuh>

#include <raft/handle.hpp>
cpp/include/cugraph/prims/{edge_op_utils.cuh → property_op_utils.cuh} (renamed)
@@ -123,18 +123,28 @@ struct cast_edge_op_bool_to_integer {
};

template <typename T>
-__host__ __device__ std::enable_if_t<std::is_arithmetic<T>::value, T> plus_edge_op_result(
-T const& lhs, T const& rhs)
-{
-return lhs + rhs;
-}
+struct property_add : public thrust::plus<T> {
+};

-template <typename T>
-__host__ __device__ std::enable_if_t<is_thrust_tuple<T>::value, T> plus_edge_op_result(T const& lhs,
-T const& rhs)
-{
-return plus_thrust_tuple<T>()(lhs, rhs);
-}
+template <typename... Args>
+struct property_add<thrust::tuple<Args...>>
+: public thrust::
+binary_function<thrust::tuple<Args...>, thrust::tuple<Args...>, thrust::tuple<Args...>> {
+using Type = thrust::tuple<Args...>;
+
+private:
+template <typename T, std::size_t... I>
+__device__ constexpr auto sum_impl(T& t1, T& t2, std::index_sequence<I...>)
+{
+return thrust::make_tuple((thrust::get<I>(t1) + thrust::get<I>(t2))...);
+}
+
+public:
+__device__ constexpr auto operator()(const Type& t1, const Type& t2)
+{
+return sum_impl(t1, t2, std::make_index_sequence<thrust::tuple_size<Type>::value>());
+}
+};

template <typename Iterator, typename T>
__device__ std::enable_if_t<thrust::detail::is_discard_iterator<Iterator>::value, void>
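This hunk replaces the free function plus_edge_op_result (one overload for arithmetic types, one for thrust::tuple) with a single functor template, property_add<T>. The primary template just inherits thrust::plus<T>; the thrust::tuple specialization sums tuples element-wise through an index_sequence pack expansion, and derives from thrust::binary_function, presumably to supply the nested argument/result typedefs that older Thrust code expects. Because it is a named functor rather than a __device__ lambda, the call sites above can pass one edge_property_add instance to thrust::transform_reduce and reuse it for scalar accumulation. A minimal sketch of using the tuple specialization directly; the cugraph::experimental namespace qualification is assumed from the surrounding headers, and the snippet is illustrative rather than part of the commit:

```cpp
// Sum two per-vertex properties (a count and a weight) in one pass using the
// thrust::tuple specialization of property_add. Illustrative sketch only.
#include <cugraph/prims/property_op_utils.cuh>

#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/tuple.h>

#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> h_counts{1, 2, 3};
  std::vector<float> h_weights{0.5f, 1.5f, 2.5f};
  thrust::device_vector<int> counts(h_counts.begin(), h_counts.end());
  thrust::device_vector<float> weights(h_weights.begin(), h_weights.end());

  auto first = thrust::make_zip_iterator(thrust::make_tuple(counts.begin(), weights.begin()));

  // property_add<thrust::tuple<...>> adds the tuples element-wise on the device;
  // thrust::plus would not compile here because tuples have no operator+.
  auto sum = thrust::reduce(first,
                            first + counts.size(),
                            thrust::make_tuple(0, 0.0f),
                            cugraph::experimental::property_add<thrust::tuple<int, float>>{});

  std::printf("count sum = %d, weight sum = %f\n", thrust::get<0>(sum), thrust::get<1>(sum));
  return 0;
}
```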
17 changes: 12 additions & 5 deletions cpp/include/cugraph/prims/reduce_v.cuh
@@ -16,6 +16,7 @@
#pragma once

#include <cugraph/experimental/graph_view.hpp>
+#include <cugraph/prims/property_op_utils.cuh>
#include <cugraph/utilities/error.hpp>
#include <cugraph/utilities/host_scalar_comm.cuh>

@@ -51,10 +52,12 @@ T reduce_v(raft::handle_t const& handle,
VertexValueInputIterator vertex_value_input_first,
T init)
{
-auto ret = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-vertex_value_input_first,
-vertex_value_input_first + graph_view.get_number_of_local_vertices(),
-init);
+auto ret = thrust::reduce(
+rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+vertex_value_input_first,
+vertex_value_input_first + graph_view.get_number_of_local_vertices(),
+((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() == 0)) ? init : T{},
+property_add<T>());
if (GraphViewType::is_multi_gpu) {
ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream());
}
@@ -87,7 +90,11 @@ T reduce_v(raft::handle_t const& handle,
T init)
{
auto ret = thrust::reduce(
-rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), input_first, input_last, init);
+rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+input_first,
+input_last,
+((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() == 0)) ? init : T{},
+property_add<T>());
if (GraphViewType::is_multi_gpu) {
ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream());
}
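The functional change in reduce_v, besides switching to property_add<T> so that thrust::tuple-valued properties reduce correctly: in the multi-GPU path every rank reduces its local vertex range and the partial sums are then combined with host_scalar_allreduce, so seeding each rank's thrust::reduce with init would count init once per rank. The new code therefore folds init in on rank 0 only and seeds the remaining ranks with the identity T{}. A small host-side walkthrough with made-up numbers:

```cpp
// Hypothetical 3-rank walkthrough of the rank-0 init seeding in reduce_v.
#include <cstdio>

int main()
{
  int const init          = 10;
  int const local_sums[3] = {5, 7, 9};  // per-rank local thrust::reduce results

  // rank 0 seeds with init, the other ranks seed with T{} (the identity):
  int correct = (init + local_sums[0]) + local_sums[1] + local_sums[2];  // 31

  // seeding every rank with init would over-count it (comm_size - 1) times:
  int wrong = (init + local_sums[0]) + (init + local_sums[1]) + (init + local_sums[2]);  // 51

  std::printf("allreduce with rank-0 seeding: %d, with per-rank seeding: %d\n", correct, wrong);
  return 0;
}
```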
(file path not captured in this view)
@@ -18,7 +18,7 @@
#include <cugraph/experimental/detail/graph_utils.cuh>
#include <cugraph/experimental/graph_view.hpp>
#include <cugraph/matrix_partition_device_view.cuh>
-#include <cugraph/prims/edge_op_utils.cuh>
+#include <cugraph/prims/property_op_utils.cuh>
#include <cugraph/utilities/dataframe_buffer.cuh>
#include <cugraph/utilities/error.hpp>
#include <cugraph/utilities/shuffle_comm.cuh>
33 changes: 19 additions & 14 deletions cpp/include/cugraph/prims/transform_reduce_e.cuh
@@ -16,7 +16,7 @@
#pragma once

#include <cugraph/experimental/graph_view.hpp>
-#include <cugraph/prims/edge_op_utils.cuh>
+#include <cugraph/prims/property_op_utils.cuh>
#include <cugraph/utilities/error.hpp>
#include <cugraph/utilities/host_scalar_comm.cuh>

@@ -65,6 +65,7 @@ __global__ void for_all_major_for_all_nbr_hypersparse(

auto dcs_nzd_vertex_count = *(matrix_partition.get_dcs_nzd_vertex_count());

+property_add<e_op_result_t> edge_property_add{};
e_op_result_t e_op_result_sum{};
while (idx < static_cast<size_t>(dcs_nzd_vertex_count)) {
auto major =
@@ -111,9 +112,9 @@
e_op);
},
e_op_result_t{},
-[] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); });
+edge_property_add);

-e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum);
+e_op_result_sum = edge_property_add(e_op_result_sum, sum);
idx += gridDim.x * blockDim.x;
}

@@ -149,6 +150,7 @@ __global__ void for_all_major_for_all_nbr_low_degree(
auto major_start_offset = static_cast<size_t>(major_first - matrix_partition.get_major_first());
size_t idx = static_cast<size_t>(tid);

+property_add<e_op_result_t> edge_property_add{};
e_op_result_t e_op_result_sum{};
while (idx < static_cast<size_t>(major_last - major_first)) {
auto major_offset = major_start_offset + idx;
@@ -195,9 +197,9 @@
e_op);
},
e_op_result_t{},
-[] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); });
+edge_property_add);

-e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum);
+e_op_result_sum = edge_property_add(e_op_result_sum, sum);
idx += gridDim.x * blockDim.x;
}

@@ -235,6 +237,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree(
auto major_start_offset = static_cast<size_t>(major_first - matrix_partition.get_major_first());
size_t idx = static_cast<size_t>(tid / raft::warp_size());

+property_add<e_op_result_t> edge_property_add{};
e_op_result_t e_op_result_sum{};
while (idx < static_cast<size_t>(major_last - major_first)) {
auto major_offset = major_start_offset + idx;
@@ -269,7 +272,7 @@
*(adj_matrix_row_value_input_first + row_offset),
*(adj_matrix_col_value_input_first + col_offset),
e_op);
-e_op_result_sum = plus_edge_op_result<e_op_result_t>(e_op_result_sum, e_op_result);
+e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result);
}
idx += gridDim.x * (blockDim.x / raft::warp_size());
}
@@ -305,6 +308,7 @@ __global__ void for_all_major_for_all_nbr_high_degree(
auto major_start_offset = static_cast<size_t>(major_first - matrix_partition.get_major_first());
size_t idx = static_cast<size_t>(blockIdx.x);

+property_add<e_op_result_t> edge_property_add{};
e_op_result_t e_op_result_sum{};
while (idx < static_cast<size_t>(major_last - major_first)) {
auto major_offset = major_start_offset + idx;
@@ -339,7 +343,7 @@
*(adj_matrix_row_value_input_first + row_offset),
*(adj_matrix_col_value_input_first + col_offset),
e_op);
-e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result);
+e_op_result_sum = edge_property_add(e_op_result_sum, e_op_result);
}
idx += gridDim.x;
}
@@ -400,6 +404,8 @@ T transform_reduce_e(raft::handle_t const& handle,
using edge_t = typename GraphViewType::edge_type;
using weight_t = typename GraphViewType::weight_type;

+property_add<T> edge_property_add{};

auto result_buffer = allocate_dataframe_buffer<T>(1, handle.get_stream());
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
get_dataframe_buffer_begin<T>(result_buffer),
@@ -498,18 +504,17 @@
}
}

-auto result =
-thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-get_dataframe_buffer_begin<T>(result_buffer),
-get_dataframe_buffer_begin<T>(result_buffer) + 1,
-T{},
-[] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); });
+auto result = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+get_dataframe_buffer_begin<T>(result_buffer),
+get_dataframe_buffer_begin<T>(result_buffer) + 1,
+T{},
+edge_property_add);

if (GraphViewType::is_multi_gpu) {
result = host_scalar_allreduce(handle.get_comms(), result, handle.get_stream());
}

-return plus_edge_op_result(init, result);
+return edge_property_add(init, result);
}

} // namespace experimental
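Note that transform_reduce_e handles init with the opposite convention: the device-side reduction starts from T{} on every rank, and init is folded in exactly once, after the allreduce, by the final edge_property_add(init, result). Either way init contributes a single time; reduce_v above simply applies it before the allreduce instead of after.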
(file path not captured in this view)
@@ -18,7 +18,7 @@
#include <cugraph/experimental/graph_view.hpp>
#include <cugraph/matrix_partition_device_view.cuh>
#include <cugraph/partition_manager.hpp>
-#include <cugraph/prims/edge_op_utils.cuh>
+#include <cugraph/prims/property_op_utils.cuh>
#include <cugraph/prims/reduce_op.cuh>
#include <cugraph/utilities/dataframe_buffer.cuh>
#include <cugraph/utilities/device_comm.cuh>
26 changes: 0 additions & 26 deletions cpp/include/cugraph/utilities/thrust_tuple_utils.cuh
@@ -61,20 +61,6 @@ struct compute_thrust_tuple_element_sizes_impl<TupleType, I, I> {
void compute(std::array<size_t, thrust::tuple_size<TupleType>::value>& arr) const {}
};

-template <typename TupleType, size_t I, size_t N>
-struct plus_thrust_tuple_impl {
-__host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const
-{
-thrust::get<I>(lhs) += thrust::get<I>(rhs);
-plus_thrust_tuple_impl<TupleType, I + 1, N>().compute(lhs, rhs);
-}
-};
-
-template <typename TupleType, size_t I>
-struct plus_thrust_tuple_impl<TupleType, I, I> {
-__host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const {}
-};

template <typename T>
__device__ std::enable_if_t<std::is_arithmetic<T>::value, void> atomic_accumulate_impl(
thrust::detail::any_assign& /* dereferencing thrust::discard_iterator results in this type */ lhs,
@@ -193,18 +179,6 @@ struct compute_thrust_tuple_element_sizes {
}
};

-template <typename TupleType>
-struct plus_thrust_tuple {
-__host__ __device__ constexpr TupleType operator()(TupleType const& lhs,
-TupleType const& rhs) const
-{
-size_t constexpr tuple_size = thrust::tuple_size<TupleType>::value;
-auto ret = lhs;
-detail::plus_thrust_tuple_impl<TupleType, size_t{0}, tuple_size>().compute(ret, rhs);
-return ret;
-}
-};

template <typename Iterator, typename TupleType>
struct atomic_accumulate_thrust_tuple {
__device__ constexpr void operator()(Iterator iter, TupleType const& value) const
4 changes: 4 additions & 0 deletions cpp/tests/CMakeLists.txt
@@ -463,6 +463,10 @@ if(BUILD_CUGRAPH_MG_TESTS)
###########################################################################################
# - MG PRIMS COUNT_IF_V tests -------------------------------------------------------------
ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu)

+###########################################################################################
+# - MG PRIMS REDUCE_V tests ---------------------------------------------------------------
+ConfigureTestMG(MG_REDUCE_V_TEST prims/mg_reduce_v.cu)
else()
message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.")
endif()
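ConfigureTestMG, like the MG_COUNT_IF_V_TEST entry above it, registers an MPI-enabled gtest target, so with BUILD_CUGRAPH_MG_TESTS enabled the new test would typically be invoked through ctest (for example, ctest -R MG_REDUCE_V_TEST), with the multi-process launch handled by whatever wiring ConfigureTestMG sets up; that wiring is not shown in this diff.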
(The diff for the remaining changed file, the new test cpp/tests/prims/mg_reduce_v.cu registered in CMakeLists.txt above, did not load in this view.)
