diff --git a/cpp/include/cugraph/prims/count_if_e.cuh b/cpp/include/cugraph/prims/count_if_e.cuh index e7caaedc2fa..5155beadb94 100644 --- a/cpp/include/cugraph/prims/count_if_e.cuh +++ b/cpp/include/cugraph/prims/count_if_e.cuh @@ -52,6 +52,7 @@ namespace cugraph { * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), property values for the source, and property values for the destination and returns if * this edge should be included in the returned count. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return GraphViewType::edge_type Number of times @p e_op returned true. */ template typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle, GraphViewType const& graph_view, VertexValueInputIterator vertex_value_input_first, - VertexOp v_op) + VertexOp v_op, + bool do_expensive_check = false) { using vertex_t = typename GraphViewType::vertex_type; + if (do_expensive_check) { + // currently, nothing to do + } + auto it = thrust::make_transform_iterator( thrust::make_counting_iterator(vertex_t{0}), detail::count_if_call_v_op_t{ diff --git a/cpp/include/cugraph/prims/extract_if_e.cuh b/cpp/include/cugraph/prims/extract_if_e.cuh index 382158587c7..4f7c3fc369b 100644 --- a/cpp/include/cugraph/prims/extract_if_e.cuh +++ b/cpp/include/cugraph/prims/extract_if_e.cuh @@ -113,6 +113,7 @@ struct call_e_op_t { * weight), property values for the source, and property values for the destination and returns a * boolean value to designate whether to include this edge in the returned edge list (if true is * returned) or not (if false is returned). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple, * rmm::device_uvector, * std::optional>> Tuple storing an @@ -129,12 +130,17 @@ extract_if_e(raft::handle_t const& handle, GraphViewType const& graph_view, EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, - EdgeOp e_op) + EdgeOp e_op, + bool do_expensive_check = false) { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + if (do_expensive_check) { + // currently, nothing to do + } + std::vector edgelist_edge_counts(graph_view.number_of_local_edge_partitions(), size_t{0}); for (size_t i = 0; i < edgelist_edge_counts.size(); ++i) { edgelist_edge_counts[i] = diff --git a/cpp/include/cugraph/prims/per_src_dst_key_transform_reduce_e.cuh b/cpp/include/cugraph/prims/per_src_dst_key_transform_reduce_e.cuh index e206fe86fa3..63bea458963 100644 --- a/cpp/include/cugraph/prims/per_src_dst_key_transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/per_src_dst_key_transform_reduce_e.cuh @@ -591,6 +591,7 @@ per_src_dst_key_transform_reduce_e( * transformed value to be reduced to (source key, value) pairs. * @param init Initial value to be added to the value in each transform-reduced (source key, value) * pair. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple Tuple of rmm::device_uvector and * rmm::device_uvector (if T is arithmetic scalar) or a tuple of rmm::device_uvector objects (if * T is a thrust::tuple type of arithmetic scalar types, one rmm::device_uvector object per scalar @@ -609,12 +610,17 @@ auto per_src_key_transform_reduce_e( EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionSrcKeyInputWrapper edge_partition_src_key_input, EdgeOp e_op, - T init) + T init, + bool do_expensive_check = false) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); static_assert(std::is_same::value); + if (do_expensive_check) { + // currently, nothing to do + } + return detail::per_src_dst_key_transform_reduce_e(handle, graph_view, edge_partition_src_value_input, @@ -663,6 +669,7 @@ auto per_src_key_transform_reduce_e( * transformed value to be reduced to (destination key, value) pairs. * @param init Initial value to be added to the value in each transform-reduced (destination key, * value) pair. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple Tuple of rmm::device_uvector and * rmm::device_uvector (if T is arithmetic scalar) or a tuple of rmm::device_uvector objects (if * T is a thrust::tuple type of arithmetic scalar types, one rmm::device_uvector object per scalar @@ -681,12 +688,17 @@ auto per_dst_key_transform_reduce_e( EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionDstKeyInputWrapper edge_partition_dst_key_input, EdgeOp e_op, - T init) + T init, + bool do_expensive_check = false) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); static_assert(std::is_same::value); + if (do_expensive_check) { + // currently, nothing to do + } + return detail::per_src_dst_key_transform_reduce_e(handle, graph_view, edge_partition_src_value_input, diff --git a/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index a745b2f3dd5..a52ff22aa79 100644 --- a/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -198,6 +198,7 @@ struct reduce_with_init_t { * first (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.local_vertex_partition_range_size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template keys(thrust::distance(map_unique_key_first, map_unique_key_last), + handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), map_unique_key_first, map_unique_key_last, keys.begin()); + thrust::sort(handle.get_thrust_policy(), keys.begin(), keys.end()); + auto has_duplicates = + (thrust::unique(handle.get_thrust_policy(), keys.begin(), keys.end()) != keys.end()); + + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto num_invalid_keys = thrust::count_if( + handle.get_thrust_policy(), + map_unique_key_first, + map_unique_key_last, + [comm_rank, + key_func = detail::compute_gpu_id_from_ext_vertex_t{ + comm_size}] __device__(auto key) { return key_func(key) != comm_rank; }); + num_invalid_keys = + host_scalar_allreduce(comm, num_invalid_keys, raft::comms::op_t::SUM, handle.get_stream()); + CUGRAPH_EXPECTS( + num_invalid_keys == 0, + "Invalid input argument: map (unique key, value) pairs should be pre-shuffled."); + + has_duplicates = + host_scalar_allreduce( + comm, has_duplicates ? int{1} : int{0}, raft::comms::op_t::MAX, handle.get_stream()) == + int{1} + ? true + : false; + } + + CUGRAPH_EXPECTS(has_duplicates == false, + "Invalid input argument: there are duplicates in [map_unique_key_first, " + "map_unique_key_last)."); + } + auto total_global_mem = handle.get_device_properties().totalGlobalMem; auto element_size = sizeof(vertex_t) * 2 + sizeof(weight_t); auto constexpr mem_frugal_ratio = diff --git a/cpp/include/cugraph/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/include/cugraph/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 309388b3d25..802a558c9c8 100644 --- a/cpp/include/cugraph/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/include/cugraph/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -916,6 +916,7 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.local_vertex_partition_range_size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template (handle, graph_view, edge_partition_src_value_input, @@ -975,6 +981,7 @@ void per_v_transform_reduce_incoming_e( * first (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.local_vertex_partition_range_size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template (handle, graph_view, edge_partition_src_value_input, diff --git a/cpp/include/cugraph/prims/reduce_v.cuh b/cpp/include/cugraph/prims/reduce_v.cuh index ae6192ea137..a690004e96a 100644 --- a/cpp/include/cugraph/prims/reduce_v.cuh +++ b/cpp/include/cugraph/prims/reduce_v.cuh @@ -52,6 +52,7 @@ namespace cugraph { * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has * known member variables) to take a more optimized code path. See the documentation in the * reduce_op.cuh file for instructions on writing custom reduction operators. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return T Reduced input vertex property values. */ template @@ -59,7 +60,8 @@ T reduce_v(raft::handle_t const& handle, GraphViewType const& graph_view, VertexValueInputIterator vertex_value_input_first, T init, - ReduceOp reduce_op) + ReduceOp reduce_op, + bool do_expensive_check = false) { using vertex_t = typename GraphViewType::vertex_type; @@ -68,6 +70,10 @@ T reduce_v(raft::handle_t const& handle, std::remove_cv_t::value_type>, std::remove_cv_t>); + if (do_expensive_check) { + // currently, nothing to do + } + if (graph_view.number_of_vertices() == vertex_t{0}) { return init; } T ret{}; @@ -183,14 +189,20 @@ T reduce_v(raft::handle_t const& handle, * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) * is deduced as @p vertex_value_input_first + @p graph_view.local_vertex_partition_range_size(). * @param init Initial value to be added to the reduced input vertex property values. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return T Reduced input vertex property values. */ template T reduce_v(raft::handle_t const& handle, GraphViewType const& graph_view, VertexValueInputIterator vertex_value_input_first, - T init) + T init, + bool do_expensive_check = false) { + if (do_expensive_check) { + // currently, nothing to do + } + return reduce_v(handle, graph_view, vertex_value_input_first, init, reduce_op::plus{}); } @@ -209,16 +221,22 @@ T reduce_v(raft::handle_t const& handle, * @param vertex_value_input_first Iterator pointing to the vertex property values for the first * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) * is deduced as @p vertex_value_input_first + @p graph_view.local_vertex_partition_range_size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Reduced input vertex property values. */ template auto reduce_v(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexValueInputIterator vertex_value_input_first) + VertexValueInputIterator vertex_value_input_first, + bool do_expensive_check = false) { using T = std::remove_cv_t::value_type>; + if (do_expensive_check) { + // currently, nothing to do + } + return reduce_v(handle, graph_view, vertex_value_input_first, T{}, reduce_op::plus{}); } diff --git a/cpp/include/cugraph/prims/transform_reduce_e.cuh b/cpp/include/cugraph/prims/transform_reduce_e.cuh index 54da768837d..7f0a6258daf 100644 --- a/cpp/include/cugraph/prims/transform_reduce_e.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_e.cuh @@ -384,6 +384,7 @@ __global__ void trasnform_reduce_e_high_degree( * weight), property values for the source, and property values for the destination and returns a * value to be reduced. * @param init Initial value to be added to the reduced @p edge_op outputs. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return T Transform-reduced @p edge_op outputs. */ template ::value); @@ -404,6 +406,10 @@ T transform_reduce_e(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; + if (do_expensive_check) { + // currently, nothing to do + } + property_op edge_property_add{}; auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); @@ -547,6 +553,7 @@ T transform_reduce_e(raft::handle_t const& handle, * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), property values for the source, and property values for the destination and returns a * value to be reduced. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Transform-reduced @p edge_op outputs. */ template ::type; + if (do_expensive_check) { + // currently, nothing to do + } + return transform_reduce_e( handle, graph_view, edge_partition_src_value_input, edge_partition_dst_value_input, e_op, T{}); } diff --git a/cpp/include/cugraph/prims/transform_reduce_v.cuh b/cpp/include/cugraph/prims/transform_reduce_v.cuh index 0b1dd937f5f..e1fa0d95c00 100644 --- a/cpp/include/cugraph/prims/transform_reduce_v.cuh +++ b/cpp/include/cugraph/prims/transform_reduce_v.cuh @@ -70,6 +70,7 @@ struct transform_reduce_call_v_op_t { * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has * known member variables) to take a more optimized code path. See the documentation in the * reduce_op.cuh file for instructions on writing custom reduction operators. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return T Transformed and reduced input vertex property values. */ template @@ -123,8 +130,13 @@ T transform_reduce_v(raft::handle_t const& handle, GraphViewType const& graph_view, VertexValueInputIterator vertex_value_input_first, VertexOp v_op, - T init) + T init, + bool do_expensive_check = false) { + if (do_expensive_check) { + // currently, nothing to do + } + return transform_reduce_v( handle, graph_view, vertex_value_input_first, v_op, init, reduce_op::plus{}); } @@ -146,19 +158,25 @@ T transform_reduce_v(raft::handle_t const& handle, * @param v_op Binary operator takes vertex ID and *(@p vertex_value_input_first + i) (where i is * [0, @p graph_view.local_vertex_partition_range_size())) and returns a transformed value to be * reduced. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Transformed and reduced input vertex property values. */ template auto transform_reduce_v(raft::handle_t const& handle, GraphViewType const& graph_view, VertexValueInputIterator vertex_value_input_first, - VertexOp v_op) + VertexOp v_op, + bool do_expensive_check = false) { using vertex_t = typename GraphViewType::vertex_type; using vertex_value_input_t = typename thrust::iterator_traits::value_type; using T = std::invoke_result_t; + if (do_expensive_check) { + // currently, nothing to do + } + return transform_reduce_v( handle, graph_view, vertex_value_input_first, v_op, T{}, reduce_op::plus{}); } diff --git a/cpp/include/cugraph/prims/update_edge_partition_src_dst_property.cuh b/cpp/include/cugraph/prims/update_edge_partition_src_dst_property.cuh index 14dcbdb8725..203e7f2e944 100644 --- a/cpp/include/cugraph/prims/update_edge_partition_src_dst_property.cuh +++ b/cpp/include/cugraph/prims/update_edge_partition_src_dst_property.cuh @@ -459,6 +459,7 @@ void update_edge_partition_minor_property( * @param edge_partition_src_property_output Device-copyable wrapper used to store source property * values (for the edge sources assigned to this process in multi-GPU). Use * cugraph::edge_partition_src_property_t::device_view(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template void update_edge_partition_src_property( @@ -468,8 +469,13 @@ void update_edge_partition_src_property( edge_partition_src_property_t< GraphViewType, typename std::iterator_traits::value_type>& - edge_partition_src_property_output) + edge_partition_src_property_output, + bool do_expensive_check = false) { + if (do_expensive_check) { + // currently, nothing to do + } + if constexpr (GraphViewType::is_storage_transposed) { update_edge_partition_minor_property( handle, graph_view, vertex_property_input_first, edge_partition_src_property_output); @@ -502,6 +508,7 @@ void update_edge_partition_src_property( * @param edge_partition_src_property_output Device-copyable wrapper used to store source property * values (for the edge sources assigned to this process in multi-GPU). Use * cugraph::edge_partition_src_property_t::device_view(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template void update_edge_partition_src_property( @@ -513,8 +520,29 @@ void update_edge_partition_src_property( edge_partition_src_property_t< GraphViewType, typename std::iterator_traits::value_type>& - edge_partition_src_property_output) + edge_partition_src_property_output, + bool do_expensive_check = false) { + if (do_expensive_check) { + auto num_invalids = thrust::count_if( + handle.get_thrust_policy(), + vertex_first, + vertex_last, + [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), + local_vertex_partition_range_last = + graph_view.local_vertex_partition_range_last()] __device__(auto v) { + return (v < local_vertex_partition_range_first) || (v >= local_vertex_partition_range_last); + }); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + num_invalids = + host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS( + num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + } + if constexpr (GraphViewType::is_storage_transposed) { detail::update_edge_partition_minor_property(handle, graph_view, @@ -551,6 +579,7 @@ void update_edge_partition_src_property( * @param edge_partition_dst_property_output Device-copyable wrapper used to store destination * property values (for the edge destinations assigned to this process in multi-GPU). Use * cugraph::edge_partition_dst_property_t::device_view(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template void update_edge_partition_dst_property( @@ -560,8 +589,13 @@ void update_edge_partition_dst_property( edge_partition_dst_property_t< GraphViewType, typename std::iterator_traits::value_type>& - edge_partition_dst_property_output) + edge_partition_dst_property_output, + bool do_expensive_check = false) { + if (do_expensive_check) { + // currently, nothing to do + } + if constexpr (GraphViewType::is_storage_transposed) { detail::update_edge_partition_major_property( handle, graph_view, vertex_property_input_first, edge_partition_dst_property_output); @@ -595,6 +629,7 @@ void update_edge_partition_dst_property( * @param edge_partition_dst_property_output Device-copyable wrapper used to store destination * property values (for the edge destinations assigned to this process in multi-GPU). Use * cugraph::edge_partition_dst_property_t::device_view(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template void update_edge_partition_dst_property( @@ -606,8 +641,29 @@ void update_edge_partition_dst_property( edge_partition_dst_property_t< GraphViewType, typename std::iterator_traits::value_type>& - edge_partition_dst_property_output) + edge_partition_dst_property_output, + bool do_expensive_check = false) { + if (do_expensive_check) { + auto num_invalids = thrust::count_if( + handle.get_thrust_policy(), + vertex_first, + vertex_last, + [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), + local_vertex_partition_range_last = + graph_view.local_vertex_partition_range_last()] __device__(auto v) { + return (v < local_vertex_partition_range_first) || (v >= local_vertex_partition_range_last); + }); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + num_invalids = + host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS( + num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + } + if constexpr (GraphViewType::is_storage_transposed) { detail::update_edge_partition_major_property(handle, graph_view,