diff --git a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh index e5b64b1e02f..c7521831fb8 100644 --- a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -127,7 +127,7 @@ class edge_partition_edge_property_device_view_t { value_t> atomic_add(edge_t offset, value_t val) const { - cugraph::atomic_add(value_first_ + offset, val); + return cugraph::atomic_add(value_first_ + offset, val); } template @@ -154,7 +154,7 @@ class edge_partition_edge_property_device_view_t { value_t> elementwise_atomic_min(edge_t offset, value_t val) const { - cugraph::elementwise_atomic_min(value_first_ + offset, val); + return cugraph::elementwise_atomic_min(value_first_ + offset, val); } template @@ -164,7 +164,7 @@ class edge_partition_edge_property_device_view_t { value_t> elementwise_atomic_max(edge_t offset, value_t val) const { - cugraph::elementwise_atomic_max(value_first_ + offset, val); + return cugraph::elementwise_atomic_max(value_first_ + offset, val); } private: diff --git a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh index 7578c646175..824662a957e 100644 --- a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -133,7 +133,7 @@ class edge_partition_endpoint_property_device_view_t { atomic_add(vertex_t offset, value_t val) const { auto val_offset = value_offset(offset); - cugraph::atomic_add(value_first_ + val_offset, val); + return cugraph::atomic_add(value_first_ + val_offset, val); } template @@ -162,7 +162,7 @@ class edge_partition_endpoint_property_device_view_t { elementwise_atomic_min(vertex_t offset, value_t val) const { auto val_offset = value_offset(offset); - cugraph::elementwise_atomic_min(value_first_ + val_offset, val); + return cugraph::elementwise_atomic_min(value_first_ + val_offset, val); } template @@ -173,7 +173,7 @@ class edge_partition_endpoint_property_device_view_t { elementwise_atomic_max(vertex_t offset, value_t val) const { auto val_offset = value_offset(offset); - cugraph::elementwise_atomic_max(value_first_ + val_offset, val); + return cugraph::elementwise_atomic_max(value_first_ + val_offset, val); } private: diff --git a/cpp/include/cugraph/utilities/atomic_ops.cuh b/cpp/include/cugraph/utilities/atomic_ops.cuh index 6af9841d71f..2b6102a6a17 100644 --- a/cpp/include/cugraph/utilities/atomic_ops.cuh +++ b/cpp/include/cugraph/utilities/atomic_ops.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -112,7 +113,7 @@ __device__ T> atomic_and(Iterator iter, T value) { - detail::thrust_tuple_atomic_and( + return detail::thrust_tuple_atomic_and( iter, value, std::make_index_sequence::value>{}); } @@ -140,7 +141,7 @@ __device__ T> atomic_or(Iterator iter, T value) { - detail::thrust_tuple_atomic_or( + return detail::thrust_tuple_atomic_or( iter, value, std::make_index_sequence::value>{}); } @@ -155,22 +156,22 @@ template __device__ std::enable_if_t && std::is_same_v::value_type, T>, - void> + T> atomic_add(Iterator iter, T value) { - atomicAdd(&(thrust::raw_reference_cast(*iter)), value); + return atomicAdd(&(thrust::raw_reference_cast(*iter)), value); } template __device__ std::enable_if_t::value_type>::value && is_thrust_tuple::value, - void> + T> atomic_add(Iterator iter, T value) { static_assert(thrust::tuple_size::value_type>::value == thrust::tuple_size::value); - detail::thrust_tuple_atomic_add( + return detail::thrust_tuple_atomic_add( iter, value, std::make_index_sequence::value>{}); } @@ -191,7 +192,7 @@ __device__ T> elementwise_atomic_cas(Iterator iter, T compare, T value) { - detail::thrust_tuple_elementwise_atomic_cas( + return detail::thrust_tuple_elementwise_atomic_cas( iter, compare, value, std::make_index_sequence::value>{}); } @@ -206,22 +207,22 @@ template __device__ std::enable_if_t::value_type, T>::value && std::is_arithmetic::value, - void> + T> elementwise_atomic_min(Iterator iter, T const& value) { - atomicMin(&(thrust::raw_reference_cast(*iter)), value); + return atomicMin(&(thrust::raw_reference_cast(*iter)), value); } template __device__ std::enable_if_t::value_type>::value && is_thrust_tuple::value, - void> + T> elementwise_atomic_min(Iterator iter, T const& value) { static_assert(thrust::tuple_size::value_type>::value == thrust::tuple_size::value); - detail::thrust_tuple_elementwise_atomic_min( + return detail::thrust_tuple_elementwise_atomic_min( iter, value, 
std::make_index_sequence::value>{}); } @@ -236,23 +237,35 @@ template __device__ std::enable_if_t::value_type, T>::value && std::is_arithmetic::value, - void> + T> elementwise_atomic_max(Iterator iter, T const& value) { - atomicMax(&(thrust::raw_reference_cast(*iter)), value); + return atomicMax(&(thrust::raw_reference_cast(*iter)), value); } template __device__ std::enable_if_t::value_type>::value && is_thrust_tuple::value, - void> + T> elementwise_atomic_max(Iterator iter, T const& value) { static_assert(thrust::tuple_size::value_type>::value == thrust::tuple_size::value); - detail::thrust_tuple_elementwise_atomic_max( + return detail::thrust_tuple_elementwise_atomic_max( iter, value, std::make_index_sequence::value>{}); } +template +__device__ void packed_bool_atomic_set(Iterator iter, T offset, bool val) +{ + auto packed_output_offset = packed_bool_offset(offset); + auto packed_output_mask = packed_bool_mask(offset); + if (val) { + atomicOr(iter + packed_output_offset, packed_output_mask); + } else { + atomicAnd(iter + packed_output_offset, ~packed_output_mask); + } +} + } // namespace cugraph diff --git a/cpp/include/cugraph/utilities/dataframe_buffer.hpp b/cpp/include/cugraph/utilities/dataframe_buffer.hpp index ab4c4eff6b5..a20613c65ef 100644 --- a/cpp/include/cugraph/utilities/dataframe_buffer.hpp +++ b/cpp/include/cugraph/utilities/dataframe_buffer.hpp @@ -40,30 +40,30 @@ auto allocate_dataframe_buffer_tuple_impl(std::index_sequence, buffer_size, stream_view)...); } -template -auto get_dataframe_buffer_begin_tuple_impl(std::index_sequence, TupleType& buffer) +template +auto get_dataframe_buffer_begin_tuple_impl(std::index_sequence, TupleType& buffer) { - return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).begin())...)); + return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).begin())...)); } -template -auto get_dataframe_buffer_end_tuple_impl(std::index_sequence, TupleType& buffer) +template +auto 
get_dataframe_buffer_end_tuple_impl(std::index_sequence, TupleType& buffer) { - return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).end())...)); + return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).end())...)); } -template -auto get_dataframe_buffer_cbegin_tuple_impl(std::index_sequence, TupleType& buffer) +template +auto get_dataframe_buffer_cbegin_tuple_impl(std::index_sequence, TupleType& buffer) { // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. - return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).cbegin())...)); + return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).cbegin())...)); } -template -auto get_dataframe_buffer_cend_tuple_impl(std::index_sequence, TupleType& buffer) +template +auto get_dataframe_buffer_cend_tuple_impl(std::index_sequence, TupleType& buffer) { // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. - return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).cend())...)); + return thrust::make_zip_iterator(thrust::make_tuple((std::get(buffer).cend())...)); } } // namespace detail diff --git a/cpp/src/c_api/graph_helper_impl.cuh b/cpp/src/c_api/graph_helper_impl.cuh index a0614c5e08e..6377fde8fee 100644 --- a/cpp/src/c_api/graph_helper_impl.cuh +++ b/cpp/src/c_api/graph_helper_impl.cuh @@ -37,7 +37,7 @@ edge_property_t create_constant_edge_property(raft::handle_t c { edge_property_t edge_property(handle, graph_view); - cugraph::fill_edge_property(handle, graph_view, constant_value, edge_property); + cugraph::fill_edge_property(handle, graph_view, edge_property.mutable_view(), constant_value); return edge_property; } diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 7b68d88abea..8ae49ed207c 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ 
b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -23,7 +23,7 @@ #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_v.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -130,8 +130,8 @@ std::tuple, rmm::device_uvector> brandes_b edge_t hop{0}; while (true) { - update_edge_src_property(handle, graph_view, sigmas.begin(), src_sigmas); - update_edge_dst_property(handle, graph_view, distances.begin(), dst_distances); + update_edge_src_property(handle, graph_view, sigmas.begin(), src_sigmas.mutable_view()); + update_edge_dst_property(handle, graph_view, distances.begin(), dst_distances.mutable_view()); auto [new_frontier, new_sigma] = transform_reduce_v_frontier_outgoing_e_by_dst(handle, @@ -228,12 +228,12 @@ void accumulate_vertex_results( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - src_properties); + src_properties.mutable_view()); update_edge_dst_property( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - dst_properties); + dst_properties.mutable_view()); // FIXME: To do this efficiently, I need a version of // per_v_transform_reduce_outgoing_e that takes a vertex list @@ -272,12 +272,12 @@ void accumulate_vertex_results( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - src_properties); + src_properties.mutable_view()); update_edge_dst_property( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - dst_properties); + dst_properties.mutable_view()); thrust::transform(handle.get_thrust_policy(), centralities.begin(), @@ -323,12 +323,12 @@ void accumulate_edge_results( 
handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - src_properties); + src_properties.mutable_view()); update_edge_dst_property( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - dst_properties); + dst_properties.mutable_view()); // // For now this will do a O(E) pass over all edges over the diameter @@ -417,12 +417,12 @@ void accumulate_edge_results( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - src_properties); + src_properties.mutable_view()); update_edge_dst_property( handle, graph_view, thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), - dst_properties); + dst_properties.mutable_view()); } } @@ -594,9 +594,11 @@ edge_betweenness_centrality( if (graph_view.has_edge_mask()) { auto unmasked_graph_view = graph_view; unmasked_graph_view.clear_edge_mask(); - fill_edge_property(handle, unmasked_graph_view, weight_t{0}, centralities, do_expensive_check); + fill_edge_property( + handle, unmasked_graph_view, centralities.mutable_view(), weight_t{0}, do_expensive_check); } else { - fill_edge_property(handle, graph_view, weight_t{0}, centralities, do_expensive_check); + fill_edge_property( + handle, graph_view, centralities.mutable_view(), weight_t{0}, do_expensive_check); } size_t num_sources = thrust::distance(vertices_begin, vertices_end); diff --git a/cpp/src/centrality/eigenvector_centrality_impl.cuh b/cpp/src/centrality/eigenvector_centrality_impl.cuh index 09436b68189..922f5b44f83 100644 --- a/cpp/src/centrality/eigenvector_centrality_impl.cuh +++ b/cpp/src/centrality/eigenvector_centrality_impl.cuh @@ -98,7 +98,7 @@ rmm::device_uvector eigenvector_centrality( old_centralities.data()); update_edge_src_property( - handle, pull_graph_view, old_centralities.begin(), edge_src_centralities); + handle, pull_graph_view, old_centralities.begin(), edge_src_centralities.mutable_view()); if 
(edge_weight_view) { per_v_transform_reduce_incoming_e( diff --git a/cpp/src/centrality/katz_centrality_impl.cuh b/cpp/src/centrality/katz_centrality_impl.cuh index 132d7aa5039..953e0ee381c 100644 --- a/cpp/src/centrality/katz_centrality_impl.cuh +++ b/cpp/src/centrality/katz_centrality_impl.cuh @@ -107,7 +107,7 @@ void katz_centrality( std::swap(new_katz_centralities, old_katz_centralities); update_edge_src_property( - handle, pull_graph_view, old_katz_centralities, edge_src_katz_centralities); + handle, pull_graph_view, old_katz_centralities, edge_src_katz_centralities.mutable_view()); if (edge_weight_view) { per_v_transform_reduce_incoming_e( diff --git a/cpp/src/community/approx_weighted_matching_impl.cuh b/cpp/src/community/approx_weighted_matching_impl.cuh index e693beee489..a0ccfa52ffc 100644 --- a/cpp/src/community/approx_weighted_matching_impl.cuh +++ b/cpp/src/community/approx_weighted_matching_impl.cuh @@ -50,9 +50,11 @@ std::tuple, weight_t> approximate_weighted_matchin if (current_graph_view.has_edge_mask()) { current_graph_view.clear_edge_mask(); } cugraph::edge_property_t edge_masks_even(handle, current_graph_view); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_even.mutable_view(), bool{false}); cugraph::edge_property_t edge_masks_odd(handle, current_graph_view); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_odd.mutable_view(), bool{false}); if (graph_view.has_edge_mask()) { current_graph_view.attach_edge_mask(*(graph_view.edge_mask_view())); @@ -99,7 +101,8 @@ std::tuple, weight_t> approximate_weighted_matchin if constexpr (graph_view_t::is_multi_gpu) { src_key_cache = edge_src_property_t(handle, current_graph_view); - update_edge_src_property(handle, current_graph_view, local_vertices.begin(), src_key_cache); + 
update_edge_src_property( + handle, current_graph_view, local_vertices.begin(), src_key_cache.mutable_view()); src_match_flags = cugraph::edge_src_property_t(handle, current_graph_view); dst_match_flags = cugraph::edge_dst_property_t(handle, current_graph_view); @@ -296,9 +299,9 @@ std::tuple, weight_t> approximate_weighted_matchin if constexpr (graph_view_t::is_multi_gpu) { cugraph::update_edge_src_property( - handle, current_graph_view, is_vertex_matched.begin(), src_match_flags); + handle, current_graph_view, is_vertex_matched.begin(), src_match_flags.mutable_view()); cugraph::update_edge_dst_property( - handle, current_graph_view, is_vertex_matched.begin(), dst_match_flags); + handle, current_graph_view, is_vertex_matched.begin(), dst_match_flags.mutable_view()); } if (loop_counter % 2 == 0) { @@ -330,7 +333,8 @@ std::tuple, weight_t> approximate_weighted_matchin } if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_even.mutable_view(), bool{false}); current_graph_view.attach_edge_mask(edge_masks_odd.view()); } else { if constexpr (graph_view_t::is_multi_gpu) { @@ -361,7 +365,8 @@ std::tuple, weight_t> approximate_weighted_matchin } if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_odd.mutable_view(), bool{false}); current_graph_view.attach_edge_mask(edge_masks_even.view()); } diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index dcad4e92b95..e17abdb3703 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -299,7 +299,7 @@ rmm::device_uvector update_clustering_by_delta_modularity( 
edge_src_property_t, weight_t>(handle, graph_view); update_edge_src_property( - handle, graph_view, vertex_cluster_weights_v.begin(), src_cluster_weights); + handle, graph_view, vertex_cluster_weights_v.begin(), src_cluster_weights.mutable_view()); vertex_cluster_weights_v.resize(0, handle.get_stream()); vertex_cluster_weights_v.shrink_to_fit(handle.get_stream()); } else { @@ -367,7 +367,7 @@ rmm::device_uvector update_clustering_by_delta_modularity( graph_view, thrust::make_zip_iterator(thrust::make_tuple( old_cluster_sum_v.begin(), cluster_subtract_v.begin())), - src_old_cluster_sum_subtract_pairs); + src_old_cluster_sum_subtract_pairs.mutable_view()); old_cluster_sum_v.resize(0, handle.get_stream()); old_cluster_sum_v.shrink_to_fit(handle.get_stream()); cluster_subtract_v.resize(0, handle.get_stream()); diff --git a/cpp/src/community/detail/maximal_independent_moves.cuh b/cpp/src/community/detail/maximal_independent_moves.cuh index 82d20a04203..85892f711ba 100644 --- a/cpp/src/community/detail/maximal_independent_moves.cuh +++ b/cpp/src/community/detail/maximal_independent_moves.cuh @@ -163,8 +163,10 @@ rmm::device_uvector maximal_independent_moves( if constexpr (multi_gpu) { src_rank_cache = edge_src_property_t(handle, graph_view); dst_rank_cache = edge_dst_property_t(handle, graph_view); - update_edge_src_property(handle, graph_view, temporary_ranks.begin(), src_rank_cache); - update_edge_dst_property(handle, graph_view, temporary_ranks.begin(), dst_rank_cache); + update_edge_src_property( + handle, graph_view, temporary_ranks.begin(), src_rank_cache.mutable_view()); + update_edge_dst_property( + handle, graph_view, temporary_ranks.begin(), dst_rank_cache.mutable_view()); } // diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index ef34ad90584..99fc1cd6fae 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -262,12 +262,16 @@ refine_clustering( // Update cluster 
weight, weighted degree and cut for edge sources src_louvain_cluster_weight_cache = edge_src_property_t(handle, graph_view); - update_edge_src_property( - handle, graph_view, vertex_louvain_cluster_weights.begin(), src_louvain_cluster_weight_cache); + update_edge_src_property(handle, + graph_view, + vertex_louvain_cluster_weights.begin(), + src_louvain_cluster_weight_cache.mutable_view()); src_cut_to_louvain_cache = edge_src_property_t(handle, graph_view); - update_edge_src_property( - handle, graph_view, weighted_cut_of_vertices_to_louvain.begin(), src_cut_to_louvain_cache); + update_edge_src_property(handle, + graph_view, + weighted_cut_of_vertices_to_louvain.begin(), + src_cut_to_louvain_cache.mutable_view()); vertex_louvain_cluster_weights.resize(0, handle.get_stream()); vertex_louvain_cluster_weights.shrink_to_fit(handle.get_stream()); @@ -329,15 +333,15 @@ refine_clustering( edge_src_property_t(handle, graph_view); update_edge_src_property( - handle, graph_view, leiden_assignment.begin(), src_leiden_assignment_cache); + handle, graph_view, leiden_assignment.begin(), src_leiden_assignment_cache.mutable_view()); update_edge_dst_property( - handle, graph_view, leiden_assignment.begin(), dst_leiden_assignment_cache); + handle, graph_view, leiden_assignment.begin(), dst_leiden_assignment_cache.mutable_view()); update_edge_src_property(handle, graph_view, singleton_and_connected_flags.begin(), - src_singleton_and_connected_flag_cache); + src_singleton_and_connected_flag_cache.mutable_view()); } auto src_input_property_values = diff --git a/cpp/src/community/ecg_impl.cuh b/cpp/src/community/ecg_impl.cuh index 2698f3cad4d..d01b13f0b35 100644 --- a/cpp/src/community/ecg_impl.cuh +++ b/cpp/src/community/ecg_impl.cuh @@ -64,7 +64,8 @@ std::tuple, size_t, weight_t> ecg( edge_dst_property_t dst_cluster_assignments(handle, graph_view); edge_property_t modified_edge_weights(handle, graph_view); - cugraph::fill_edge_property(handle, graph_view, weight_t{0}, 
modified_edge_weights); + cugraph::fill_edge_property( + handle, graph_view, modified_edge_weights.mutable_view(), weight_t{0}); weight_t modularity = -1.0; rmm::device_uvector cluster_assignments(graph_view.local_vertex_partition_range_size(), @@ -82,9 +83,9 @@ std::tuple, size_t, weight_t> ecg( resolution); cugraph::update_edge_src_property( - handle, graph_view, cluster_assignments.begin(), src_cluster_assignments); + handle, graph_view, cluster_assignments.begin(), src_cluster_assignments.mutable_view()); cugraph::update_edge_dst_property( - handle, graph_view, cluster_assignments.begin(), dst_cluster_assignments); + handle, graph_view, cluster_assignments.begin(), dst_cluster_assignments.mutable_view()); cugraph::transform_e( handle, @@ -128,9 +129,9 @@ std::tuple, size_t, weight_t> ecg( if constexpr (multi_gpu) { cugraph::update_edge_src_property( - handle, graph_view, cluster_assignments.begin(), src_cluster_assignments); + handle, graph_view, cluster_assignments.begin(), src_cluster_assignments.mutable_view()); cugraph::update_edge_dst_property( - handle, graph_view, cluster_assignments.begin(), dst_cluster_assignments); + handle, graph_view, cluster_assignments.begin(), dst_cluster_assignments.mutable_view()); } auto [cluster_keys, cluster_weights] = cugraph::detail::compute_cluster_keys_and_values( diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index f830e6a7700..f2d4c04dffd 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -595,8 +595,10 @@ k_truss(raft::handle_t const& handle, cur_graph_view); edge_dst_property_t edge_dst_out_degrees(handle, cur_graph_view); - update_edge_src_property(handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees); - update_edge_dst_property(handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees); + update_edge_src_property( + handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); + 
update_edge_dst_property( + handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); rmm::device_uvector srcs(0, handle.get_stream()); rmm::device_uvector dsts(0, handle.get_stream()); @@ -695,7 +697,7 @@ k_truss(raft::handle_t const& handle, (*num_triangles).begin()); cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); + cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), true); cur_graph_view.attach_edge_mask(edge_mask.view()); while (true) { diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 166eb334301..da790a5dd66 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -236,8 +236,10 @@ std::pair>, weight_t> leiden( if constexpr (graph_view_t::is_multi_gpu) { src_vertex_weights_cache = edge_src_property_t(handle, current_graph_view); - update_edge_src_property( - handle, current_graph_view, vertex_weights.begin(), src_vertex_weights_cache); + update_edge_src_property(handle, + current_graph_view, + vertex_weights.begin(), + src_vertex_weights_cache.mutable_view()); } #ifdef TIMING @@ -265,13 +267,13 @@ std::pair>, weight_t> leiden( update_edge_src_property(handle, current_graph_view, louvain_assignment_for_vertices.begin(), - src_louvain_assignment_cache); + src_louvain_assignment_cache.mutable_view()); dst_louvain_assignment_cache = edge_dst_property_t(handle, current_graph_view); update_edge_dst_property(handle, current_graph_view, louvain_assignment_for_vertices.begin(), - dst_louvain_assignment_cache); + dst_louvain_assignment_cache.mutable_view()); } weight_t new_Q = detail::compute_modularity(handle, @@ -328,11 +330,11 @@ std::pair>, weight_t> leiden( update_edge_src_property(handle, current_graph_view, louvain_assignment_for_vertices.begin(), - src_louvain_assignment_cache); + src_louvain_assignment_cache.mutable_view()); 
update_edge_dst_property(handle, current_graph_view, louvain_assignment_for_vertices.begin(), - dst_louvain_assignment_cache); + dst_louvain_assignment_cache.mutable_view()); } std::tie(cluster_keys, cluster_weights) = @@ -403,11 +405,11 @@ std::pair>, weight_t> leiden( update_edge_src_property(handle, current_graph_view, louvain_assignment_for_vertices.begin(), - src_louvain_assignment_cache); + src_louvain_assignment_cache.mutable_view()); update_edge_dst_property(handle, current_graph_view, louvain_assignment_for_vertices.begin(), - dst_louvain_assignment_cache); + dst_louvain_assignment_cache.mutable_view()); } std::tie(refined_leiden_partition, leiden_to_louvain_map) = diff --git a/cpp/src/community/louvain_impl.cuh b/cpp/src/community/louvain_impl.cuh index a4b4b4a7bcd..ef90e8fd555 100644 --- a/cpp/src/community/louvain_impl.cuh +++ b/cpp/src/community/louvain_impl.cuh @@ -138,8 +138,10 @@ std::pair>, weight_t> louvain( src_vertex_weights_cache = edge_src_property_t(handle, current_graph_view); - update_edge_src_property( - handle, current_graph_view, vertex_weights_v.begin(), src_vertex_weights_cache); + update_edge_src_property(handle, + current_graph_view, + vertex_weights_v.begin(), + src_vertex_weights_cache.mutable_view()); vertex_weights_v.resize(0, handle.get_stream()); vertex_weights_v.shrink_to_fit(handle.get_stream()); } @@ -167,10 +169,10 @@ std::pair>, weight_t> louvain( if constexpr (multi_gpu) { src_clusters_cache = edge_src_property_t(handle, current_graph_view); update_edge_src_property( - handle, current_graph_view, next_clusters_v.begin(), src_clusters_cache); + handle, current_graph_view, next_clusters_v.begin(), src_clusters_cache.mutable_view()); dst_clusters_cache = edge_dst_property_t(handle, current_graph_view); update_edge_dst_property( - handle, current_graph_view, next_clusters_v.begin(), dst_clusters_cache); + handle, current_graph_view, next_clusters_v.begin(), dst_clusters_cache.mutable_view()); } weight_t new_Q = 
detail::compute_modularity(handle, @@ -208,9 +210,9 @@ std::pair>, weight_t> louvain( if constexpr (graph_view_t::is_multi_gpu) { update_edge_src_property( - handle, current_graph_view, next_clusters_v.begin(), src_clusters_cache); + handle, current_graph_view, next_clusters_v.begin(), src_clusters_cache.mutable_view()); update_edge_dst_property( - handle, current_graph_view, next_clusters_v.begin(), dst_clusters_cache); + handle, current_graph_view, next_clusters_v.begin(), dst_clusters_cache.mutable_view()); } std::tie(cluster_keys_v, cluster_weights_v) = detail::compute_cluster_keys_and_values( diff --git a/cpp/src/community/triangle_count_impl.cuh b/cpp/src/community/triangle_count_impl.cuh index 7bccfafa043..0b453cfe262 100644 --- a/cpp/src/community/triangle_count_impl.cuh +++ b/cpp/src/community/triangle_count_impl.cuh @@ -194,7 +194,8 @@ void triangle_count(raft::handle_t const& handle, if (vertices) { cugraph::edge_property_t within_two_hop_edge_mask( handle, cur_graph_view); - cugraph::fill_edge_property(handle, unmasked_cur_graph_view, false, within_two_hop_edge_mask); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, within_two_hop_edge_mask.mutable_view(), false); rmm::device_uvector unique_vertices((*vertices).size(), handle.get_stream()); thrust::copy( @@ -322,10 +323,14 @@ void triangle_count(raft::handle_t const& handle, handle, cur_graph_view); edge_dst_property_t edge_dst_within_two_hop_flags( handle, cur_graph_view); - update_edge_src_property( - handle, cur_graph_view, within_two_hop_flags.begin(), edge_src_within_two_hop_flags); - update_edge_dst_property( - handle, cur_graph_view, within_two_hop_flags.begin(), edge_dst_within_two_hop_flags); + update_edge_src_property(handle, + cur_graph_view, + within_two_hop_flags.begin(), + edge_src_within_two_hop_flags.mutable_view()); + update_edge_dst_property(handle, + cur_graph_view, + within_two_hop_flags.begin(), + edge_dst_within_two_hop_flags.mutable_view()); transform_e( handle, 
@@ -348,7 +353,8 @@ void triangle_count(raft::handle_t const& handle, { cugraph::edge_property_t self_loop_edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, unmasked_cur_graph_view, false, self_loop_edge_mask); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, self_loop_edge_mask.mutable_view(), false); transform_e( handle, @@ -369,7 +375,8 @@ void triangle_count(raft::handle_t const& handle, { cugraph::edge_property_t in_two_core_edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, unmasked_cur_graph_view, false, in_two_core_edge_mask); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, in_two_core_edge_mask.mutable_view(), false); rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), handle.get_stream()); @@ -388,9 +395,9 @@ void triangle_count(raft::handle_t const& handle, in_two_core_first + core_numbers.size(), in_two_core_flags.begin()); update_edge_src_property( - handle, cur_graph_view, in_two_core_flags.begin(), edge_src_in_two_cores); + handle, cur_graph_view, in_two_core_flags.begin(), edge_src_in_two_cores.mutable_view()); update_edge_dst_property( - handle, cur_graph_view, in_two_core_flags.begin(), edge_dst_in_two_cores); + handle, cur_graph_view, in_two_core_flags.begin(), edge_dst_in_two_cores.mutable_view()); transform_e( handle, @@ -420,8 +427,10 @@ void triangle_count(raft::handle_t const& handle, cur_graph_view); edge_dst_property_t edge_dst_out_degrees(handle, cur_graph_view); - update_edge_src_property(handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees); - update_edge_dst_property(handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees); + update_edge_src_property( + handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); + update_edge_dst_property( + handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); auto [srcs, dsts] = extract_transform_e(handle, cur_graph_view, 
edge_src_out_degrees.view(), diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index 550edf9807a..b593c639946 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -16,7 +16,6 @@ */ #pragma once -#include "prims/fill_edge_property.cuh" #include "prims/fill_edge_src_dst_property.cuh" #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -174,8 +173,10 @@ rmm::device_uvector maximal_independent_set( if constexpr (multi_gpu) { src_rank_cache = edge_src_property_t(handle, graph_view); dst_rank_cache = edge_dst_property_t(handle, graph_view); - update_edge_src_property(handle, graph_view, temporary_ranks.begin(), src_rank_cache); - update_edge_dst_property(handle, graph_view, temporary_ranks.begin(), dst_rank_cache); + update_edge_src_property( + handle, graph_view, temporary_ranks.begin(), src_rank_cache.mutable_view()); + update_edge_dst_property( + handle, graph_view, temporary_ranks.begin(), dst_rank_cache.mutable_view()); } // diff --git a/cpp/src/components/vertex_coloring_impl.cuh b/cpp/src/components/vertex_coloring_impl.cuh index bffaab34990..fa7fb1f6099 100644 --- a/cpp/src/components/vertex_coloring_impl.cuh +++ b/cpp/src/components/vertex_coloring_impl.cuh @@ -39,10 +39,12 @@ rmm::device_uvector vertex_coloring( // edge mask cugraph::edge_property_t edge_masks_even(handle, current_graph_view); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_even.mutable_view(), bool{false}); cugraph::edge_property_t edge_masks_odd(handle, current_graph_view); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_odd.mutable_view(), bool{false}); cugraph::transform_e( handle, @@ -99,10 +101,10 @@ rmm::device_uvector vertex_coloring( 
cugraph::edge_dst_property_t(handle, current_graph_view); cugraph::update_edge_src_property( - handle, current_graph_view, is_vertex_in_mis.begin(), src_mis_flags); + handle, current_graph_view, is_vertex_in_mis.begin(), src_mis_flags.mutable_view()); cugraph::update_edge_dst_property( - handle, current_graph_view, is_vertex_in_mis.begin(), dst_mis_flags); + handle, current_graph_view, is_vertex_in_mis.begin(), dst_mis_flags.mutable_view()); } if (color_id % 2 == 0) { @@ -123,7 +125,8 @@ rmm::device_uvector vertex_coloring( edge_masks_odd.mutable_view()); if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_even.mutable_view(), bool{false}); current_graph_view.attach_edge_mask(edge_masks_odd.view()); } else { cugraph::transform_e( @@ -143,7 +146,8 @@ rmm::device_uvector vertex_coloring( edge_masks_even.mutable_view()); if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + cugraph::fill_edge_property( + handle, current_graph_view, edge_masks_odd.mutable_view(), bool{false}); current_graph_view.attach_edge_mask(edge_masks_even.view()); } diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index f63f28210d8..681523dad90 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -16,7 +16,7 @@ #pragma once #include "prims/fill_edge_src_dst_property.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -488,8 
+488,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle, ? edge_dst_property_t(handle, level_graph_view) : edge_dst_property_t(handle); if constexpr (GraphViewType::is_multi_gpu) { - fill_edge_dst_property( - handle, level_graph_view, invalid_component_id::value, edge_dst_components); + fill_edge_dst_property(handle, + level_graph_view, + edge_dst_components.mutable_view(), + invalid_component_id::value); } // 2.4 iterate till every vertex gets visited @@ -534,7 +536,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle, thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()), level_components, - edge_dst_components); + edge_dst_components.mutable_view()); } auto max_pushes = GraphViewType::is_multi_gpu diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh index 358b0ac2e00..d807ccac5a5 100644 --- a/cpp/src/cores/core_number_impl.cuh +++ b/cpp/src/cores/core_number_impl.cuh @@ -16,7 +16,7 @@ #pragma once #include "prims/reduce_v.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -177,7 +177,7 @@ void core_number(raft::handle_t const& handle, edge_dst_property_t, edge_t> dst_core_numbers( handle, graph_view); - update_edge_dst_property(handle, graph_view, core_numbers, dst_core_numbers); + update_edge_dst_property(handle, graph_view, core_numbers, dst_core_numbers.mutable_view()); auto k = std::max(k_first, size_t{2}); // degree 0|1 vertices belong to 0|1-core if (graph_view.is_symmetric() && (degree_type == k_core_degree_type_t::INOUT) && @@ -267,7 +267,7 @@ void core_number(raft::handle_t const& handle, vertex_frontier.bucket(bucket_idx_next).begin(), 
vertex_frontier.bucket(bucket_idx_next).end(), core_numbers, - dst_core_numbers); + dst_core_numbers.mutable_view()); vertex_frontier.bucket(bucket_idx_next) .resize(static_cast(thrust::distance( diff --git a/cpp/src/link_analysis/hits_impl.cuh b/cpp/src/link_analysis/hits_impl.cuh index 6cf27b8c00b..d03d80a8a75 100644 --- a/cpp/src/link_analysis/hits_impl.cuh +++ b/cpp/src/link_analysis/hits_impl.cuh @@ -105,9 +105,10 @@ std::tuple hits(raft::handle_t const& handle, // Initialize hubs from user input if provided if (has_initial_hubs_guess) { - update_edge_src_property(handle, graph_view, prev_hubs, prev_src_hubs); + update_edge_src_property(handle, graph_view, prev_hubs, prev_src_hubs.mutable_view()); } else { - fill_edge_src_property(handle, graph_view, result_t{1.0} / num_vertices, prev_src_hubs); + fill_edge_src_property( + handle, graph_view, prev_src_hubs.mutable_view(), result_t{1.0} / num_vertices); thrust::fill(handle.get_thrust_policy(), prev_hubs, prev_hubs + graph_view.local_vertex_partition_range_size(), @@ -128,7 +129,7 @@ std::tuple hits(raft::handle_t const& handle, reduce_op::plus{}, authorities); - update_edge_dst_property(handle, graph_view, authorities, curr_dst_auth); + update_edge_dst_property(handle, graph_view, authorities, curr_dst_auth.mutable_view()); // Update current source hubs property per_v_transform_reduce_outgoing_e( @@ -166,7 +167,7 @@ std::tuple hits(raft::handle_t const& handle, [] __device__(auto, auto val) { return std::abs(thrust::get<0>(val) - thrust::get<1>(val)); }, result_t{0}); - update_edge_src_property(handle, graph_view, curr_hubs, prev_src_hubs); + update_edge_src_property(handle, graph_view, curr_hubs, prev_src_hubs.mutable_view()); // Swap pointers for the next iteration // After this swap call, prev_hubs has the latest value of hubs diff --git a/cpp/src/link_analysis/pagerank_impl.cuh b/cpp/src/link_analysis/pagerank_impl.cuh index 9a7e0319de5..7976eac0e89 100644 --- a/cpp/src/link_analysis/pagerank_impl.cuh +++ 
b/cpp/src/link_analysis/pagerank_impl.cuh @@ -261,7 +261,8 @@ centrality_algorithm_metadata_t pagerank( return pagerank / divisor; }); - update_edge_src_property(handle, pull_graph_view, pageranks.data(), edge_src_pageranks); + update_edge_src_property( + handle, pull_graph_view, pageranks.data(), edge_src_pageranks.mutable_view()); auto unvarying_part = aggregate_personalization_vector_size == 0 ? (dangling_sum * alpha + static_cast(1.0 - alpha)) / diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 6a8882dcfab..487f31e5e03 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -299,7 +299,7 @@ all_pairs_similarity(raft::handle_t const& handle, // FIXME: If vertices is specified, this could be done on a subset of the vertices // edge_dst_property_t edge_dst_degrees(handle, graph_view); - update_edge_dst_property(handle, graph_view, degrees.begin(), edge_dst_degrees); + update_edge_dst_property(handle, graph_view, degrees.begin(), edge_dst_degrees.mutable_view()); per_v_transform_reduce_incoming_e( handle, diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 04448c9e51d..177c79ace87 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -118,6 +118,7 @@ __device__ void warp_push_buffer_elements( } template buffer_idx(*buffer_idx_ptr); - __shared__ edge_t - warp_local_degree_inclusive_sums[extract_transform_v_frontier_e_kernel_block_size]; - __shared__ edge_t warp_key_local_edge_offsets[extract_transform_v_frontier_e_kernel_block_size]; + int32_t constexpr shared_array_size = max_one_e_per_frontier_key + ? 
int32_t{1} /* dummy */ + : extract_transform_v_frontier_e_kernel_block_size; + __shared__ std::conditional_t + warp_local_degree_inclusive_sums[shared_array_size]; + __shared__ std::conditional_t + warp_key_local_edge_offsets[shared_array_size]; using WarpScan = cub::WarpScan; - __shared__ typename WarpScan::TempStorage temp_storage; + __shared__ std:: + conditional_t + temp_storage; auto indices = edge_partition.indices(); @@ -179,104 +186,128 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( ((static_cast(num_keys) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); while (idx < rounded_up_num_keys) { - auto min_key_idx = static_cast(idx - (idx % raft::warp_size())); // inclusive - auto max_key_idx = - static_cast(std::min(static_cast(min_key_idx) + raft::warp_size(), - static_cast(num_keys))); // exclusive - - // update warp_local_degree_inclusive_sums & warp_key_local_edge_offsets + auto call_e_op = call_e_op_with_key_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op}; + edge_t edge_offset{0}; edge_t local_degree{0}; - if (lane_id < static_cast(max_key_idx - min_key_idx)) { + if (idx < num_keys) { auto key = *(key_first + idx); auto major = thrust_tuple_get_or_identity(key); if constexpr (hypersparse) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - auto major_idx = major_start_offset + *major_hypersparse_idx; - local_degree = edge_partition.local_degree(major_idx); - warp_key_local_edge_offsets[threadIdx.x] = edge_partition.local_offset(major_idx); - } else { - local_degree = edge_t{0}; - warp_key_local_edge_offsets[threadIdx.x] = edge_t{0}; // dummy + auto major_idx = major_start_offset + *major_hypersparse_idx; + edge_offset = edge_partition.local_offset(major_idx); + local_degree = edge_partition.local_degree(major_idx); } } else { auto major_offset = 
edge_partition.major_offset_from_major_nocheck(major); + edge_offset = edge_partition.local_offset(major_offset); local_degree = edge_partition.local_degree(major_offset); - warp_key_local_edge_offsets[threadIdx.x] = edge_partition.local_offset(major_offset); } } - WarpScan(temp_storage) - .InclusiveSum(local_degree, warp_local_degree_inclusive_sums[threadIdx.x]); - __syncwarp(); - - // all the threads in a warp collectively process local edges for the keys in [key_first + - // min_key_idx, key_first + max_key_idx) - - auto num_edges_this_warp = warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + - (max_key_idx - min_key_idx) - 1]; - auto rounded_up_num_edges_this_warp = - ((static_cast(num_edges_this_warp) + (raft::warp_size() - 1)) / raft::warp_size()) * - raft::warp_size(); - - auto call_e_op = call_e_op_with_key_t{edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op}; - auto this_warp_inclusive_sum_first = - warp_local_degree_inclusive_sums + warp_id * raft::warp_size(); - auto this_warp_inclusive_sum_last = this_warp_inclusive_sum_first + (max_key_idx - min_key_idx); + if constexpr (max_one_e_per_frontier_key) { + // each thread processes one frontier key, exits if any edge returns a valid output - if (edge_partition_e_mask) { - for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{thrust::nullopt}; + auto key = *(key_first + idx); - if (i < static_cast(num_edges_this_warp)) { - auto key_idx_this_warp = static_cast(thrust::distance( - this_warp_inclusive_sum_first, - thrust::upper_bound( - thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); - auto local_edge_offset = - warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + - static_cast(i - ((key_idx_this_warp == 0) ? 
edge_t{0} - : *(this_warp_inclusive_sum_first + - (key_idx_this_warp - 1)))); - if ((*edge_partition_e_mask).get(local_edge_offset)) { - auto key = *(key_first + (min_key_idx + key_idx_this_warp)); - e_op_result = call_e_op(key, local_edge_offset); + if (edge_partition_e_mask) { + for (edge_t i = 0; i < local_degree; ++i) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + e_op_result = call_e_op(key, edge_offset + i); + if (e_op_result) { break; } } } - - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } else { + for (edge_t i = 0; i < local_degree; ++i) { + e_op_result = call_e_op(key, edge_offset + i); + if (e_op_result) { break; } + } } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } else { - for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + auto min_key_idx = static_cast(idx - (idx % raft::warp_size())); // inclusive + auto max_key_idx = + static_cast(std::min(static_cast(min_key_idx) + raft::warp_size(), + static_cast(num_keys))); // exclusive + + // update warp_local_degree_inclusive_sums & warp_key_local_edge_offsets + + warp_key_local_edge_offsets[threadIdx.x] = edge_offset; + WarpScan(temp_storage) + .InclusiveSum(local_degree, warp_local_degree_inclusive_sums[threadIdx.x]); + __syncwarp(); + + // all the threads in a warp collectively process local edges for the keys in [key_first + + // min_key_idx, key_first + max_key_idx) + + auto num_edges_this_warp = warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + + (max_key_idx - min_key_idx) - 1]; + auto rounded_up_num_edges_this_warp = + ((static_cast(num_edges_this_warp) + (raft::warp_size() - 1)) / raft::warp_size()) * + raft::warp_size(); + + auto this_warp_inclusive_sum_first = + warp_local_degree_inclusive_sums + warp_id * raft::warp_size(); + auto 
this_warp_inclusive_sum_last = + this_warp_inclusive_sum_first + (max_key_idx - min_key_idx); + + if (edge_partition_e_mask) { + for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + + if (i < static_cast(num_edges_this_warp)) { + auto key_idx_this_warp = static_cast(thrust::distance( + this_warp_inclusive_sum_first, + thrust::upper_bound( + thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); + auto local_edge_offset = + warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + + static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} + : *(this_warp_inclusive_sum_first + + (key_idx_this_warp - 1)))); + if ((*edge_partition_e_mask).get(local_edge_offset)) { + auto key = *(key_first + (min_key_idx + key_idx_this_warp)); + e_op_result = call_e_op(key, local_edge_offset); + } + } - if (i < static_cast(num_edges_this_warp)) { - auto key_idx_this_warp = static_cast(thrust::distance( - this_warp_inclusive_sum_first, - thrust::upper_bound( - thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); - auto local_edge_offset = - warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + - static_cast(i - ((key_idx_this_warp == 0) ? 
edge_t{0} - : *(this_warp_inclusive_sum_first + - (key_idx_this_warp - 1)))); - auto key = *(key_first + (min_key_idx + key_idx_this_warp)); - e_op_result = call_e_op(key, local_edge_offset); + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } + } else { + for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + + if (i < static_cast(num_edges_this_warp)) { + auto key_idx_this_warp = static_cast(thrust::distance( + this_warp_inclusive_sum_first, + thrust::upper_bound( + thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); + auto local_edge_offset = + warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + + static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} + : *(this_warp_inclusive_sum_first + + (key_idx_this_warp - 1)))); + auto key = *(key_first + (min_key_idx + key_idx_this_warp)); + e_op_result = call_e_op(key, local_edge_offset); + } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } } } @@ -284,7 +315,8 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( } } -template buffer_idx(*buffer_idx_ptr); + using WarpReduce = cub::WarpReduce; + __shared__ std::conditional_t + temp_storage[max_one_e_per_frontier_key + ? 
(extract_transform_v_frontier_e_kernel_block_size / raft::warp_size()) + : int32_t{1} /* dummy */]; + while (idx < static_cast(thrust::distance(key_first, key_last))) { auto key = *(key_first + idx); auto major = thrust_tuple_get_or_identity(key); @@ -363,16 +403,42 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( e_op_result = call_e_op(i); } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + if constexpr (max_one_e_per_frontier_key) { + auto first_valid_lane_id = + WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) + .Reduce(e_op_result ? lane_id : raft::warp_size(), cub::Min()); + first_valid_lane_id = __shfl_sync(raft::warp_full_mask(), first_valid_lane_id, int{0}); + if (lane_id == first_valid_lane_id) { + auto push_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); + push_buffer_element( + buffer_key_output_first, buffer_value_output_first, push_idx, e_op_result); + } + if (first_valid_lane_id != raft::warp_size()) { break; } + } else { + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } } } else { for (size_t i = lane_id; i < rounded_up_local_out_degree; i += raft::warp_size()) { e_op_result_t e_op_result{thrust::nullopt}; if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + if constexpr (max_one_e_per_frontier_key) { + auto first_valid_lane_id = + WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) + .Reduce(e_op_result ? 
lane_id : raft::warp_size(), cub::Min()); + first_valid_lane_id = __shfl_sync(raft::warp_full_mask(), first_valid_lane_id, int{0}); + if (lane_id == first_valid_lane_id) { + auto push_buffer_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); + push_buffer_element( + buffer_key_output_first, buffer_value_output_first, push_buffer_idx, e_op_result); + } + if (first_valid_lane_id != raft::warp_size()) { break; } + } else { + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } } } @@ -380,7 +446,8 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( } } -template buffer_idx(*buffer_idx_ptr); + using BlockReduce = cub::BlockReduce; + __shared__ std::conditional_t + temp_storage; + __shared__ int32_t output_thread_id; + while (idx < static_cast(thrust::distance(key_first, key_last))) { auto key = *(key_first + idx); auto major = thrust_tuple_get_or_identity(key); @@ -458,16 +532,46 @@ __global__ static void extract_transform_v_frontier_e_high_degree( e_op_result = call_e_op(i); } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + if constexpr (max_one_e_per_frontier_key) { + auto first_valid_thread_id = + BlockReduce(temp_storage) + .Reduce(e_op_result ? 
threadIdx.x : extract_transform_v_frontier_e_kernel_block_size, + cub::Min()); + if (threadIdx.x == 0) { output_thread_id = first_valid_thread_id; } + __syncthreads(); + if (threadIdx.x == output_thread_id) { + auto push_buffer_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); + push_buffer_element( + buffer_key_output_first, buffer_value_output_first, push_buffer_idx, e_op_result); + } + if (output_thread_id != extract_transform_v_frontier_e_kernel_block_size) { break; } + } else { + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } } } else { for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { e_op_result_t e_op_result{thrust::nullopt}; if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + if constexpr (max_one_e_per_frontier_key) { + auto first_valid_thread_id = + BlockReduce(temp_storage) + .Reduce(e_op_result ? 
threadIdx.x : extract_transform_v_frontier_e_kernel_block_size, + cub::Min()); + if (threadIdx.x == 0) { output_thread_id = first_valid_thread_id; } + __syncthreads(); + if (threadIdx.x == output_thread_id) { + auto push_buffer_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); + push_buffer_element( + buffer_key_output_first, buffer_value_output_first, push_buffer_idx, e_op_result); + } + if (output_thread_id != extract_transform_v_frontier_e_kernel_block_size) { break; } + } else { + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } } } @@ -477,6 +581,10 @@ __global__ static void extract_transform_v_frontier_e_high_degree( template ( @@ -699,7 +810,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_block_t update_grid(h_offsets[0], extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_high_degree + extract_transform_v_frontier_e_high_degree <<>>( edge_partition, edge_partition_frontier_key_first, @@ -717,7 +828,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_warp_t update_grid(h_offsets[1] - h_offsets[0], extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_mid_degree + extract_transform_v_frontier_e_mid_degree <<>>( edge_partition, edge_partition_frontier_key_first + h_offsets[0], @@ -735,7 +846,9 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(h_offsets[2] - h_offsets[1], extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_hypersparse_or_low_degree + extract_transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, edge_partition_frontier_key_first + h_offsets[1], @@ -753,7 +866,9 @@ 
extract_transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(h_offsets[3] - h_offsets[2], extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_hypersparse_or_low_degree + extract_transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, edge_partition_frontier_key_first + h_offsets[2], @@ -773,7 +888,9 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_hypersparse_or_low_degree + extract_transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, edge_partition_frontier_key_first, diff --git a/cpp/src/prims/edge_bucket.cuh b/cpp/src/prims/edge_bucket.cuh index 9fbf47c3288..bca294e5628 100644 --- a/cpp/src/prims/edge_bucket.cuh +++ b/cpp/src/prims/edge_bucket.cuh @@ -92,6 +92,29 @@ class edge_bucket_t { { } + template >* = nullptr> + edge_bucket_t(raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts) + : handle_ptr_(&handle), + majors_(std::move(src_major ? srcs : dsts)), + minors_(std::move(src_major ? dsts : srcs)), + tags_(std::byte{0}) + { + } + + template >* = nullptr> + edge_bucket_t(raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts, + rmm::device_uvector&& tags) + : handle_ptr_(&handle), + majors_(std::move(src_major ? srcs : dsts)), + minors_(std::move(src_major ? 
dsts : srcs)), + tags_(std::move(tags)) + { + } + /** * @ brief insert an edge to the bucket * diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh index 5741c98d90e..d51e03628e1 100644 --- a/cpp/src/prims/extract_transform_e.cuh +++ b/cpp/src/prims/extract_transform_e.cuh @@ -116,8 +116,8 @@ extract_transform_e(raft::handle_t const& handle, thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last())); auto value_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(std::ignore, value_buffer) = - detail::extract_transform_v_frontier_e( + std::tie(std::ignore, value_buffer) = detail:: + extract_transform_v_frontier_e( handle, graph_view, frontier, diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh index b9c63481420..7ad033b93c2 100644 --- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh +++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh @@ -100,14 +100,14 @@ extract_transform_v_frontier_outgoing_e(raft::handle_t const& handle, auto value_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); std::tie(std::ignore, value_buffer) = - detail::extract_transform_v_frontier_e(handle, - graph_view, - frontier, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op, - do_expensive_check); + detail::extract_transform_v_frontier_e(handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + do_expensive_check); return value_buffer; } diff --git a/cpp/src/prims/fill_edge_property.cuh b/cpp/src/prims/fill_edge_property.cuh index 161ec623287..54d0c454ec2 100644 --- a/cpp/src/prims/fill_edge_property.cuh +++ b/cpp/src/prims/fill_edge_property.cuh @@ -33,11 +33,11 @@ namespace cugraph { namespace detail { -template +template void fill_edge_property(raft::handle_t const& handle, GraphViewType const& graph_view, - T 
input, - EdgePropertyOutputWrapper edge_property_output) + EdgePropertyOutputWrapper edge_property_output, + T input) { static_assert(std::is_same_v); @@ -123,27 +123,28 @@ void fill_edge_property(raft::handle_t const& handle, * @brief Fill graph edge property values to the input value. * * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam EdgeValueOutputWrapper Type of the wrapper for output edge property values. * @tparam T Type of the edge property values. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. + * @param edge_property_output edge_property_view_t class object to store edge property values (for + * the edges assigned to this process in multi-GPU). * @param input Edge property values will be set to @p input. - * @param edge_property_output edge_property_t class object to store edge property values (for the - * edges assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
*/ -template +template void fill_edge_property(raft::handle_t const& handle, GraphViewType const& graph_view, + EdgeValueOutputWrapper edge_property_output, T input, - edge_property_t& edge_property_output, bool do_expensive_check = false) { if (do_expensive_check) { // currently, nothing to do } - detail::fill_edge_property(handle, graph_view, input, edge_property_output.mutable_view()); + detail::fill_edge_property(handle, graph_view, edge_property_output, input); } } // namespace cugraph diff --git a/cpp/src/prims/fill_edge_src_dst_property.cuh b/cpp/src/prims/fill_edge_src_dst_property.cuh index 152de3f8d4a..58dbf7e74a0 100644 --- a/cpp/src/prims/fill_edge_src_dst_property.cuh +++ b/cpp/src/prims/fill_edge_src_dst_property.cuh @@ -15,29 +15,86 @@ */ #pragma once +#include +#include #include #include +#include #include +#include +#include #include #include +#include #include +#include #include +#include namespace cugraph { namespace detail { -template +template +__device__ std::enable_if_t, void> fill_thrust_tuple_element(Iterator iter, + size_t offset, + T value) +{ + packed_bool_atomic_set(iter, offset, value); +} + +template +__device__ std::enable_if_t, void> fill_thrust_tuple_element(Iterator iter, + size_t offset, + T value) +{ + *(iter + offset) = value; +} + +template +__device__ void fill_thrust_tuple(Iterator iter, size_t offset, T value, std::index_sequence) +{ + ((fill_thrust_tuple_element( + thrust::get(iter.get_iterator_tuple()), offset, thrust::get(value))), + ...); +} + +template +__device__ void fill_scalar_or_thrust_tuple(Iterator iter, size_t offset, T value) +{ + if constexpr (std::is_arithmetic_v) { + if constexpr (cugraph::is_packed_bool) { + packed_bool_atomic_set(iter, offset, value); + } else { + *(iter + offset) = value; + } + } else { + if constexpr (cugraph::has_packed_bool_element) { + fill_thrust_tuple( + iter, offset, value, std::make_index_sequence::value>()); + } else { + *(iter + offset) = value; + } + } +} + +template void 
fill_edge_major_property(raft::handle_t const& handle, GraphViewType const& graph_view, - T input, - EdgeMajorPropertyOutputWrapper edge_major_property_output) + EdgeMajorPropertyOutputWrapper edge_major_property_output, + T input) { + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); static_assert(std::is_same_v); + static_assert(!contains_packed_bool_element || + std::is_arithmetic_v, + "unimplemented for thrust::tuple types with a packed bool element."); auto keys = edge_major_property_output.keys(); auto value_firsts = edge_major_property_output.value_firsts(); @@ -54,10 +111,7 @@ void fill_edge_major_property(raft::handle_t const& handle, static_cast(graph_view.local_edge_partition_src_range_size(i)); } } - if constexpr (cugraph::has_packed_bool_element< - std::remove_reference_t, - T>()) { - static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + if constexpr (contains_packed_bool_element) { auto packed_input = input ? packed_bool_full_mask() : packed_bool_empty_mask(); thrust::fill_n(handle.get_thrust_policy(), value_firsts[i], @@ -69,12 +123,139 @@ void fill_edge_major_property(raft::handle_t const& handle, } } -template +template +void fill_edge_major_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + EdgeMajorPropertyOutputWrapper edge_major_property_output, + T input) +{ + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); + static_assert(std::is_same_v); + static_assert(!contains_packed_bool_element || + std::is_arithmetic_v, + "unimplemented for thrust::tuple types with a packed bool element."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + + auto edge_partition_value_firsts = edge_major_property_output.value_firsts(); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const 
comm_rank = comm.get_rank(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(minor_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + auto max_rx_size = + std::reduce(rx_counts.begin(), rx_counts.end(), size_t{0}, [](auto lhs, auto rhs) { + return std::max(lhs, rhs); + }); + rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); + + auto edge_partition_keys = edge_major_property_output.keys(); + for (int i = 0; i < minor_comm_size; ++i) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(i)); + + device_bcast( + minor_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + + if (edge_partition_keys) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_counts[i]), + [rx_vertex_first = rx_vertices.begin(), + input, + edge_partition_key_first = ((*edge_partition_keys)[i]).begin(), + edge_partition_key_last = ((*edge_partition_keys)[i]).end(), + edge_partition_value_first = edge_partition_value_firsts[i]] __device__(size_t i) { + auto major = *(rx_vertex_first + i); + auto it = thrust::lower_bound( + thrust::seq, edge_partition_key_first, edge_partition_key_last, major); + if ((it != edge_partition_key_last) && (*it == major)) { + auto edge_partition_offset = thrust::distance(edge_partition_key_first, it); + if constexpr (contains_packed_bool_element) { + packe_bool_atomic_set(edge_partition_value_first, edge_partition_offset, input); + } else { + *(edge_partition_value_first + edge_partition_offset) = input; + } + } + }); + } else { + if constexpr (contains_packed_bool_element) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(vertex_t{0}), + 
thrust::make_counting_iterator(static_cast(rx_counts[i])), + [edge_partition, + rx_vertex_first = rx_vertices.begin(), + input, + output_value_first = edge_partition_value_firsts[i]] __device__(auto i) { + auto rx_vertex = *(rx_vertex_first + i); + auto major_offset = edge_partition.major_offset_from_major_nocheck(rx_vertex); + packed_bool_atomic_set(output_value_first, major_offset, input); + }); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { + return edge_partition.major_offset_from_major_nocheck(v); + })); + auto val_first = thrust::make_constant_iterator(input); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(handle.get_thrust_policy(), + val_first, + val_first + rx_counts[i], + map_first, + edge_partition_value_firsts[i]); + } + } + } + } else { + assert(graph_view.local_vertex_partition_range_size() == GraphViewType::is_storage_transposed + ? 
graph_view.local_edge_partition_dst_range_size() + : graph_view.local_edge_partition_src_range_size()); + assert(edge_partition_value_firsts.size() == size_t{1}); + if constexpr (contains_packed_bool_element) { + thrust::for_each(handle.get_thrust_policy(), + vertex_first, + vertex_last, + [input, output_value_first = edge_partition_value_firsts[0]] __device__( + auto v) { packed_bool_atomic_set(output_value_first, v, input); }); + } else { + auto val_first = thrust::make_constant_iterator(input); + thrust::scatter(handle.get_thrust_policy(), + val_first, + val_first + thrust::distance(vertex_first, vertex_last), + vertex_first, + edge_partition_value_firsts[0]); + } + } +} + +template void fill_edge_minor_property(raft::handle_t const& handle, GraphViewType const& graph_view, - T input, - EdgeMinorPropertyOutputWrapper edge_minor_property_output) + EdgeMinorPropertyOutputWrapper edge_minor_property_output, + T input) { + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); static_assert(std::is_same_v); auto keys = edge_minor_property_output.keys(); @@ -89,7 +270,7 @@ void fill_edge_minor_property(raft::handle_t const& handle, } } auto value_first = edge_minor_property_output.value_first(); - if constexpr (cugraph::has_packed_bool_element()) { + if constexpr (contains_packed_bool_element) { static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); auto packed_input = input ? 
packed_bool_full_mask() : packed_bool_empty_mask(); thrust::fill_n( @@ -99,6 +280,136 @@ void fill_edge_minor_property(raft::handle_t const& handle, } } +template +void fill_edge_minor_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + EdgeMinorPropertyOutputWrapper edge_minor_property_output, + T input) +{ + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); + static_assert(std::is_same_v); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + + auto edge_partition_value_first = edge_minor_property_output.value_first(); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_rank = major_comm.get_rank(); + auto const major_comm_size = major_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(major_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + auto max_rx_size = + std::reduce(rx_counts.begin(), rx_counts.end(), size_t{0}, [](auto lhs, auto rhs) { + return std::max(lhs, rhs); + }); + rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); + + std::optional> key_offsets{}; + if constexpr (GraphViewType::is_storage_transposed) { + key_offsets = graph_view.local_sorted_unique_edge_src_vertex_partition_offsets(); + } else { + key_offsets = graph_view.local_sorted_unique_edge_dst_vertex_partition_offsets(); + } + + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(size_t{0})); + auto edge_partition_keys = edge_minor_property_output.keys(); + for (int i = 0; i < major_comm_size; ++i) { + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast( + 
major_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + + if (edge_partition_keys) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_counts[i]), + [rx_vertex_first = rx_vertices.begin(), + input, + subrange_key_first = (*edge_partition_keys).begin() + (*key_offsets)[i], + subrange_key_last = (*edge_partition_keys).begin() + (*key_offsets)[i + 1], + edge_partition_value_first = edge_partition_value_first, + subrange_start_offset = (*key_offsets)[i]] __device__(auto i) { + auto minor = *(rx_vertex_first + i); + auto it = + thrust::lower_bound(thrust::seq, subrange_key_first, subrange_key_last, minor); + if ((it != subrange_key_last) && (*it == minor)) { + auto subrange_offset = thrust::distance(subrange_key_first, it); + if constexpr (contains_packed_bool_element) { + fill_scalar_or_thrust_tuple( + edge_partition_value_first, subrange_start_offset + subrange_offset, input); + } else { + *(edge_partition_value_first + subrange_start_offset + subrange_offset) = input; + } + } + }); + } else { + if constexpr (contains_packed_bool_element) { + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(static_cast(rx_counts[i])), + [edge_partition, + rx_vertex_first = rx_vertices.begin(), + input, + output_value_first = edge_partition_value_first] __device__(auto i) { + auto rx_vertex = *(rx_vertex_first + i); + auto minor_offset = + edge_partition.minor_offset_from_minor_nocheck(rx_vertex); + fill_scalar_or_thrust_tuple(output_value_first, minor_offset, input); + }); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { + return edge_partition.minor_offset_from_minor_nocheck(v); + })); + auto val_first = thrust::make_constant_iterator(input); + // FIXME: this scatter is unnecessary if NCCL 
directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(handle.get_thrust_policy(), + val_first, + val_first + rx_counts[i], + map_first, + edge_partition_value_first); + } + } + } + } else { + assert(graph_view.local_vertex_partition_range_size() == + graph_view.local_edge_partition_src_range_size()); + if constexpr (contains_packed_bool_element) { + thrust::for_each(handle.get_thrust_policy(), + vertex_first, + vertex_last, + [input, output_value_first = edge_partition_value_first] __device__(auto v) { + fill_scalar_or_thrust_tuple(output_value_first, v, input); + }); + } else { + auto val_first = thrust::make_constant_iterator(input); + thrust::scatter(handle.get_thrust_policy(), + val_first, + val_first + thrust::distance(vertex_first, vertex_last), + vertex_first, + edge_partition_value_first); + } + } +} + } // namespace detail /** @@ -108,32 +419,96 @@ void fill_edge_minor_property(raft::handle_t const& handle, * to this process in multi-GPU). * * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam EdgeSrcValueOutputWrapper Type of the wrapper for output edge source property values. * @tparam T Type of the edge source property values. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. + * @param edge_src_property_output edge_src_property_view_t class object to store source property + * values (for the edge source assigned to this process in multi-GPU). * @param input Edge source property values will be set to @p input. - * @param edge_src_property_output edge_src_property_t class object to store source property values - * (for the edge source assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
*/ -template +template void fill_edge_src_property(raft::handle_t const& handle, GraphViewType const& graph_view, + EdgeSrcValueOutputWrapper edge_src_property_output, T input, - edge_src_property_t& edge_src_property_output, bool do_expensive_check = false) { + static_assert(std::is_same_v); if (do_expensive_check) { // currently, nothing to do } + if constexpr (GraphViewType::is_storage_transposed) { + detail::fill_edge_minor_property(handle, graph_view, edge_src_property_output, input); + } else { + detail::fill_edge_major_property(handle, graph_view, edge_src_property_output, input); + } +} + +/** + * @brief Fill graph edge source property values to the input value. + * + * This version fills only a subset of graph edge source property values. [@p vertex_first, + * @p vertex_last) specifies the vertices to be filled. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam EdgeSrcValueOutputWrapper Type of the wrapper for output edge source property values. + * @tparam T Type of the edge source property values. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex with a value to be filled. + * v in [vertex_first, vertex_last) should be distinct (and should belong to the vertex partition + * assigned to this process in multi-GPU), otherwise undefined behavior. + * @param vertex_last Iterator pointing to the last (exclusive) vertex with a value to be filled. + * @param edge_src_property_output edge_src_property_view_t class object to store source property + * values (for the edge source assigned to this process in multi-GPU). + * @param input Edge source property values will be set to @p input. 
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void fill_edge_src_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + EdgeSrcValueOutputWrapper edge_src_property_output, + T input, + bool do_expensive_check = false) +{ + static_assert(std::is_same_v); + if (do_expensive_check) { + auto num_invalids = thrust::count_if( + handle.get_thrust_policy(), + vertex_first, + vertex_last, + [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), + local_vertex_partition_range_last = + graph_view.local_vertex_partition_range_last()] __device__(auto v) { + return (v < local_vertex_partition_range_first) || (v >= local_vertex_partition_range_last); + }); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + num_invalids = + host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS( + num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + } + if constexpr (GraphViewType::is_storage_transposed) { detail::fill_edge_minor_property( - handle, graph_view, input, edge_src_property_output.mutable_view()); + handle, graph_view, vertex_first, vertex_last, edge_src_property_output, input); } else { detail::fill_edge_major_property( - handle, graph_view, input, edge_src_property_output.mutable_view()); + handle, graph_view, vertex_first, vertex_last, edge_src_property_output, input); } } @@ -144,32 +519,98 @@ void fill_edge_src_property(raft::handle_t const& handle, * (assigned to this process in multi-GPU). * * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam EdgeDstValueOutputWrapper Type of the wrapper for output edge destination property + * values. * @tparam T Type of the edge destination property values. 
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. + * @param edge_dst_property_output edge_dst_property_view_t class object to store destination + * property values (for the edge destinations assigned to this process in multi-GPU). * @param input Edge destination property values will be set to @p input. - * @param edge_dst_property_output edge_dst_property_t class object to store destination property - * values (for the edge destinations assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ -template +template void fill_edge_dst_property(raft::handle_t const& handle, GraphViewType const& graph_view, + EdgeDstValueOutputWrapper edge_dst_property_output, T input, - edge_dst_property_t& edge_dst_property_output, bool do_expensive_check = false) { + static_assert(std::is_same_v); if (do_expensive_check) { // currently, nothing to do } + if constexpr (GraphViewType::is_storage_transposed) { + detail::fill_edge_major_property(handle, graph_view, edge_dst_property_output, input); + } else { + detail::fill_edge_minor_property(handle, graph_view, edge_dst_property_output, input); + } +} + +/** + * @brief Fill graph edge destination property values to the input value. + * + * This version fills only a subset of graph edge destination property values. [@p vertex_first, + * @p vertex_last) specifies the vertices to be filled. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam EdgeDstValueOutputWrapper Type of the wrapper for output edge destination property + * values. + * @tparam T Type of the edge destination property values. + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex with a value to be filled. + * v in [vertex_first, vertex_last) should be distinct (and should belong to the vertex partition + * assigned to this process in multi-GPU), otherwise undefined behavior. + * @param vertex_last Iterator pointing to the last (exclusive) vertex with a value to be filled. + * @param edge_dst_property_output edge_dst_property_view_t class object to store destination + * property values (for the edge destinations assigned to this process in multi-GPU). + * @param input Edge destination property values will be set to @p input. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void fill_edge_dst_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + EdgeDstValueOutputWrapper edge_dst_property_output, + T input, + bool do_expensive_check = false) +{ + static_assert(std::is_same_v); + if (do_expensive_check) { + auto num_invalids = thrust::count_if( + handle.get_thrust_policy(), + vertex_first, + vertex_last, + [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), + local_vertex_partition_range_last = + graph_view.local_vertex_partition_range_last()] __device__(auto v) { + return (v < local_vertex_partition_range_first) || (v >= local_vertex_partition_range_last); + }); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + num_invalids = + host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS( + num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + } + if constexpr (GraphViewType::is_storage_transposed) { 
detail::fill_edge_major_property( - handle, graph_view, input, edge_dst_property_output.mutable_view()); + handle, graph_view, vertex_first, vertex_last, edge_dst_property_output, input); } else { detail::fill_edge_minor_property( - handle, graph_view, input, edge_dst_property_output.mutable_view()); + handle, graph_view, vertex_first, vertex_last, edge_dst_property_output, input); } } diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 7253fde8d4e..015a9c683f1 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -156,6 +156,8 @@ struct transform_local_nbr_indices_t { edge_partition_e_value_input.get(edge_offset + local_nbr_idx)); } else if (invalid_value) { return *invalid_value; + } else { + return T{}; } } }; diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 509ab56d3fe..027ef1f662d 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -660,7 +660,7 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, auto const major_comm_rank = major_comm.get_rank(); minor_init = (major_comm_rank == 0) ? 
init : ReduceOp::identity_element; } - fill_edge_minor_property(handle, graph_view, minor_init, minor_tmp_buffer->mutable_view()); + fill_edge_minor_property(handle, graph_view, minor_tmp_buffer->mutable_view(), minor_init); } else { thrust::fill(handle.get_thrust_policy(), vertex_value_output_first, diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh similarity index 64% rename from cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh rename to cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh index 0432e25ae86..e58ab08fa97 100644 --- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh +++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh @@ -74,7 +74,8 @@ namespace detail { int32_t constexpr update_v_frontier_from_outgoing_e_kernel_block_size = 512; -template (key) : dst; if constexpr (std::is_same_v && std::is_same_v) { - return dst; + return reduce_by; } else if constexpr (std::is_same_v && !std::is_same_v) { - return thrust::make_tuple(dst, *e_op_result); + return thrust::make_tuple(reduce_by, *e_op_result); } else if constexpr (!std::is_same_v && std::is_same_v) { - return thrust::make_tuple(dst, *e_op_result); + return thrust::make_tuple(reduce_by, *e_op_result); } else { - return thrust::make_tuple(thrust::make_tuple(dst, thrust::get<0>(*e_op_result)), + return thrust::make_tuple(thrust::make_tuple(reduce_by, thrust::get<0>(*e_op_result)), thrust::get<1>(*e_op_result)); } } else { @@ -176,6 +178,151 @@ auto sort_and_reduce_buffer_elements( return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); } +template +std::conditional_t< + !std::is_same_v, + std::tuple( + 0, rmm::cuda_stream_view{})), + decltype(detail::allocate_optional_dataframe_buffer( + 0, rmm::cuda_stream_view{}))>, + decltype(allocate_dataframe_buffer( + 0, rmm::cuda_stream_view{}))> 
+transform_reduce_v_frontier_outgoing_e_by_src_dst(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexFrontierBucketType const& frontier, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + ReduceOp reduce_op, + bool do_expensive_check = false) +{ + static_assert(!GraphViewType::is_storage_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = typename VertexFrontierBucketType::key_type; + using payload_t = typename ReduceOp::value_type; + + if (do_expensive_check) { + // currently, nothing to do + } + + // 1. fill the buffer + + detail::transform_reduce_v_frontier_call_e_op_t + e_op_wrapper{e_op}; + + bool constexpr max_one_e_per_frontier_key = + reduce_by_src && std::is_same_v>; + auto [key_buffer, payload_buffer] = + detail::extract_transform_v_frontier_e( + handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op_wrapper, + do_expensive_check); + + // 2. reduce the buffer + + std::tie(key_buffer, payload_buffer) = + detail::sort_and_reduce_buffer_elements( + handle, std::move(key_buffer), std::move(payload_buffer), reduce_op); + if constexpr (GraphViewType::is_multi_gpu) { + // FIXME: this step is unnecessary if major_comm_size== 1 + auto& comm = handle.get_comms(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_rank = major_comm.get_rank(); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + std::vector h_vertex_lasts(reduce_by_src ? 
minor_comm_size : major_comm_size); + for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { + auto vertex_partition_id = + reduce_by_src + ? detail::compute_local_edge_partition_major_range_vertex_partition_id_t{major_comm_size, + minor_comm_size, + major_comm_rank, + minor_comm_rank}( + i) + : detail::compute_local_edge_partition_minor_range_vertex_partition_id_t{ + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); + h_vertex_lasts[i] = graph_view.vertex_partition_range_last(vertex_partition_id); + } + + rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); + raft::update_device( + d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); + rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), + handle.get_stream()); + auto reduce_by_first = + thrust_tuple_get_or_identity( + get_dataframe_buffer_begin(key_buffer)); + thrust::lower_bound(handle.get_thrust_policy(), + reduce_by_first, + reduce_by_first + size_dataframe_buffer(key_buffer), + d_vertex_lasts.begin(), + d_vertex_lasts.end(), + d_tx_buffer_last_boundaries.begin()); + std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); + raft::update_host(h_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.size(), + handle.get_stream()); + handle.sync_stream(); + std::vector tx_counts(h_tx_buffer_last_boundaries.size()); + std::adjacent_difference( + h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); + + auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_key_buffer, std::ignore) = shuffle_values(reduce_by_src ? 
minor_comm : major_comm, + get_dataframe_buffer_begin(key_buffer), + tx_counts, + handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + + if constexpr (!std::is_same_v) { + auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, std::ignore) = + shuffle_values(reduce_by_src ? minor_comm : major_comm, + get_dataframe_buffer_begin(payload_buffer), + tx_counts, + handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + } + + std::tie(key_buffer, payload_buffer) = + detail::sort_and_reduce_buffer_elements( + handle, std::move(key_buffer), std::move(payload_buffer), reduce_op); + } + + if constexpr (!std::is_same_v) { + return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); + } else { + return std::move(key_buffer); + } +} + } // namespace detail template @@ -259,6 +406,94 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, return ret; } +/** + * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor + * outputs by (tagged-)source ID. + * + * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are + * assumed to be tagged if VertexFrontierBucketType::key_type is a tuple of a vertex type and a tag + * type (VertexFrontierBucketType::key_type is identical to a vertex type otherwise). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexFrontierBucketType Type of the vertex frontier bucket class which abstracts the + * current (tagged-)vertex frontier. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam EdgeOp Type of the quinary edge operator. + * @tparam ReduceOp Type of the binary reduction operator. 
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param frontier VertexFrontierBucketType class object for the current vertex frontier. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to + * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not + * access edge property values). + * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for + * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be + * discarded); 2) dummy (but valid) thrust::optional object (e.g. 
+ * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is + * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be + * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); + * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type + * is not void). + * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. + * There are pre-defined reduction operators in prims/reduce_op.cuh. It is + * recommended to use the pre-defined reduction operators whenever possible as the current (and + * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has + * known member variables) to take a more optimized code path. See the documentation in the + * reduce_op.cuh file for instructions on writing custom reduction operators. + * @return Tuple of key values and payload values (if ReduceOp::value_type is not void) or just key + * values (if ReduceOp::value_type is void). Keys in the return values are sorted in ascending order + * using a vertex ID as the primary key and a tag (if relevant) as the secondary key. 
+ */ +template +std::conditional_t< + !std::is_same_v, + std::tuple( + 0, rmm::cuda_stream_view{})), + decltype(detail::allocate_optional_dataframe_buffer( + 0, rmm::cuda_stream_view{}))>, + decltype(allocate_dataframe_buffer( + 0, rmm::cuda_stream_view{}))> +transform_reduce_v_frontier_outgoing_e_by_src(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexFrontierBucketType const& frontier, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + ReduceOp reduce_op, + bool do_expensive_check = false) +{ + return detail::transform_reduce_v_frontier_outgoing_e_by_src_dst(handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + reduce_op, + do_expensive_check); +} + /** * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor * outputs by (tagged-)destination ID. @@ -293,13 +528,14 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). - * @param e_op Quinary operator takes edge source, edge destination, property values for the source, - * destination, and edge and returns 1) thrust::nullopt (if invalid and to be discarded); 2) dummy - * (but valid) thrust::optional object (e.g. thrust::optional{std::byte{0}}, if vertices - * are not tagged and ReduceOp::value_type is void); 3) a tag (if vertices are tagged and - * ReduceOp::value_type is void); 4) a value to be reduced using the @p reduce_op (if vertices are - * not tagged and ReduceOp::value_type is not void); or 5) a tuple of a tag and a value to be - * reduced (if vertices are tagged and ReduceOp::value_type is not void). 
+ * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for + * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be + * discarded); 2) dummy (but valid) thrust::optional object (e.g. + * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is + * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be + * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); + * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type + * is not void). * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. * There are pre-defined reduction operators in prims/reduce_op.cuh. It is * recommended to use the pre-defined reduction operators whenever possible as the current (and @@ -335,108 +571,15 @@ transform_reduce_v_frontier_outgoing_e_by_dst(raft::handle_t const& handle, ReduceOp reduce_op, bool do_expensive_check = false) { - static_assert(!GraphViewType::is_storage_transposed, - "GraphViewType should support the push model."); - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using key_t = typename VertexFrontierBucketType::key_type; - using payload_t = typename ReduceOp::value_type; - - if (do_expensive_check) { - // currently, nothing to do - } - - // 1. fill the buffer - - detail::transform_reduce_v_frontier_call_e_op_t - e_op_wrapper{e_op}; - - auto [key_buffer, payload_buffer] = - detail::extract_transform_v_frontier_e(handle, - graph_view, - frontier, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op_wrapper, - do_expensive_check); - - // 2. 
reduce the buffer - - std::tie(key_buffer, payload_buffer) = - detail::sort_and_reduce_buffer_elements( - handle, std::move(key_buffer), std::move(payload_buffer), reduce_op); - if constexpr (GraphViewType::is_multi_gpu) { - // FIXME: this step is unnecessary if major_comm_size== 1 - auto& comm = handle.get_comms(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_rank = major_comm.get_rank(); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - auto const minor_comm_size = minor_comm.get_size(); - - std::vector h_vertex_lasts(major_comm_size); - for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { - auto minor_range_vertex_partition_id = - detail::compute_local_edge_partition_minor_range_vertex_partition_id_t{ - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - h_vertex_lasts[i] = graph_view.vertex_partition_range_last(minor_range_vertex_partition_id); - } - - rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); - raft::update_device( - d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); - rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), - handle.get_stream()); - auto dst_first = - thrust_tuple_get_or_identity( - get_dataframe_buffer_begin(key_buffer)); - thrust::lower_bound(handle.get_thrust_policy(), - dst_first, - dst_first + size_dataframe_buffer(key_buffer), - d_vertex_lasts.begin(), - d_vertex_lasts.end(), - d_tx_buffer_last_boundaries.begin()); - std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); - raft::update_host(h_tx_buffer_last_boundaries.data(), - d_tx_buffer_last_boundaries.data(), - d_tx_buffer_last_boundaries.size(), - handle.get_stream()); - handle.sync_stream(); - std::vector 
tx_counts(h_tx_buffer_last_boundaries.size()); - std::adjacent_difference( - h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - - auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(rx_key_buffer, std::ignore) = shuffle_values( - major_comm, get_dataframe_buffer_begin(key_buffer), tx_counts, handle.get_stream()); - key_buffer = std::move(rx_key_buffer); - - if constexpr (!std::is_same_v) { - auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(rx_payload_buffer, std::ignore) = shuffle_values( - major_comm, get_dataframe_buffer_begin(payload_buffer), tx_counts, handle.get_stream()); - payload_buffer = std::move(rx_payload_buffer); - } - - std::tie(key_buffer, payload_buffer) = - detail::sort_and_reduce_buffer_elements( - handle, std::move(key_buffer), std::move(payload_buffer), reduce_op); - } - - if constexpr (!std::is_same_v) { - return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); - } else { - return std::move(key_buffer); - } + return detail::transform_reduce_v_frontier_outgoing_e_by_src_dst(handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + reduce_op, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/prims/update_edge_src_dst_property.cuh b/cpp/src/prims/update_edge_src_dst_property.cuh index dfac04ce848..1bfdc23c66d 100644 --- a/cpp/src/prims/update_edge_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_src_dst_property.cuh @@ -22,10 +22,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -56,18 +58,6 @@ namespace cugraph { namespace detail { -template -__device__ void packed_bool_atomic_set(Iterator value_first, vertex_t offset, bool val) -{ - auto packed_output_offset = packed_bool_offset(offset); - auto packed_output_mask = packed_bool_mask(offset); - if (val) { - 
atomicOr(value_first + packed_output_offset, packed_output_mask); - } else { - atomicAnd(value_first + packed_output_offset, ~packed_output_mask); - } -} - template void pack_bools(raft::handle_t const& handle, BoolInputIterator input_first, @@ -130,8 +120,12 @@ void update_edge_major_property(raft::handle_t const& handle, VertexPropertyInputIterator vertex_property_input_first, EdgeMajorPropertyOutputWrapper edge_major_property_output) { - constexpr bool packed_bool = - std::is_same_v; + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); + static_assert(!contains_packed_bool_element || + std::is_arithmetic_v, + "unimplemented for thrust::tuple types with a packed bool element."); auto edge_partition_value_firsts = edge_major_property_output.value_firsts(); if constexpr (GraphViewType::is_multi_gpu) { @@ -157,16 +151,17 @@ void update_edge_major_property(raft::handle_t const& handle, max_rx_size, graph_view.vertex_partition_range_size(major_range_vertex_partition_id)); } auto rx_value_buffer = allocate_dataframe_buffer< - std::conditional_t>( - packed_bool ? packed_bool_size(max_rx_size) : max_rx_size, handle.get_stream()); + contains_packed_bool_element ? 
packed_bool_size(max_rx_size) : max_rx_size, + handle.get_stream()); auto rx_value_first = get_dataframe_buffer_begin(rx_value_buffer); for (int i = 0; i < minor_comm_size; ++i) { auto major_range_vertex_partition_id = compute_local_edge_partition_major_range_vertex_partition_id_t{ major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { if (i == minor_comm_rank) { pack_bools(handle, vertex_property_input_first, @@ -220,7 +215,7 @@ void update_edge_major_property(raft::handle_t const& handle, auto major_range_vertex_partition_id = compute_local_edge_partition_major_range_vertex_partition_id_t{ major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { if (i == minor_comm_rank) { pack_bools(handle, vertex_property_input_first, @@ -250,7 +245,7 @@ void update_edge_major_property(raft::handle_t const& handle, ? 
graph_view.local_edge_partition_dst_range_size() : graph_view.local_edge_partition_src_range_size()); assert(edge_partition_value_firsts.size() == size_t{1}); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { pack_bools(handle, vertex_property_input_first, vertex_property_input_first + graph_view.local_vertex_partition_range_size(), @@ -275,8 +270,12 @@ void update_edge_major_property(raft::handle_t const& handle, VertexPropertyInputIterator vertex_property_input_first, EdgeMajorPropertyOutputWrapper edge_major_property_output) { - constexpr bool packed_bool = - std::is_same_v; + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); + static_assert(!contains_packed_bool_element || + std::is_arithmetic_v, + "unimplemented for thrust::tuple types with a packed bool element."); using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -299,9 +298,11 @@ void update_edge_major_property(raft::handle_t const& handle, }); rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); auto rx_tmp_buffer = allocate_dataframe_buffer< - std:: - conditional_t>( - packed_bool ? packed_bool_size(max_rx_size) : max_rx_size, handle.get_stream()); + std::conditional_t>( + contains_packed_bool_element ? 
packed_bool_size(max_rx_size) : max_rx_size, + handle.get_stream()); auto rx_value_first = get_dataframe_buffer_begin(rx_tmp_buffer); auto edge_partition_keys = edge_major_property_output.keys(); @@ -314,7 +315,7 @@ void update_edge_major_property(raft::handle_t const& handle, auto vertex_partition = vertex_partition_device_view_t( graph_view.local_vertex_partition_view()); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { auto bool_first = thrust::make_transform_iterator( vertex_first, cuda::proclaim_return_type([vertex_property_input_first, @@ -351,7 +352,7 @@ void update_edge_major_property(raft::handle_t const& handle, device_bcast(minor_comm, rx_value_first, rx_value_first, - packed_bool ? packed_bool_size(rx_counts[i]) : rx_counts[i], + contains_packed_bool_element ? packed_bool_size(rx_counts[i]) : rx_counts[i], i, handle.get_stream()); @@ -370,7 +371,7 @@ void update_edge_major_property(raft::handle_t const& handle, thrust::seq, edge_partition_key_first, edge_partition_key_last, major); if ((it != edge_partition_key_last) && (*it == major)) { auto edge_partition_offset = thrust::distance(edge_partition_key_first, it); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { auto rx_value = static_cast(*(rx_value_first + packed_bool_offset(i)) & packed_bool_mask(i)); packe_bool_atomic_set(edge_partition_value_first, edge_partition_offset, rx_value); @@ -381,7 +382,7 @@ void update_edge_major_property(raft::handle_t const& handle, } }); } else { - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), @@ -417,7 +418,7 @@ void update_edge_major_property(raft::handle_t const& handle, ? 
graph_view.local_edge_partition_dst_range_size() : graph_view.local_edge_partition_src_range_size()); assert(edge_partition_value_firsts.size() == size_t{1}); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { thrust::for_each(handle.get_thrust_policy(), vertex_first, vertex_last, @@ -445,15 +446,19 @@ void update_edge_minor_property(raft::handle_t const& handle, VertexPropertyInputIterator vertex_property_input_first, EdgeMinorPropertyOutputWrapper edge_minor_property_output) { - constexpr bool packed_bool = - std::is_same_v; + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); + static_assert(!contains_packed_bool_element || + std::is_arithmetic_v, + "unimplemented for thrust::tuple types with a packed bool element."); auto edge_partition_value_first = edge_minor_property_output.value_first(); if constexpr (GraphViewType::is_multi_gpu) { using vertex_t = typename GraphViewType::vertex_type; using bcast_buffer_type = decltype(allocate_dataframe_buffer< - std::conditional_t>( size_t{0}, handle.get_stream())); @@ -473,7 +478,7 @@ void update_edge_minor_property(raft::handle_t const& handle, // (V/comm_size) * sizeof(value_t) // and limit memory requirement to (E / comm_size) * sizeof(vertex_t) auto bcast_size = static_cast(graph_view.number_of_vertices()) / comm_size; - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { bcast_size /= 8; // bits to bytes } else { bcast_size *= sizeof(typename EdgeMinorPropertyOutputWrapper::value_type); @@ -490,7 +495,7 @@ void update_edge_minor_property(raft::handle_t const& handle, auto edge_partition_keys = edge_minor_property_output.keys(); std::optional> rx_value_buffers{std::nullopt}; - if (packed_bool || edge_partition_keys) { + if (contains_packed_bool_element || edge_partition_keys) { rx_value_buffers = std::vector{}; (*rx_value_buffers).reserve(num_concurrent_bcasts); for (size_t i = 0; i < num_concurrent_bcasts; ++i) { @@ -508,10 +513,11 
@@ void update_edge_minor_property(raft::handle_t const& handle, } (*rx_value_buffers) .push_back(allocate_dataframe_buffer< - std::conditional_t>( - packed_bool ? packed_bool_size(max_size) : max_size, handle.get_stream())); + contains_packed_bool_element ? packed_bool_size(max_size) : max_size, + handle.get_stream())); } } @@ -539,7 +545,7 @@ void update_edge_minor_property(raft::handle_t const& handle, } for (size_t round = 0; round < num_rounds; ++round) { - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { for (size_t i = 0; i < num_concurrent_bcasts; ++i) { auto j = static_cast(num_rounds * i + round); if (j == major_comm_rank) { @@ -567,7 +573,7 @@ void update_edge_minor_property(raft::handle_t const& handle, rx_value_buffers ? get_dataframe_buffer_begin((*rx_value_buffers)[i]) : edge_partition_value_first + std::get>(key_offsets_or_rx_displacements)[j]; - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { device_bcast(major_comm, rx_value_first, rx_value_first, @@ -595,7 +601,7 @@ void update_edge_minor_property(raft::handle_t const& handle, compute_local_edge_partition_minor_range_vertex_partition_id_t{ major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(j); auto rx_value_first = get_dataframe_buffer_begin((*rx_value_buffers)[i]); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { if (edge_partition_keys) { auto key_offsets = std::get>(key_offsets_or_rx_displacements); @@ -657,7 +663,7 @@ void update_edge_minor_property(raft::handle_t const& handle, assert(graph_view.local_vertex_partition_range_size() == GraphViewType::is_storage_transposed ? 
graph_view.local_edge_partition_src_range_size() : graph_view.local_edge_partition_dst_range_size()); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { pack_bools(handle, vertex_property_input_first, vertex_property_input_first + graph_view.local_vertex_partition_range_size(), @@ -682,8 +688,12 @@ void update_edge_minor_property(raft::handle_t const& handle, VertexPropertyInputIterator vertex_property_input_first, EdgeMinorPropertyOutputWrapper edge_minor_property_output) { - constexpr bool packed_bool = - std::is_same_v; + constexpr bool contains_packed_bool_element = + cugraph::has_packed_bool_element(); + static_assert(!contains_packed_bool_element || + std::is_arithmetic_v, + "unimplemented for thrust::tuple types with a packed bool element."); using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -706,9 +716,11 @@ void update_edge_minor_property(raft::handle_t const& handle, }); rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); auto rx_tmp_buffer = allocate_dataframe_buffer< - std:: - conditional_t>( - packed_bool ? packed_bool_size(max_rx_size) : max_rx_size, handle.get_stream()); + std::conditional_t>( + contains_packed_bool_element ? packed_bool_size(max_rx_size) : max_rx_size, + handle.get_stream()); auto rx_value_first = get_dataframe_buffer_begin(rx_tmp_buffer); std::optional> key_offsets{}; @@ -727,7 +739,7 @@ void update_edge_minor_property(raft::handle_t const& handle, auto vertex_partition = vertex_partition_device_view_t( graph_view.local_vertex_partition_view()); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { auto bool_first = thrust::make_transform_iterator( vertex_first, cuda::proclaim_return_type([vertex_property_input_first, @@ -764,7 +776,7 @@ void update_edge_minor_property(raft::handle_t const& handle, device_bcast(major_comm, rx_value_first, rx_value_first, - packed_bool ? 
packed_bool_size(rx_counts[i]) : rx_counts[i], + contains_packed_bool_element ? packed_bool_size(rx_counts[i]) : rx_counts[i], i, handle.get_stream()); @@ -784,7 +796,7 @@ void update_edge_minor_property(raft::handle_t const& handle, thrust::lower_bound(thrust::seq, subrange_key_first, subrange_key_last, minor); if ((it != subrange_key_last) && (*it == minor)) { auto subrange_offset = thrust::distance(subrange_key_first, it); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { auto rx_value = static_cast(*(rx_value_first + packed_bool_offset(i)) & packed_bool_mask(i)); packed_bool_atomic_set( @@ -796,7 +808,7 @@ void update_edge_minor_property(raft::handle_t const& handle, } }); } else { - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), @@ -830,7 +842,7 @@ void update_edge_minor_property(raft::handle_t const& handle, } else { assert(graph_view.local_vertex_partition_range_size() == graph_view.local_edge_partition_src_range_size()); - if constexpr (packed_bool) { + if constexpr (contains_packed_bool_element) { thrust::for_each(handle.get_thrust_policy(), vertex_first, vertex_last, @@ -860,6 +872,7 @@ void update_edge_minor_property(raft::handle_t const& handle, * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexPropertyInputIterator Type of the iterator for vertex property values. + * @tparam EdgeSrcValueOutputWrapper Type of the wrapper for output edge source property values. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. @@ -867,19 +880,18 @@ void update_edge_minor_property(raft::handle_t const& handle, * (inclusive) vertex (of the vertex partition assigned to this process in multi-GPU). 
* `vertex_property_input_last` (exclusive) is deduced as @p vertex_property_input_first + @p * graph_view.local_vertex_partition_range_size(). - * @param edge_partition_src_property_output edge_src_property_t class object to store source + * @param edge_partition_src_property_output edge_src_property_view_t class object to store source * property values (for the edge sources assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ -template -void update_edge_src_property( - raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexPropertyInputIterator vertex_property_input_first, - edge_src_property_t::value_type>& - edge_src_property_output, - bool do_expensive_check = false) +template +void update_edge_src_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexPropertyInputIterator vertex_property_input_first, + EdgeSrcValueOutputWrapper edge_src_property_output, + bool do_expensive_check = false) { if (do_expensive_check) { // currently, nothing to do @@ -887,10 +899,10 @@ void update_edge_src_property( if constexpr (GraphViewType::is_storage_transposed) { detail::update_edge_minor_property( - handle, graph_view, vertex_property_input_first, edge_src_property_output.mutable_view()); + handle, graph_view, vertex_property_input_first, edge_src_property_output); } else { detail::update_edge_major_property( - handle, graph_view, vertex_property_input_first, edge_src_property_output.mutable_view()); + handle, graph_view, vertex_property_input_first, edge_src_property_output); } } @@ -903,6 +915,7 @@ void update_edge_src_property( * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexIterator Type of the iterator for vertex identifiers. * @tparam VertexPropertyInputIterator Type of the iterator for vertex property values. 
+ * @tparam EdgeSrcValueOutputWrapper Type of the wrapper for output edge source property values. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. @@ -914,21 +927,21 @@ void update_edge_src_property( * (inclusive) vertex (of the vertex partition assigned to this process in multi-GPU). * `vertex_property_input_last` (exclusive) is deduced as @p vertex_property_input_first + @p * graph_view.local_vertex_partition_range_size(). - * @param edge_partition_src_property_output edge_src_property_t class object to store source + * @param edge_partition_src_property_output edge_src_property_view_t class object to store source * property values (for the edge sources assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ -template -void update_edge_src_property( - raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, - VertexPropertyInputIterator vertex_property_input_first, - edge_src_property_t::value_type>& - edge_src_property_output, - bool do_expensive_check = false) +template +void update_edge_src_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexPropertyInputIterator vertex_property_input_first, + EdgeSrcValueOutputWrapper edge_src_property_output, + bool do_expensive_check = false) { if (do_expensive_check) { auto num_invalids = thrust::count_if( @@ -956,14 +969,14 @@ void update_edge_src_property( vertex_first, vertex_last, vertex_property_input_first, - edge_src_property_output.mutable_view()); + edge_src_property_output); } else { detail::update_edge_major_property(handle, graph_view, vertex_first, vertex_last, vertex_property_input_first, - 
edge_src_property_output.mutable_view()); + edge_src_property_output); } } @@ -975,6 +988,8 @@ void update_edge_src_property( * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexPropertyInputIterator Type of the iterator for vertex property values. + * @tparam EdgeDstValueOutputWrapper Type of the wrapper for output edge destination property + * values. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. @@ -982,19 +997,18 @@ void update_edge_src_property( * (inclusive) vertex (of the vertex partition assigned to this process in multi-GPU). * `vertex_property_input_last` (exclusive) is deduced as @p vertex_property_input_first + @p * graph_view.local_vertex_partition_range_size(). - * @param edge_partition_dst_property_output edge_dst_property_t class object to store destination - * property values (for the edge destinations assigned to this process in multi-GPU). + * @param edge_partition_dst_property_output edge_dst_property_view_t class object to store + * destination property values (for the edge destinations assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
*/ -template -void update_edge_dst_property( - raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexPropertyInputIterator vertex_property_input_first, - edge_dst_property_t::value_type>& - edge_dst_property_output, - bool do_expensive_check = false) +template +void update_edge_dst_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexPropertyInputIterator vertex_property_input_first, + EdgeDstValueOutputWrapper edge_dst_property_output, + bool do_expensive_check = false) { if (do_expensive_check) { // currently, nothing to do @@ -1002,10 +1016,10 @@ void update_edge_dst_property( if constexpr (GraphViewType::is_storage_transposed) { detail::update_edge_major_property( - handle, graph_view, vertex_property_input_first, edge_dst_property_output.mutable_view()); + handle, graph_view, vertex_property_input_first, edge_dst_property_output); } else { detail::update_edge_minor_property( - handle, graph_view, vertex_property_input_first, edge_dst_property_output.mutable_view()); + handle, graph_view, vertex_property_input_first, edge_dst_property_output); } } @@ -1018,6 +1032,8 @@ void update_edge_dst_property( * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexIterator Type of the iterator for vertex identifiers. * @tparam VertexPropertyInputIterator Type of the iterator for vertex property values. + * @tparam EdgeDstValueOutputWrapper Type of the wrapper for output edge destination property + * values. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. @@ -1029,21 +1045,21 @@ void update_edge_dst_property( * (inclusive) vertex (of the vertex partition assigned to this process in multi-GPU). * `vertex_property_input_last` (exclusive) is deduced as @p vertex_property_input_first + @p * graph_view.local_vertex_partition_range_size(). 
- * @param edge_partition_dst_property_output edge_dst_property_t class object to store destination - * property values (for the edge destinations assigned to this process in multi-GPU). + * @param edge_partition_dst_property_output edge_dst_property_view_t class object to store + * destination property values (for the edge destinations assigned to this process in multi-GPU). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ -template -void update_edge_dst_property( - raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, - VertexPropertyInputIterator vertex_property_input_first, - edge_dst_property_t::value_type>& - edge_dst_property_output, - bool do_expensive_check = false) +template +void update_edge_dst_property(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexPropertyInputIterator vertex_property_input_first, + EdgeDstValueOutputWrapper edge_dst_property_output, + bool do_expensive_check = false) { if (do_expensive_check) { auto num_invalids = thrust::count_if( @@ -1071,14 +1087,14 @@ void update_edge_dst_property( vertex_first, vertex_last, vertex_property_input_first, - edge_dst_property_output.mutable_view()); + edge_dst_property_output); } else { detail::update_edge_minor_property(handle, graph_view, vertex_first, vertex_last, vertex_property_input_first, - edge_dst_property_output.mutable_view()); + edge_dst_property_output); } } diff --git a/cpp/src/prims/vertex_frontier.cuh b/cpp/src/prims/vertex_frontier.cuh index 08aadf9fa18..b13e6bfd458 100644 --- a/cpp/src/prims/vertex_frontier.cuh +++ b/cpp/src/prims/vertex_frontier.cuh @@ -43,6 +43,7 @@ #include #include #include +#include #include namespace cugraph { @@ -62,18 +63,52 @@ class key_bucket_t { static_assert(std::is_same_v || std::is_arithmetic_v); - using optional_buffer_type = std:: - 
conditional_t, std::byte /* dummy */, rmm::device_uvector>; + using optional_variant_type = + std::conditional_t, + std::byte /* dummy */, + std::variant, raft::device_span>>; template >* = nullptr> key_bucket_t(raft::handle_t const& handle) - : handle_ptr_(&handle), vertices_(0, handle.get_stream()), tags_(std::byte{0}) + : handle_ptr_(&handle), + vertices_(rmm::device_uvector(0, handle.get_stream())), + tags_(std::byte{0}) { } template >* = nullptr> key_bucket_t(raft::handle_t const& handle) - : handle_ptr_(&handle), vertices_(0, handle.get_stream()), tags_(0, handle.get_stream()) + : handle_ptr_(&handle), + vertices_(rmm::device_uvector(0, handle.get_stream())), + tags_(rmm::device_uvector(0, handle.get_stream())) + { + } + + template >* = nullptr> + key_bucket_t(raft::handle_t const& handle, rmm::device_uvector&& vertices) + : handle_ptr_(&handle), vertices_(std::move(vertices)), tags_(std::byte{0}) + { + } + + template >* = nullptr> + key_bucket_t(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& tags) + : handle_ptr_(&handle), vertices_(std::move(vertices)), tags_(std::move(tags)) + { + } + + template >* = nullptr> + key_bucket_t(raft::handle_t const& handle, raft::device_span vertices) + : handle_ptr_(&handle), vertices_(vertices), tags_(std::byte{0}) + { + } + + template >* = nullptr> + key_bucket_t(raft::handle_t const& handle, + raft::device_span vertices, + raft::device_span tags) + : handle_ptr_(&handle), vertices_(vertices), tags_(tags) { } @@ -85,12 +120,15 @@ class key_bucket_t { template >* = nullptr> void insert(vertex_t vertex) { - if (vertices_.size() > 0) { + CUGRAPH_EXPECTS(vertices_.index() == 0, + "insert() is supported only when this bucket holds an owning container."); + if (std::get<0>(vertices_).size() > 0) { rmm::device_scalar tmp(vertex, handle_ptr_->get_stream()); insert(tmp.data(), tmp.data() + 1); } else { - vertices_.resize(1, handle_ptr_->get_stream()); - raft::update_device(vertices_.data(), 
&vertex, size_t{1}, handle_ptr_->get_stream()); + std::get<0>(vertices_).resize(1, handle_ptr_->get_stream()); + raft::update_device( + std::get<0>(vertices_).data(), &vertex, size_t{1}, handle_ptr_->get_stream()); } } @@ -103,17 +141,19 @@ class key_bucket_t { template >* = nullptr> void insert(thrust::tuple key) { - if (vertices_.size() > 0) { + CUGRAPH_EXPECTS(vertices_.index() == 0, + "insert() is supported only when this bucket holds an owning container."); + if (std::get<0>(vertices_).size() > 0) { rmm::device_scalar tmp_vertex(thrust::get<0>(key), handle_ptr_->get_stream()); rmm::device_scalar tmp_tag(thrust::get<1>(key), handle_ptr_->get_stream()); auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(tmp_vertex.data(), tmp_tag.data())); insert(pair_first, pair_first + 1); } else { - vertices_.resize(1, handle_ptr_->get_stream()); - tags_.resize(1, handle_ptr_->get_stream()); - auto pair_first = - thrust::make_tuple(thrust::make_zip_iterator(vertices_.begin(), tags_.begin())); + std::get<0>(vertices_).resize(1, handle_ptr_->get_stream()); + std::get<0>(tags_).resize(1, handle_ptr_->get_stream()); + auto pair_first = thrust::make_tuple( + thrust::make_zip_iterator(std::get<0>(vertices_).begin(), std::get<0>(tags_).begin())); thrust::fill(handle_ptr_->get_thrust_policy(), pair_first, pair_first + 1, key); } } @@ -134,14 +174,16 @@ class key_bucket_t { static_assert( std::is_same_v::value_type, vertex_t>); - if (vertices_.size() > 0) { + CUGRAPH_EXPECTS(vertices_.index() == 0, + "insert() is supported only when this bucket holds an owning container."); + if (std::get<0>(vertices_).size() > 0) { if constexpr (sorted_unique) { rmm::device_uvector merged_vertices( - vertices_.size() + thrust::distance(vertex_first, vertex_last), + std::get<0>(vertices_).size() + thrust::distance(vertex_first, vertex_last), handle_ptr_->get_stream()); thrust::merge(handle_ptr_->get_thrust_policy(), - vertices_.begin(), - vertices_.end(), + std::get<0>(vertices_).begin(), 
+ std::get<0>(vertices_).end(), vertex_first, vertex_last, merged_vertices.begin()); @@ -150,19 +192,23 @@ class key_bucket_t { merged_vertices.begin(), merged_vertices.end())), handle_ptr_->get_stream()); - vertices_ = std::move(merged_vertices); + std::get<0>(vertices_) = std::move(merged_vertices); } else { - auto cur_size = vertices_.size(); - vertices_.resize(cur_size + thrust::distance(vertex_first, vertex_last), - handle_ptr_->get_stream()); + auto cur_size = std::get<0>(vertices_).size(); + std::get<0>(vertices_).resize(cur_size + thrust::distance(vertex_first, vertex_last), + handle_ptr_->get_stream()); thrust::copy(handle_ptr_->get_thrust_policy(), vertex_first, vertex_last, - vertices_.begin() + cur_size); + std::get<0>(vertices_).begin() + cur_size); } } else { - vertices_.resize(thrust::distance(vertex_first, vertex_last), handle_ptr_->get_stream()); - thrust::copy(handle_ptr_->get_thrust_policy(), vertex_first, vertex_last, vertices_.begin()); + std::get<0>(vertices_).resize(thrust::distance(vertex_first, vertex_last), + handle_ptr_->get_stream()); + thrust::copy(handle_ptr_->get_thrust_policy(), + vertex_first, + vertex_last, + std::get<0>(vertices_).begin()); } } @@ -182,18 +228,21 @@ class key_bucket_t { static_assert(std::is_same_v::value_type, thrust::tuple>); - if (vertices_.size() > 0) { + CUGRAPH_EXPECTS(vertices_.index() == 0, + "insert() is supported only when this bucket holds an owning container."); + if (std::get<0>(vertices_).size() > 0) { if constexpr (sorted_unique) { rmm::device_uvector merged_vertices( - vertices_.size() + thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + std::get<0>(vertices_).size() + thrust::distance(key_first, key_last), + handle_ptr_->get_stream()); rmm::device_uvector merged_tags(merged_vertices.size(), handle_ptr_->get_stream()); - auto old_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + auto old_pair_first = thrust::make_zip_iterator( + 
thrust::make_tuple(std::get<0>(vertices_).begin(), std::get<0>(tags_).begin())); auto merged_pair_first = thrust::make_zip_iterator( thrust::make_tuple(merged_vertices.begin(), merged_tags.begin())); thrust::merge(handle_ptr_->get_thrust_policy(), old_pair_first, - old_pair_first + vertices_.size(), + old_pair_first + std::get<0>(vertices_).size(), key_first, key_last, merged_pair_first); @@ -204,95 +253,154 @@ class key_bucket_t { merged_pair_first + merged_vertices.size())), handle_ptr_->get_stream()); merged_tags.resize(merged_vertices.size(), handle_ptr_->get_stream()); - vertices_ = std::move(merged_vertices); - tags_ = std::move(merged_tags); + std::get<0>(vertices_) = std::move(merged_vertices); + std::get<0>(tags_) = std::move(merged_tags); } else { - auto cur_size = vertices_.size(); - vertices_.resize(cur_size + thrust::distance(key_first, key_last), - handle_ptr_->get_stream()); - tags_.resize(vertices_.size(), handle_ptr_->get_stream()); - thrust::copy( - handle_ptr_->get_thrust_policy(), - key_first, - key_last, - thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())) + - cur_size); + auto cur_size = std::get<0>(vertices_).size(); + std::get<0>(vertices_).resize(cur_size + thrust::distance(key_first, key_last), + handle_ptr_->get_stream()); + std::get<0>(tags_).resize(std::get<0>(vertices_).size(), handle_ptr_->get_stream()); + thrust::copy(handle_ptr_->get_thrust_policy(), + key_first, + key_last, + thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(vertices_).begin(), + std::get<0>(tags_).begin())) + + cur_size); } } else { - vertices_.resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); - tags_.resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + std::get<0>(vertices_).resize(thrust::distance(key_first, key_last), + handle_ptr_->get_stream()); + std::get<0>(tags_).resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); 
thrust::copy(handle_ptr_->get_thrust_policy(), key_first, key_last, - thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin()))); + thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(vertices_).begin(), + std::get<0>(tags_).begin()))); } } - size_t size() const { return vertices_.size(); } + size_t size() const + { + return vertices_.index() == 0 ? std::get<0>(vertices_).size() : std::get<1>(vertices_).size(); + } template std::enable_if_t aggregate_size() const { - return host_scalar_allreduce(handle_ptr_->get_comms(), - vertices_.size(), - raft::comms::op_t::SUM, - handle_ptr_->get_stream()); + return host_scalar_allreduce( + handle_ptr_->get_comms(), + vertices_.index() == 0 ? std::get<0>(vertices_).size() : std::get<1>(vertices_).size(), + raft::comms::op_t::SUM, + handle_ptr_->get_stream()); } template std::enable_if_t aggregate_size() const { - return vertices_.size(); + return vertices_.index() == 0 ? std::get<0>(vertices_).size() : std::get<1>(vertices_).size(); } void resize(size_t size) { - vertices_.resize(size, handle_ptr_->get_stream()); - if constexpr (!std::is_same_v) { tags_.resize(size, handle_ptr_->get_stream()); } + CUGRAPH_EXPECTS(vertices_.index() == 0, + "resize() is supported only when this bucket holds an owning container."); + std::get<0>(vertices_).resize(size, handle_ptr_->get_stream()); + if constexpr (!std::is_same_v) { + std::get<0>(tags_).resize(size, handle_ptr_->get_stream()); + } } - void clear() { resize(0); } + void clear() + { + CUGRAPH_EXPECTS(vertices_.index() == 0, + "clear() is supported only when this bucket holds an owning container."); + resize(0); + } void shrink_to_fit() { - vertices_.shrink_to_fit(handle_ptr_->get_stream()); - if constexpr (!std::is_same_v) { tags_.shrink_to_fit(handle_ptr_->get_stream()); } + CUGRAPH_EXPECTS( + vertices_.index() == 0, + "shrink_to_fit() is supported only when this bucket holds an owning container."); + 
std::get<0>(vertices_).shrink_to_fit(handle_ptr_->get_stream()); + if constexpr (!std::is_same_v) { + std::get<0>(tags_).shrink_to_fit(handle_ptr_->get_stream()); + } } auto const begin() const { if constexpr (std::is_same_v) { - return vertices_.begin(); + return vertices_.index() == 0 ? std::get<0>(vertices_).begin() + : std::get<1>(vertices_).begin(); } else { - return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + return vertices_.index() == 0 + ? thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(vertices_).begin(), std::get<0>(tags_).begin())) + : thrust::make_zip_iterator( + thrust::make_tuple(std::get<1>(vertices_).begin(), std::get<1>(tags_).begin())); } } auto begin() { + CUGRAPH_EXPECTS( + vertices_.index() == 0, + "non-const begin() is supported only when this bucket holds an owning container."); if constexpr (std::is_same_v) { - return vertices_.begin(); + return std::get<0>(vertices_).begin(); } else { - return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + return thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(vertices_).begin(), std::get<0>(tags_).begin())); } } - auto const end() const { return begin() + vertices_.size(); } + auto const end() const + { + return begin() + + (vertices_.index() == 0 ? std::get<0>(vertices_).size() : std::get<1>(vertices_).size()); + } + + auto end() + { + CUGRAPH_EXPECTS( + vertices_.index() == 0, + "non-const end() is supported only when this bucket holds an owning container."); + return begin() + std::get<0>(vertices_).size(); + } - auto end() { return begin() + vertices_.size(); } + auto const vertex_begin() const + { + return vertices_.index() == 0 ? std::get<0>(vertices_).begin() : std::get<1>(vertices_).begin(); + } - auto const vertex_begin() const { return vertices_.begin(); } + auto const vertex_end() const + { + return vertices_.index() == 0 ? 
std::get<0>(vertices_).end() : std::get<1>(vertices_).end(); + } - auto const vertex_end() const { return vertices_.end(); } + auto vertex_begin() + { + CUGRAPH_EXPECTS( + vertices_.index() == 0, + "non-const vertex_begin() is supported only when this bucket holds an owning container."); + return std::get<0>(vertices_).begin(); + } - auto vertex_begin() { return vertices_.begin(); } + auto vertex_end() + { + CUGRAPH_EXPECTS( + vertices_.index() == 0, + "non-const vertex_end() is supported only when this bucket holds an owning container."); + return std::get<0>(vertices_).end(); + } - auto vertex_end() { return vertices_.end(); } + bool is_owning() { return (vertices_.index() == 0); } private: raft::handle_t const* handle_ptr_{nullptr}; - rmm::device_uvector vertices_; - optional_buffer_type tags_; + std::variant, raft::device_span> vertices_; + optional_variant_type tags_; }; template (lhs) < thrust::get<0>(rhs); }); rmm::device_uvector d_indices(to_bucket_indices.size(), handle_ptr_->get_stream()); rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); + // FIXME: thrust::lower_bound & thrust::upper_bound will be faster auto it = thrust::reduce_by_key(handle_ptr_->get_thrust_policy(), bucket_idx_first, bucket_idx_last, diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index 0689dc4a53a..fb1dee1a92f 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -291,9 +291,9 @@ coarsen_graph(raft::handle_t const& handle, edge_dst_property_t, vertex_t>> edge_minor_labels(handle, graph_view); if constexpr (store_transposed) { - update_edge_src_property(handle, graph_view, labels, edge_minor_labels); + update_edge_src_property(handle, graph_view, labels, edge_minor_labels.mutable_view()); } else { - update_edge_dst_property(handle, graph_view, labels, edge_minor_labels); + update_edge_dst_property(handle, graph_view, labels, edge_minor_labels.mutable_view()); } 
std::vector> coarsened_edgelist_majors{}; diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 7097349dce5..5371d53bcf0 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -94,7 +94,8 @@ rmm::device_uvector compute_major_degrees( auto use_dcs = edge_partition_dcs_nzd_vertices.has_value(); - rmm::device_uvector local_degrees(0, handle.get_stream()); + rmm::device_uvector local_degrees( + 0, handle.get_stream()); // excluding globally 0 degree vertices rmm::device_uvector degrees(0, handle.get_stream()); vertex_t max_num_local_degrees{0}; @@ -104,9 +105,16 @@ rmm::device_uvector compute_major_degrees( major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); auto major_range_vertex_partition_size = partition.vertex_partition_range_size(major_range_vertex_partition_id); - max_num_local_degrees = std::max(max_num_local_degrees, major_range_vertex_partition_size); + auto segment_offset_size_per_partition = + edge_partition_segment_offsets.size() / static_cast(minor_comm_size); + auto num_local_degrees = + edge_partition_segment_offsets[segment_offset_size_per_partition * i + + (segment_offset_size_per_partition - 2)]; + max_num_local_degrees = std::max(max_num_local_degrees, num_local_degrees); if (i == minor_comm_rank) { degrees.resize(major_range_vertex_partition_size, handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), degrees.begin() + num_local_degrees, degrees.end(), edge_t{0}); } } local_degrees.resize(max_num_local_degrees, handle.get_stream()); @@ -114,20 +122,22 @@ rmm::device_uvector compute_major_degrees( auto major_range_vertex_partition_id = detail::compute_local_edge_partition_major_range_vertex_partition_id_t{ major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - vertex_t major_range_first{}; - vertex_t major_range_last{}; - std::tie(major_range_first, major_range_last) = - 
partition.vertex_partition_range(major_range_vertex_partition_id); + auto major_range_first = + partition.vertex_partition_range_first(major_range_vertex_partition_id); + auto offsets = edge_partition_offsets[i]; auto masks = edge_partition_masks ? thrust::make_optional((*edge_partition_masks)[i]) : thrust::nullopt; auto segment_offset_size_per_partition = edge_partition_segment_offsets.size() / static_cast(minor_comm_size); + auto num_local_degrees = + edge_partition_segment_offsets[segment_offset_size_per_partition * i + + (segment_offset_size_per_partition - 2)]; auto major_hypersparse_first = use_dcs ? major_range_first + edge_partition_segment_offsets[segment_offset_size_per_partition * i + detail::num_sparse_segments_per_vertex_partition] - : major_range_last; + : major_range_first + num_local_degrees; auto execution_policy = handle.get_thrust_policy(); thrust::transform(execution_policy, thrust::make_counting_iterator(vertex_t{0}), @@ -145,7 +155,7 @@ rmm::device_uvector compute_major_degrees( auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; thrust::fill(execution_policy, local_degrees.begin() + (major_hypersparse_first - major_range_first), - local_degrees.begin() + (major_range_last - major_range_first), + local_degrees.begin() + num_local_degrees, edge_t{0}); thrust::for_each( execution_policy, @@ -169,7 +179,7 @@ rmm::device_uvector compute_major_degrees( } minor_comm.reduce(local_degrees.data(), i == minor_comm_rank ? 
degrees.data() : static_cast(nullptr), - static_cast(major_range_last - major_range_first), + static_cast(num_local_degrees), raft::comms::op_t::SUM, i, handle.get_stream()); diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh index b1ce8e6f51e..a8cd2f6f50c 100644 --- a/cpp/src/structure/induced_subgraph_impl.cuh +++ b/cpp/src/structure/induced_subgraph_impl.cuh @@ -212,10 +212,9 @@ extract_induced_subgraphs( dst_subgraph_vertices = raft::device_span(dst_subgraph_vertices_v.data(), dst_subgraph_vertices_v.size()); - // 3. Call extract_transform_v_frontier_e with a functor that - // returns thrust::nullopt if the destination vertex has - // a property of 0, return the edge if the destination - // vertex has a property of 1 + // 3. Call extract_transform_v_frontier_outgoing_e with a functor that returns thrust::nullopt if + // the destination vertex has a property of 0, return the edge if the destination vertex has a + // property of 1 vertex_frontier_t vertex_frontier(handle, 1); graph_ids_v = detail::expand_sparse_offsets(subgraph_offsets, size_t{0}, handle.get_stream()); diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index fb837484a14..8a18dedd2ab 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -17,8 +17,7 @@ #include "prims/fill_edge_src_dst_property.cuh" #include "prims/reduce_op.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" -#include "prims/update_edge_src_dst_property.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -38,8 +37,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -51,33 +52,35 @@ namespace cugraph { namespace { template -struct e_op_t { +struct topdown_e_op_t { + detail::edge_partition_endpoint_property_device_view_t + prev_visited_flags{}; // visited in the 
previous iterations, to reduce the number of atomic + // operations detail::edge_partition_endpoint_property_device_view_t visited_flags{}; - uint32_t const* prev_visited_flags{ - nullptr}; // relevant only if multi_gpu is false (this affects only local-computing with 0 - // impact in communication volume, so this may improve performance in small-scale but - // will eat-up more memory with no benefit in performance in large-scale). - vertex_t dst_first{}; // relevant only if multi_gpu is true + vertex_t dst_first{}; __device__ thrust::optional operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - bool push{}; - if constexpr (multi_gpu) { - auto dst_offset = dst - dst_first; - auto old = visited_flags.atomic_or(dst_offset, true); - push = !old; - } else { - auto mask = uint32_t{1} << (dst % (sizeof(uint32_t) * 8)); - if (*(prev_visited_flags + packed_bool_offset(dst)) & - packed_bool_mask(dst)) { // check if unvisited in previous iterations - push = false; - } else { // check if unvisited in this iteration as well - auto old = visited_flags.atomic_or(dst, true); - push = !old; - } - } - return push ? thrust::optional{src} : thrust::nullopt; + auto dst_offset = dst - dst_first; + auto old = prev_visited_flags.get(dst_offset); + if (!old) { old = visited_flags.atomic_or(dst_offset, true); } + return old ? thrust::nullopt : thrust::optional{src}; + } +}; + +template +struct bottomup_e_op_t { + detail::edge_partition_endpoint_property_device_view_t + prev_visited_flags{}; // visited in the previous iterations + vertex_t dst_first{}; + + __device__ thrust::optional operator()( + vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + { + auto dst_offset = dst - dst_first; + auto old = prev_visited_flags.get(dst_offset); + return old ? 
thrust::optional{dst} : thrust::nullopt; } }; @@ -87,7 +90,7 @@ namespace detail { template void bfs(raft::handle_t const& handle, - GraphViewType const& push_graph_view, + GraphViewType const& graph_view, typename GraphViewType::vertex_type* distances, PredecessorIterator predecessor_first, typename GraphViewType::vertex_type const* sources, @@ -97,35 +100,58 @@ void bfs(raft::handle_t const& handle, bool do_expensive_check) { using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; static_assert(std::is_integral::value, "GraphViewType::vertex_type should be integral."); static_assert(!GraphViewType::is_storage_transposed, "GraphViewType should support the push model."); - auto const num_vertices = push_graph_view.number_of_vertices(); + // direction optimizing BFS implementation is based on "S. Beamer, K. Asanovic, D. Patterson, + // Direction-Optimizing Breadth-First Search, 2012" + + auto const num_vertices = graph_view.number_of_vertices(); if (num_vertices == 0) { return; } // 1. check input arguments CUGRAPH_EXPECTS((n_sources == 0) || (sources != nullptr), - "Invalid input argument: sources cannot be null"); - - auto aggregate_n_sources = - GraphViewType::is_multi_gpu - ? 
host_scalar_allreduce( - handle.get_comms(), n_sources, raft::comms::op_t::SUM, handle.get_stream()) - : n_sources; - CUGRAPH_EXPECTS(aggregate_n_sources > 0, - "Invalid input argument: input should have at least one source"); + "Invalid input argument: sources cannot be null if n_sources > 0."); + + if (GraphViewType::is_multi_gpu) { + if (do_expensive_check) { + auto aggregate_n_sources = host_scalar_allreduce(handle.get_comms(), + static_cast(n_sources), + raft::comms::op_t::SUM, + handle.get_stream()); + CUGRAPH_EXPECTS(aggregate_n_sources > 0, + "Invalid input argument: input should have at least one source"); + } + } else { + CUGRAPH_EXPECTS(n_sources > 0, + "Invalid input argument: input should have at least one source."); + } CUGRAPH_EXPECTS( - push_graph_view.is_symmetric() || !direction_optimizing, + graph_view.is_symmetric() || !direction_optimizing, "Invalid input argument: input graph should be symmetric for direction optimizing BFS."); + auto vertex_partition = vertex_partition_device_view_t( + graph_view.local_vertex_partition_view()); + if (do_expensive_check) { - auto vertex_partition = vertex_partition_device_view_t( - push_graph_view.local_vertex_partition_view()); + bool is_sorted = thrust::is_sorted(handle.get_thrust_policy(), sources, sources + n_sources); + if constexpr (GraphViewType::is_multi_gpu) { + is_sorted = static_cast(host_scalar_allreduce(handle.get_comms(), + static_cast(is_sorted), + raft::comms::op_t::SUM, + handle.get_stream())); + } + + CUGRAPH_EXPECTS( + is_sorted, + "Invalid input arguments: input sources should be sorted in the non-descending order."); + auto num_invalid_vertices = thrust::count_if(handle.get_thrust_policy(), sources, @@ -149,26 +175,51 @@ void bfs(raft::handle_t const& handle, thrust::fill(handle.get_thrust_policy(), distances, - distances + push_graph_view.local_vertex_partition_range_size(), + distances + graph_view.local_vertex_partition_range_size(), invalid_distance); 
thrust::fill(handle.get_thrust_policy(), predecessor_first, - predecessor_first + push_graph_view.local_vertex_partition_range_size(), + predecessor_first + graph_view.local_vertex_partition_range_size(), invalid_vertex); - auto vertex_partition = vertex_partition_device_view_t( - push_graph_view.local_vertex_partition_view()); - if (n_sources) { - thrust::for_each( - handle.get_thrust_policy(), - sources, - sources + n_sources, - [vertex_partition, distances, predecessor_first] __device__(auto v) { - *(distances + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)) = - vertex_t{0}; - }); + auto output_first = thrust::make_permutation_iterator( + distances, + thrust::make_transform_iterator( + sources, detail::shift_left_t{graph_view.local_vertex_partition_range_first()})); + thrust::fill(handle.get_thrust_policy(), output_first, output_first + n_sources, vertex_t{0}); + + // 3. update meta data for direction optimizing BFS + + constexpr edge_t direction_optimizing_alpha = 14; + constexpr vertex_t direction_optimizing_beta = 24; + + std::optional> out_degrees{std::nullopt}; + std::optional> nzd_unvisited_vertices{std::nullopt}; + if (direction_optimizing) { + out_degrees = graph_view.compute_out_degrees(handle); + nzd_unvisited_vertices = rmm::device_uvector( + graph_view.local_vertex_partition_range_size(), handle.get_stream()); + (*nzd_unvisited_vertices) + .resize(thrust::distance( + (*nzd_unvisited_vertices).begin(), + thrust::copy_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()), + (*nzd_unvisited_vertices).begin(), + [vertex_partition, + sources = raft::device_span(sources, n_sources), + out_degrees = raft::device_span( + (*out_degrees).data(), (*out_degrees).size())] __device__(vertex_t v) { + auto v_offset = + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); + return 
(out_degrees[v_offset] > edge_t{0}) && + !thrust::binary_search(thrust::seq, sources.begin(), sources.end(), v); + })), + handle.get_stream()); + (*nzd_unvisited_vertices).shrink_to_fit(handle.get_stream()); } - // 3. initialize BFS frontier + // 4. initialize BFS frontier constexpr size_t bucket_idx_cur = 0; constexpr size_t bucket_idx_next = 1; @@ -176,110 +227,235 @@ void bfs(raft::handle_t const& handle, vertex_frontier_t vertex_frontier(handle, num_buckets); - vertex_frontier.bucket(bucket_idx_cur).insert(sources, sources + n_sources); - rmm::device_uvector visited_flags( - packed_bool_size(push_graph_view.local_vertex_partition_range_size()), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - visited_flags.begin(), - visited_flags.end(), - packed_bool_empty_mask()); - rmm::device_uvector prev_visited_flags( - GraphViewType::is_multi_gpu ? size_t{0} : visited_flags.size(), - handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is false - auto dst_visited_flags = - GraphViewType::is_multi_gpu - ? edge_dst_property_t(handle, push_graph_view) - : edge_dst_property_t(handle); // relevant only if GraphViewType::is_multi_gpu is true - if constexpr (GraphViewType::is_multi_gpu) { - fill_edge_dst_property(handle, push_graph_view, false, dst_visited_flags); - } + + // 5. 
initialize BFS temporary state data + + auto prev_dst_visited_flags = edge_dst_property_t(handle, graph_view); + fill_edge_dst_property(handle, graph_view, prev_dst_visited_flags.mutable_view(), false); + auto dst_visited_flags = edge_dst_property_t( + handle, graph_view); // this may mark some vertices visited in previous iterations as unvisited + // (but this is OK as we check prev_dst_visited_flags first) + fill_edge_dst_property(handle, graph_view, dst_visited_flags.mutable_view(), false); + + fill_edge_dst_property(handle, + graph_view, + vertex_frontier.bucket(bucket_idx_cur).begin(), + vertex_frontier.bucket(bucket_idx_cur).end(), + prev_dst_visited_flags.mutable_view(), + true); // 4. BFS iteration vertex_t depth{0}; + bool top_down = true; + auto cur_aggregate_vertex_frontier_size = + static_cast(vertex_frontier.bucket(bucket_idx_cur).aggregate_size()); while (true) { - if (direction_optimizing) { - CUGRAPH_FAIL("unimplemented."); - } else { - if (GraphViewType::is_multi_gpu) { - update_edge_dst_property(handle, - push_graph_view, - vertex_frontier.bucket(bucket_idx_cur).begin(), - vertex_frontier.bucket(bucket_idx_cur).end(), - thrust::make_constant_iterator(true), - dst_visited_flags); - } else { - thrust::copy(handle.get_thrust_policy(), - visited_flags.begin(), - visited_flags.end(), - prev_visited_flags.begin()); - } + vertex_t next_aggregate_vertex_frontier_size{}; + if (top_down) { + topdown_e_op_t e_op{}; + e_op.prev_visited_flags = + detail::edge_partition_endpoint_property_device_view_t( + prev_dst_visited_flags.mutable_view()); + e_op.visited_flags = + detail::edge_partition_endpoint_property_device_view_t( + dst_visited_flags.mutable_view()); + e_op.dst_first = graph_view.local_edge_partition_dst_range_first(); + + auto [new_frontier_vertex_buffer, predecessor_buffer] = + transform_reduce_v_frontier_outgoing_e_by_dst(handle, + graph_view, + vertex_frontier.bucket(bucket_idx_cur), + edge_src_dummy_property_t{}.view(), + 
edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + e_op, + reduce_op::any()); - e_op_t e_op{}; - if constexpr (GraphViewType::is_multi_gpu) { - e_op.visited_flags = - detail::edge_partition_endpoint_property_device_view_t( - dst_visited_flags.mutable_view()); - e_op.dst_first = push_graph_view.local_edge_partition_dst_range_first(); - } else { - e_op.visited_flags = - detail::edge_partition_endpoint_property_device_view_t( - detail::edge_minor_property_view_t(visited_flags.data(), - vertex_t{0})); - e_op.prev_visited_flags = prev_visited_flags.data(); + auto input_pair_first = thrust::make_zip_iterator(thrust::make_constant_iterator(depth + 1), + predecessor_buffer.begin()); + thrust::scatter( + handle.get_thrust_policy(), + input_pair_first, + input_pair_first + new_frontier_vertex_buffer.size(), + thrust::make_transform_iterator( + new_frontier_vertex_buffer.begin(), + detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + thrust::make_zip_iterator(distances, predecessor_first)); + vertex_frontier.bucket(bucket_idx_next) = + key_bucket_t( + handle, std::move(new_frontier_vertex_buffer)); + + next_aggregate_vertex_frontier_size = + static_cast(vertex_frontier.bucket(bucket_idx_next).aggregate_size()); + if (next_aggregate_vertex_frontier_size == 0) { break; } + + fill_edge_dst_property(handle, + graph_view, + vertex_frontier.bucket(bucket_idx_next).begin(), + vertex_frontier.bucket(bucket_idx_next).end(), + prev_dst_visited_flags.mutable_view(), + true); + + if (direction_optimizing) { + auto m_f = thrust::transform_reduce( + handle.get_thrust_policy(), + vertex_frontier.bucket(bucket_idx_next).begin(), + vertex_frontier.bucket(bucket_idx_next).end(), + cuda::proclaim_return_type( + [vertex_partition, + out_degrees = raft::device_span( + (*out_degrees).data(), (*out_degrees).size())] __device__(vertex_t v) { + auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); + return 
out_degrees[v_offset]; + }), + edge_t{0}, + thrust::plus{}); + + { + rmm::device_uvector tmp_vertices((*nzd_unvisited_vertices).size(), + handle.get_stream()); + tmp_vertices.resize( + thrust::distance(tmp_vertices.begin(), + thrust::set_difference(handle.get_thrust_policy(), + (*nzd_unvisited_vertices).begin(), + (*nzd_unvisited_vertices).end(), + vertex_frontier.bucket(bucket_idx_next).begin(), + vertex_frontier.bucket(bucket_idx_next).end(), + tmp_vertices.begin())), + handle.get_stream()); + nzd_unvisited_vertices = std::move(tmp_vertices); + } + + auto m_u = thrust::transform_reduce( + handle.get_thrust_policy(), + (*nzd_unvisited_vertices).begin(), + (*nzd_unvisited_vertices).end(), + cuda::proclaim_return_type( + [vertex_partition, + out_degrees = raft::device_span( + (*out_degrees).data(), (*out_degrees).size())] __device__(vertex_t v) { + auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); + return out_degrees[v_offset]; + }), + edge_t{0}, + thrust::plus{}); + auto aggregate_m_f = + GraphViewType::is_multi_gpu + ? host_scalar_allreduce( + handle.get_comms(), m_f, raft::comms::op_t::SUM, handle.get_stream()) + : m_f; + auto aggregate_m_u = + GraphViewType::is_multi_gpu + ? 
host_scalar_allreduce( + handle.get_comms(), m_u, raft::comms::op_t::SUM, handle.get_stream()) + : m_u; + if ((aggregate_m_f * direction_optimizing_alpha > aggregate_m_u) && + (next_aggregate_vertex_frontier_size >= cur_aggregate_vertex_frontier_size)) { + top_down = false; + } } + if (top_down) { // staying in top-down + vertex_frontier.bucket(bucket_idx_cur) = + key_bucket_t(handle); + vertex_frontier.swap_buckets(bucket_idx_cur, bucket_idx_next); + } else { // switching to bottom-up + vertex_frontier.bucket(bucket_idx_cur) = + key_bucket_t( + handle, + raft::device_span((*nzd_unvisited_vertices).data(), + (*nzd_unvisited_vertices).size())); + vertex_frontier.bucket(bucket_idx_next) = + key_bucket_t(handle); + } + } else { // bottom up + bottomup_e_op_t e_op{}; + e_op.prev_visited_flags = + detail::edge_partition_endpoint_property_device_view_t( + prev_dst_visited_flags.mutable_view()); + e_op.dst_first = graph_view.local_edge_partition_dst_range_first(); auto [new_frontier_vertex_buffer, predecessor_buffer] = - transform_reduce_v_frontier_outgoing_e_by_dst(handle, - push_graph_view, + transform_reduce_v_frontier_outgoing_e_by_src(handle, + graph_view, vertex_frontier.bucket(bucket_idx_cur), edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), edge_dummy_property_t{}.view(), -#if 1 e_op, -#else - // FIXME: need to test more about the performance trade-offs between additional - // communication in updating dst_visited_flags (+ using atomics) vs reduced number of - // pushes (leading to both less computation & communication in reduction) - [vertex_partition, distances] __device__( - vertex_t src, vertex_t dst, auto, auto, auto) { - auto push = true; - if (vertex_partition.in_local_vertex_partition_range_nocheck(dst)) { - auto distance = - *(distances + - vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(dst)); - if (distance != invalid_distance) { push = false; } - } - return thrust::make_tuple(push, src); - }, -#endif 
reduce_op::any()); - update_v_frontier( - handle, - push_graph_view, - std::move(new_frontier_vertex_buffer), - std::move(predecessor_buffer), - vertex_frontier, - std::vector{bucket_idx_next}, - distances, - thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - [depth] __device__(auto v, auto v_val, auto pushed_val) { - auto update = (v_val == invalid_distance); - return thrust::make_tuple( - update ? thrust::optional{bucket_idx_next} : thrust::nullopt, - update ? thrust::optional>{thrust::make_tuple( - depth + 1, pushed_val)} - : thrust::nullopt); - }); - - vertex_frontier.bucket(bucket_idx_cur).clear(); - vertex_frontier.bucket(bucket_idx_cur).shrink_to_fit(); - vertex_frontier.swap_buckets(bucket_idx_cur, bucket_idx_next); - if (vertex_frontier.bucket(bucket_idx_cur).aggregate_size() == 0) { break; } + auto input_pair_first = thrust::make_zip_iterator(thrust::make_constant_iterator(depth + 1), + predecessor_buffer.begin()); + thrust::scatter( + handle.get_thrust_policy(), + input_pair_first, + input_pair_first + new_frontier_vertex_buffer.size(), + thrust::make_transform_iterator( + new_frontier_vertex_buffer.begin(), + detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + thrust::make_zip_iterator(distances, predecessor_first)); + + assert(direction_optimizing); + + { + rmm::device_uvector tmp_vertices((*nzd_unvisited_vertices).size(), + handle.get_stream()); + tmp_vertices.resize( + thrust::distance(tmp_vertices.begin(), + thrust::set_difference(handle.get_thrust_policy(), + (*nzd_unvisited_vertices).begin(), + (*nzd_unvisited_vertices).end(), + new_frontier_vertex_buffer.begin(), + new_frontier_vertex_buffer.end(), + tmp_vertices.begin())), + handle.get_stream()); + nzd_unvisited_vertices = std::move(tmp_vertices); + } + + next_aggregate_vertex_frontier_size = + GraphViewType::is_multi_gpu + ? 
host_scalar_allreduce(handle.get_comms(), + static_cast(new_frontier_vertex_buffer.size()), + raft::comms::op_t::SUM, + handle.get_stream()) + : static_cast(new_frontier_vertex_buffer.size()); + if (next_aggregate_vertex_frontier_size == 0) { break; } + + fill_edge_dst_property(handle, + graph_view, + new_frontier_vertex_buffer.begin(), + new_frontier_vertex_buffer.end(), + prev_dst_visited_flags.mutable_view(), + true); + + auto aggregate_nzd_unvisted_vertices = + GraphViewType::is_multi_gpu + ? host_scalar_allreduce(handle.get_comms(), + static_cast((*nzd_unvisited_vertices).size()), + raft::comms::op_t::SUM, + handle.get_stream()) + : static_cast((*nzd_unvisited_vertices).size()); + + if ((next_aggregate_vertex_frontier_size * direction_optimizing_beta < + aggregate_nzd_unvisted_vertices) && + (next_aggregate_vertex_frontier_size < cur_aggregate_vertex_frontier_size)) { + top_down = true; + } + + if (top_down) { // switching to top-down + vertex_frontier.bucket(bucket_idx_cur) = + key_bucket_t( + handle, std::move(new_frontier_vertex_buffer)); + } else { // staying in bottom-up + vertex_frontier.bucket(bucket_idx_cur) = + key_bucket_t( + handle, + raft::device_span((*nzd_unvisited_vertices).data(), + (*nzd_unvisited_vertices).size())); + } } + cur_aggregate_vertex_frontier_size = next_aggregate_vertex_frontier_size; depth++; if (depth >= depth_limit) { break; } diff --git a/cpp/src/traversal/k_hop_nbrs_impl.cuh b/cpp/src/traversal/k_hop_nbrs_impl.cuh index c74bb9cd58c..acf3cfe8fc5 100644 --- a/cpp/src/traversal/k_hop_nbrs_impl.cuh +++ b/cpp/src/traversal/k_hop_nbrs_impl.cuh @@ -16,7 +16,7 @@ #pragma once #include "prims/reduce_op.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/vertex_frontier.cuh" #include diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index d1ff860f004..e1b7444b92f
100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -16,12 +16,13 @@ #pragma once #include "prims/count_if_e.cuh" +#include "prims/detail/extract_transform_v_frontier_e.cuh" #include "prims/fill_edge_src_dst_property.cuh" #include "prims/key_store.cuh" #include "prims/kv_store.cuh" #include "prims/reduce_op.cuh" #include "prims/transform_reduce_e.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -640,6 +641,7 @@ rmm::device_uvector od_shortest_distances( cutoff, invalid_distance}; detail::transform_reduce_v_frontier_call_e_op_t< + false, thrust::tuple, weight_t, vertex_t, @@ -651,8 +653,8 @@ rmm::device_uvector od_shortest_distances( auto new_frontier_tagged_vertex_buffer = allocate_dataframe_buffer>(0, handle.get_stream()); - std::tie(new_frontier_tagged_vertex_buffer, distance_buffer) = - detail::extract_transform_v_frontier_e, weight_t>( + std::tie(new_frontier_tagged_vertex_buffer, distance_buffer) = detail:: + extract_transform_v_frontier_e, weight_t>( handle, graph_view, vertex_frontier.bucket(bucket_idx_near), diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh index 4544fb1e8ce..47908524feb 100644 --- a/cpp/src/traversal/sssp_impl.cuh +++ b/cpp/src/traversal/sssp_impl.cuh @@ -19,7 +19,7 @@ #include "prims/fill_edge_src_dst_property.cuh" #include "prims/reduce_op.cuh" #include "prims/transform_reduce_e.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -172,8 +172,10 @@ void sssp(raft::handle_t const& handle, ? 
edge_src_property_t(handle, push_graph_view) : edge_src_property_t(handle); if (GraphViewType::is_multi_gpu) { - fill_edge_src_property( - handle, push_graph_view, std::numeric_limits::max(), edge_src_distances); + fill_edge_src_property(handle, + push_graph_view, + edge_src_distances.mutable_view(), + std::numeric_limits::max()); } if (push_graph_view.in_local_vertex_partition_range_nocheck(source_vertex)) { @@ -188,7 +190,7 @@ void sssp(raft::handle_t const& handle, vertex_frontier.bucket(bucket_idx_cur_near).begin(), vertex_frontier.bucket(bucket_idx_cur_near).end(), distances, - edge_src_distances); + edge_src_distances.mutable_view()); } auto vertex_partition = vertex_partition_device_view_t( diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 103953dd5d2..73a3104f27b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -661,9 +661,9 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) ############################################################################################### - # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST tests ------------------------------ - ConfigureTestMG(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST_TEST - prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu) + # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_SRC_DST tests -------------------------- + ConfigureTestMG(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_SRC_DST_TEST + prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu) ############################################################################################### # - MG PRIMS REDUCE_V tests ------------------------------------------------------------------- diff --git a/cpp/tests/components/mg_mis_test.cu b/cpp/tests/components/mg_mis_test.cu index c2469ba867d..9c50be3fa28 100644 --- a/cpp/tests/components/mg_mis_test.cu +++ b/cpp/tests/components/mg_mis_test.cu @@ -145,9 +145,9 @@ class 
Tests_MGMaximalIndependentSet dst_inclusion_cache = cugraph::edge_dst_property_t(*handle_, mg_graph_view); update_edge_src_property( - *handle_, mg_graph_view, inclusiong_flags.begin(), src_inclusion_cache); + *handle_, mg_graph_view, inclusiong_flags.begin(), src_inclusion_cache.mutable_view()); update_edge_dst_property( - *handle_, mg_graph_view, inclusiong_flags.begin(), dst_inclusion_cache); + *handle_, mg_graph_view, inclusiong_flags.begin(), dst_inclusion_cache.mutable_view()); } per_v_transform_reduce_outgoing_e( diff --git a/cpp/tests/components/mg_vertex_coloring_test.cu b/cpp/tests/components/mg_vertex_coloring_test.cu index 2c54e3cf065..14e15df502f 100644 --- a/cpp/tests/components/mg_vertex_coloring_test.cu +++ b/cpp/tests/components/mg_vertex_coloring_test.cu @@ -111,8 +111,10 @@ class Tests_MGGraphColoring cugraph::edge_src_property_t(*handle_, mg_graph_view); dst_color_cache = cugraph::edge_dst_property_t(*handle_, mg_graph_view); - update_edge_src_property(*handle_, mg_graph_view, d_colors.begin(), src_color_cache); - update_edge_dst_property(*handle_, mg_graph_view, d_colors.begin(), dst_color_cache); + update_edge_src_property( + *handle_, mg_graph_view, d_colors.begin(), src_color_cache.mutable_view()); + update_edge_dst_property( + *handle_, mg_graph_view, d_colors.begin(), dst_color_cache.mutable_view()); } rmm::device_uvector d_color_conflicts( diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 137f7db8625..8ad34a5a724 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -15,7 +15,6 @@ */ #include "prims/count_if_e.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" #include "utilities/device_comm_wrapper.hpp" diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 75b711fbd9c..681a7d8e6ff 100644 
--- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -16,7 +16,6 @@ #include "prims/per_v_pair_transform_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" #include "utilities/device_comm_wrapper.hpp" diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu index fd9192dcce5..bef6395a780 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu @@ -16,7 +16,6 @@ #include "prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh" #include "prims/reduce_op.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index be29c793ad5..d4f102127c5 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -16,7 +16,6 @@ #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/reduce_op.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index 2b8d5d52905..2e8b2feaef3 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -160,7 +160,8 @@ class Tests_MGTransformE cugraph::edge_property_t edge_value_output(*handle_, 
mg_graph_view); - cugraph::fill_edge_property(*handle_, mg_graph_view, property_initial_value, edge_value_output); + cugraph::fill_edge_property( + *handle_, mg_graph_view, edge_value_output.mutable_view(), property_initial_value); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement diff --git a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu index 4fac6ef3be7..8eee3d5a6d5 100644 --- a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu @@ -16,7 +16,6 @@ #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" #include "utilities/device_comm_wrapper.hpp" diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index 4785a8bb01b..9f9e71f6e55 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -15,7 +15,6 @@ */ #include "prims/transform_reduce_e.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" diff --git a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu index 9950b5bdbf4..67cacb27e0c 100644 --- a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu +++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu @@ -16,7 +16,6 @@ #include "prims/reduce_op.cuh" #include "prims/transform_reduce_e_by_src_dst_key.cuh" -#include "prims/update_edge_src_dst_property.cuh" #include "result_compare.cuh" #include 
"utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu similarity index 55% rename from cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu rename to cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu index 335a7ec879c..596bb3688fb 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu @@ -14,8 +14,7 @@ * limitations under the License. */ -#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" -#include "prims/update_edge_src_dst_property.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" #include "prims/vertex_frontier.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" @@ -93,10 +92,10 @@ struct Prims_Usecase { }; template -class Tests_MGTransformReduceVFrontierOutgoingEByDst +class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst : public ::testing::TestWithParam> { public: - Tests_MGTransformReduceVFrontierOutgoingEByDst() {} + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst() {} static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } @@ -204,26 +203,68 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); - hr_timer.start("MG transform_reduce_v_frontier_outgoing_e_by_dst"); + hr_timer.start("MG transform_reduce_v_frontier_outgoing_e_by_src"); } - auto mg_new_frontier_key_buffer = + auto mg_reduce_by_src_new_frontier_key_buffer = cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); - [[maybe_unused]] auto mg_payload_buffer = + [[maybe_unused]] auto mg_reduce_by_src_payload_buffer = 
cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); if constexpr (std::is_same_v) { - mg_new_frontier_key_buffer = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( - *handle_, - mg_graph_view, - mg_vertex_frontier.bucket(bucket_idx_cur), - mg_src_prop.view(), - mg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - e_op_t{}, - cugraph::reduce_op::null{}); + mg_reduce_by_src_new_frontier_key_buffer = + cugraph::transform_reduce_v_frontier_outgoing_e_by_src( + *handle_, + mg_graph_view, + mg_vertex_frontier.bucket(bucket_idx_cur), + mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + e_op_t{}, + cugraph::reduce_op::null{}); + } else { + std::tie(mg_reduce_by_src_new_frontier_key_buffer, mg_reduce_by_src_payload_buffer) = + cugraph::transform_reduce_v_frontier_outgoing_e_by_src( + *handle_, + mg_graph_view, + mg_vertex_frontier.bucket(bucket_idx_cur), + mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + e_op_t{}, + cugraph::reduce_op::plus{}); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG transform_reduce_v_frontier_outgoing_e_by_src"); + } + + auto mg_reduce_by_dst_new_frontier_key_buffer = + cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); + [[maybe_unused]] auto mg_reduce_by_dst_payload_buffer = + cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); + + if constexpr (std::is_same_v) { + mg_reduce_by_dst_new_frontier_key_buffer = + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( + *handle_, + mg_graph_view, + mg_vertex_frontier.bucket(bucket_idx_cur), + 
mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + e_op_t{}, + cugraph::reduce_op::null{}); } else { - std::tie(mg_new_frontier_key_buffer, mg_payload_buffer) = + std::tie(mg_reduce_by_dst_new_frontier_key_buffer, mg_reduce_by_dst_payload_buffer) = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( *handle_, mg_graph_view, @@ -248,46 +289,110 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst if constexpr (std::is_same_v) { cugraph::unrenumber_int_vertices( *handle_, - mg_new_frontier_key_buffer.begin(), - mg_new_frontier_key_buffer.size(), + mg_reduce_by_src_new_frontier_key_buffer.begin(), + mg_reduce_by_src_new_frontier_key_buffer.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + + cugraph::unrenumber_int_vertices( + *handle_, + mg_reduce_by_dst_new_frontier_key_buffer.begin(), + mg_reduce_by_dst_new_frontier_key_buffer.size(), (*mg_renumber_map).data(), mg_graph_view.vertex_partition_range_lasts()); } else { cugraph::unrenumber_int_vertices( *handle_, - std::get<0>(mg_new_frontier_key_buffer).begin(), - std::get<0>(mg_new_frontier_key_buffer).size(), + std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).begin(), + std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + + cugraph::unrenumber_int_vertices( + *handle_, + std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).begin(), + std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).size(), (*mg_renumber_map).data(), mg_graph_view.vertex_partition_range_lasts()); } - auto mg_aggregate_new_frontier_key_buffer = + auto mg_reduce_by_src_aggregate_new_frontier_key_buffer = cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); if constexpr (std::is_same_v) { - mg_aggregate_new_frontier_key_buffer = cugraph::test::device_gatherv( - *handle_, mg_new_frontier_key_buffer.data(), mg_new_frontier_key_buffer.size()); - } else { - 
std::get<0>(mg_aggregate_new_frontier_key_buffer) = + mg_reduce_by_src_aggregate_new_frontier_key_buffer = cugraph::test::device_gatherv(*handle_, - std::get<0>(mg_new_frontier_key_buffer).data(), - std::get<0>(mg_new_frontier_key_buffer).size()); - std::get<1>(mg_aggregate_new_frontier_key_buffer) = + mg_reduce_by_src_new_frontier_key_buffer.data(), + mg_reduce_by_src_new_frontier_key_buffer.size()); + } else { + std::get<0>(mg_reduce_by_src_aggregate_new_frontier_key_buffer) = + cugraph::test::device_gatherv( + *handle_, + std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).data(), + std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).size()); + std::get<1>(mg_reduce_by_src_aggregate_new_frontier_key_buffer) = + cugraph::test::device_gatherv( + *handle_, + std::get<1>(mg_reduce_by_src_new_frontier_key_buffer).data(), + std::get<1>(mg_reduce_by_src_new_frontier_key_buffer).size()); + } + + auto mg_reduce_by_dst_aggregate_new_frontier_key_buffer = + cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); + if constexpr (std::is_same_v) { + mg_reduce_by_dst_aggregate_new_frontier_key_buffer = cugraph::test::device_gatherv(*handle_, - std::get<1>(mg_new_frontier_key_buffer).data(), - std::get<1>(mg_new_frontier_key_buffer).size()); + mg_reduce_by_dst_new_frontier_key_buffer.data(), + mg_reduce_by_dst_new_frontier_key_buffer.size()); + } else { + std::get<0>(mg_reduce_by_dst_aggregate_new_frontier_key_buffer) = + cugraph::test::device_gatherv( + *handle_, + std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).data(), + std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).size()); + std::get<1>(mg_reduce_by_dst_aggregate_new_frontier_key_buffer) = + cugraph::test::device_gatherv( + *handle_, + std::get<1>(mg_reduce_by_dst_new_frontier_key_buffer).data(), + std::get<1>(mg_reduce_by_dst_new_frontier_key_buffer).size()); } - [[maybe_unused]] auto mg_aggregate_payload_buffer = + [[maybe_unused]] auto mg_reduce_by_src_aggregate_payload_buffer = 
cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); if constexpr (!std::is_same_v) { if constexpr (std::is_arithmetic_v) { - mg_aggregate_payload_buffer = cugraph::test::device_gatherv( - *handle_, mg_payload_buffer.data(), mg_payload_buffer.size()); + mg_reduce_by_src_aggregate_payload_buffer = + cugraph::test::device_gatherv(*handle_, + mg_reduce_by_src_payload_buffer.data(), + mg_reduce_by_src_payload_buffer.size()); } else { - std::get<0>(mg_aggregate_payload_buffer) = cugraph::test::device_gatherv( - *handle_, std::get<0>(mg_payload_buffer).data(), std::get<0>(mg_payload_buffer).size()); - std::get<1>(mg_aggregate_payload_buffer) = cugraph::test::device_gatherv( - *handle_, std::get<1>(mg_payload_buffer).data(), std::get<1>(mg_payload_buffer).size()); + std::get<0>(mg_reduce_by_src_aggregate_payload_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<0>(mg_reduce_by_src_payload_buffer).data(), + std::get<0>(mg_reduce_by_src_payload_buffer).size()); + std::get<1>(mg_reduce_by_src_aggregate_payload_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<1>(mg_reduce_by_src_payload_buffer).data(), + std::get<1>(mg_reduce_by_src_payload_buffer).size()); + } + } + + [[maybe_unused]] auto mg_reduce_by_dst_aggregate_payload_buffer = + cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); + if constexpr (!std::is_same_v) { + if constexpr (std::is_arithmetic_v) { + mg_reduce_by_dst_aggregate_payload_buffer = + cugraph::test::device_gatherv(*handle_, + mg_reduce_by_dst_payload_buffer.data(), + mg_reduce_by_dst_payload_buffer.size()); + } else { + std::get<0>(mg_reduce_by_dst_aggregate_payload_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<0>(mg_reduce_by_dst_payload_buffer).data(), + std::get<0>(mg_reduce_by_dst_payload_buffer).size()); + std::get<1>(mg_reduce_by_dst_aggregate_payload_buffer) = + cugraph::test::device_gatherv(*handle_, + 
std::get<1>(mg_reduce_by_dst_payload_buffer).data(), + std::get<1>(mg_reduce_by_dst_payload_buffer).size()); } } @@ -304,15 +409,27 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst if (handle_->get_comms().get_rank() == int{0}) { if constexpr (std::is_same_v) { - thrust::sort(handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(mg_aggregate_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(mg_aggregate_new_frontier_key_buffer)); + thrust::sort( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(mg_reduce_by_src_aggregate_new_frontier_key_buffer)); + + thrust::sort( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_dst_aggregate_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(mg_reduce_by_dst_aggregate_new_frontier_key_buffer)); } else { thrust::sort_by_key( handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(mg_aggregate_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(mg_aggregate_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_begin(mg_aggregate_payload_buffer)); + cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(mg_reduce_by_src_aggregate_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_payload_buffer)); + + thrust::sort_by_key( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_dst_aggregate_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(mg_reduce_by_dst_aggregate_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_dst_aggregate_payload_buffer)); } auto sg_graph_view = sg_graph.view(); @@ -354,22 +471,51 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst .insert(cugraph::get_dataframe_buffer_begin(sg_key_buffer), 
cugraph::get_dataframe_buffer_end(sg_key_buffer)); - auto sg_new_frontier_key_buffer = + auto sg_reduce_by_src_new_frontier_key_buffer = cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); - [[maybe_unused]] auto sg_payload_buffer = + [[maybe_unused]] auto sg_reduce_by_src_payload_buffer = cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); if constexpr (std::is_same_v) { - sg_new_frontier_key_buffer = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( - *handle_, - sg_graph_view, - sg_vertex_frontier.bucket(bucket_idx_cur), - sg_src_prop.view(), - sg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - e_op_t{}, - cugraph::reduce_op::null{}); + sg_reduce_by_src_new_frontier_key_buffer = + cugraph::transform_reduce_v_frontier_outgoing_e_by_src( + *handle_, + sg_graph_view, + sg_vertex_frontier.bucket(bucket_idx_cur), + sg_src_prop.view(), + sg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + e_op_t{}, + cugraph::reduce_op::null{}); } else { - std::tie(sg_new_frontier_key_buffer, sg_payload_buffer) = + std::tie(sg_reduce_by_src_new_frontier_key_buffer, sg_reduce_by_src_payload_buffer) = + cugraph::transform_reduce_v_frontier_outgoing_e_by_src( + *handle_, + sg_graph_view, + sg_vertex_frontier.bucket(bucket_idx_cur), + sg_src_prop.view(), + sg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + e_op_t{}, + cugraph::reduce_op::plus{}); + } + + auto sg_reduce_by_dst_new_frontier_key_buffer = + cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); + [[maybe_unused]] auto sg_reduce_by_dst_payload_buffer = + cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); + if constexpr (std::is_same_v) { + sg_reduce_by_dst_new_frontier_key_buffer = + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( + *handle_, + sg_graph_view, + sg_vertex_frontier.bucket(bucket_idx_cur), + sg_src_prop.view(), + sg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + e_op_t{}, + 
cugraph::reduce_op::null{}); + } else { + std::tie(sg_reduce_by_dst_new_frontier_key_buffer, sg_reduce_by_dst_payload_buffer) = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( *handle_, sg_graph_view, @@ -382,29 +528,56 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst } if constexpr (std::is_same_v) { - thrust::sort(handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(sg_new_frontier_key_buffer)); + thrust::sort( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(sg_reduce_by_src_new_frontier_key_buffer)); + + thrust::sort( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(sg_reduce_by_dst_new_frontier_key_buffer)); } else { - thrust::sort_by_key(handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(sg_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_begin(sg_payload_buffer)); + thrust::sort_by_key( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(sg_reduce_by_src_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_payload_buffer)); + + thrust::sort_by_key( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(sg_reduce_by_dst_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_payload_buffer)); } - bool key_passed = - thrust::equal(handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(sg_new_frontier_key_buffer), - 
cugraph::get_dataframe_buffer_begin(mg_aggregate_new_frontier_key_buffer)); + bool key_passed = thrust::equal( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(sg_reduce_by_src_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_new_frontier_key_buffer)); + ASSERT_TRUE(key_passed); + + key_passed = thrust::equal( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_end(sg_reduce_by_dst_new_frontier_key_buffer), + cugraph::get_dataframe_buffer_begin(mg_reduce_by_dst_aggregate_new_frontier_key_buffer)); ASSERT_TRUE(key_passed); if constexpr (!std::is_same_v) { - bool payload_passed = - thrust::equal(handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_payload_buffer), - cugraph::get_dataframe_buffer_begin(sg_payload_buffer), - cugraph::get_dataframe_buffer_end(mg_aggregate_payload_buffer)); + bool payload_passed = thrust::equal( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_payload_buffer), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_payload_buffer), + cugraph::get_dataframe_buffer_end(mg_reduce_by_src_aggregate_payload_buffer)); + ASSERT_TRUE(payload_passed); + + payload_passed = thrust::equal( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_payload_buffer), + cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_payload_buffer), + cugraph::get_dataframe_buffer_end(mg_reduce_by_dst_aggregate_payload_buffer)); ASSERT_TRUE(payload_passed); } } @@ -417,20 +590,20 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst template std::unique_ptr - Tests_MGTransformReduceVFrontierOutgoingEByDst::handle_ = nullptr; + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst::handle_ = nullptr; -using 
Tests_MGTransformReduceVFrontierOutgoingEByDst_File = - Tests_MGTransformReduceVFrontierOutgoingEByDst; -using Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat = - Tests_MGTransformReduceVFrontierOutgoingEByDst; +using Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File = + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst; +using Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat = + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst; -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt32Int32FloatVoidVoid) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt32Int32FloatVoidVoid) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32FloatVoidVoid) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt32Int32FloatVoidVoid) { auto param = GetParam(); run_current_test( @@ -438,13 +611,13 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32Float cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt32Int32FloatVoidInt32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt32Int32FloatVoidInt32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32FloatVoidInt32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt32Int32FloatVoidInt32) { auto param = GetParam(); run_current_test( @@ -452,14 +625,16 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32Float cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt32Int32FloatVoidTupleFloatInt32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, 
+ CheckInt32Int32FloatVoidTupleFloatInt32) { auto param = GetParam(); run_current_test>( std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, + CheckInt32Int32FloatVoidTupleFloatInt32) { auto param = GetParam(); run_current_test>( @@ -467,13 +642,13 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32Float cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt32Int32FloatInt32Void) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt32Int32FloatInt32Void) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32FloatInt32Void) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt32Int32FloatInt32Void) { auto param = GetParam(); run_current_test( @@ -481,14 +656,14 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32Float cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt32Int32FloatInt32Int32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt32Int32FloatInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32FloatInt32Int32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt32Int32FloatInt32Int32) { auto param = GetParam(); run_current_test( @@ -496,7 +671,7 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int32Float cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } 
-TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt32Int32FloatInt32TupleFloatInt32) { auto param = GetParam(); @@ -504,7 +679,7 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32) { auto param = GetParam(); @@ -513,14 +688,14 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt32Int64FloatInt32Int32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt32Int64FloatInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int64FloatInt32Int32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt32Int64FloatInt32Int32) { auto param = GetParam(); run_current_test( @@ -528,14 +703,14 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt32Int64Float cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_File, CheckInt64Int64FloatInt32Int32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, CheckInt64Int64FloatInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt64Int64FloatInt32Int32) +TEST_P(Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, CheckInt64Int64FloatInt32Int32) { auto param = GetParam(); run_current_test( @@ -545,7 +720,7 @@ TEST_P(Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, CheckInt64Int64Float 
INSTANTIATE_TEST_SUITE_P( file_test, - Tests_MGTransformReduceVFrontierOutgoingEByDst_File, + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_File, ::testing::Combine( ::testing::Values(Prims_Usecase{false, true}, Prims_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), @@ -554,7 +729,7 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, ::testing::Combine(::testing::Values(Prims_Usecase{false, true}, Prims_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( @@ -566,7 +741,7 @@ INSTANTIATE_TEST_SUITE_P( vertex & edge type combination) by command line arguments and do not include more than one Rmat_Usecase that differ only in scale or edge factor (to avoid running same benchmarks more than once) */ - Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, + Tests_MGTransformReduceVFrontierOutgoingEBySrcDst_Rmat, ::testing::Combine( ::testing::Values(Prims_Usecase{false, false}, Prims_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); diff --git a/cpp/tests/traversal/bfs_test.cpp b/cpp/tests/traversal/bfs_test.cpp index 8d3cdb3d24b..c5041378d78 100644 --- a/cpp/tests/traversal/bfs_test.cpp +++ b/cpp/tests/traversal/bfs_test.cpp @@ -101,7 +101,10 @@ class Tests_BFS : public ::testing::TestWithParam void run_current_test(BFS_Usecase const& bfs_usecase, input_usecase_t const& input_usecase) { - constexpr bool renumber = true; + bool constexpr renumber = true; + bool constexpr test_weighted = false; + bool constexpr drop_self_loops = false; + bool constexpr drop_multi_edges = false; using weight_t = float; @@ -117,7 +120,7 @@ class Tests_BFS : public ::testing::TestWithParam> d_renumber_map_labels{std::nullopt}; std::tie(graph, std::ignore, 
d_renumber_map_labels) = cugraph::test::construct_graph( - handle, input_usecase, false, renumber); + handle, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -132,6 +135,16 @@ class Tests_BFS : public ::testing::TestWithParam::edge_property(handle, graph_view, 2); graph_view.attach_edge_mask((*edge_mask).view()); } + { // FIXME: for testing, delete + auto num_self_loops = graph_view.count_self_loops(handle); + auto number_of_edges = graph_view.compute_number_of_edges(handle); + std::cout << "V=" << graph_view.number_of_vertices() << " E=" << number_of_edges + << " num_self_loops=" << num_self_loops; + if (graph_view.is_symmetric()) { + std::cout << " undirected E=" << ((number_of_edges - num_self_loops) / 2 + num_self_loops) + << std::endl; + } + } ASSERT_TRUE(static_cast(bfs_usecase.source) >= 0 && static_cast(bfs_usecase.source) < graph_view.number_of_vertices()) @@ -154,7 +167,7 @@ class Tests_BFS : public ::testing::TestWithParam::max()); if (cugraph::test::g_perf) { @@ -305,10 +318,12 @@ INSTANTIATE_TEST_SUITE_P( Tests_BFS_Rmat, ::testing::Values( // enable correctness checks - std::make_tuple(BFS_Usecase{0, false}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)), - std::make_tuple(BFS_Usecase{0, true}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + std::make_tuple( + BFS_Usecase{0, false}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true /* undirected */, false)), + std::make_tuple( + BFS_Usecase{0, true}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true /* undirected */, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -319,9 +334,13 @@ INSTANTIATE_TEST_SUITE_P( Tests_BFS_Rmat, ::testing::Values( // disable correctness checks for large graphs - 
std::make_tuple(BFS_Usecase{0, false, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)), - std::make_tuple(BFS_Usecase{0, true, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + std::make_tuple( + BFS_Usecase{0, false, false}, + cugraph::test::Rmat_Usecase( + 20, 16, 0.57, 0.19, 0.19, 0, true /* undirected */, false /* scramble vertex IDs */)), + std::make_tuple( + BFS_Usecase{0, true, false}, + cugraph::test::Rmat_Usecase( + 20, 16, 0.57, 0.19, 0.19, 0, true /* undirected */, false /* scramble vertex IDs */)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/mg_bfs_test.cpp b/cpp/tests/traversal/mg_bfs_test.cpp index 1b63ad3b085..aa59719f814 100644 --- a/cpp/tests/traversal/mg_bfs_test.cpp +++ b/cpp/tests/traversal/mg_bfs_test.cpp @@ -64,6 +64,11 @@ class Tests_MGBFS : public ::testing::TestWithParam> mg_renumber_map{std::nullopt}; std::tie(mg_graph, std::ignore, mg_renumber_map) = cugraph::test::construct_graph( - *handle_, input_usecase, false, true); + *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -95,6 +100,16 @@ class Tests_MGBFS : public ::testing::TestWithParam