From a1f8a653b6316bd43141a6df8ec7e0ac7f7b4a8a Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:01:44 +0100 Subject: [PATCH] Optimize K-Truss (#4375) This PR leverages our existing edge triangle count to implement both SG and MG K-Truss as our initial version. This PR also: - Exposes the `rx count` in several as our shuffling functions - Add C and C++ tests for MG K-Truss Closes #4500 Authors: - Joseph Nke (https://github.com/jnke2016) - Brad Rees (https://github.com/BradReesWork) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4375 --- cpp/CMakeLists.txt | 3 + .../cugraph/detail/shuffle_wrappers.hpp | 8 +- .../mtmg/detail/per_device_edgelist.hpp | 3 +- cpp/src/c_api/graph_functions.cpp | 2 +- cpp/src/c_api/graph_mg.cpp | 3 +- cpp/src/c_api/k_truss.cpp | 3 - cpp/src/community/detail/refine_impl.cuh | 1 + .../community/edge_triangle_count_impl.cuh | 2 +- cpp/src/community/k_truss_impl.cuh | 674 ++---------------- cpp/src/community/k_truss_mg_v32_e32.cu | 41 ++ cpp/src/community/k_truss_mg_v32_e64.cu | 41 ++ cpp/src/community/k_truss_mg_v64_e64.cu | 41 ++ cpp/src/community/triangle_count_impl.cuh | 2 +- .../weakly_connected_components_impl.cuh | 1 + cpp/src/cores/k_core_impl.cuh | 4 +- cpp/src/link_prediction/similarity_impl.cuh | 4 +- cpp/src/structure/coarsen_graph_impl.cuh | 4 +- .../structure/symmetrize_edgelist_impl.cuh | 2 + cpp/src/structure/transpose_graph_impl.cuh | 1 + .../transpose_graph_storage_impl.cuh | 1 + cpp/src/utilities/shuffle_vertex_pairs.cuh | 43 +- .../shuffle_vertex_pairs_mg_v32_e32.cu | 18 +- .../shuffle_vertex_pairs_mg_v32_e64.cu | 18 +- .../shuffle_vertex_pairs_mg_v64_e64.cu | 18 +- .../shuffle_vertices_mg_v32_integral.cu | 7 + .../shuffle_vertices_mg_v64_integral.cu | 7 + cpp/tests/CMakeLists.txt | 7 +- cpp/tests/c_api/mg_k_truss_test.c | 150 ++++ cpp/tests/community/mg_k_truss_test.cpp | 286 ++++++++ .../link_prediction/mg_similarity_test.cpp | 2 +- .../mg_weighted_similarity_test.cpp | 2 +- ...r_v_pair_transform_dst_nbr_intersection.cu | 1 + ...transform_dst_nbr_weighted_intersection.cu | 1 + ...has_edge_and_compute_multiplicity_test.cpp | 1 + cpp/tests/utilities/test_graphs.hpp | 1 + 35 files changed, 719 insertions(+), 684 deletions(-) create mode 100644 cpp/src/community/k_truss_mg_v32_e32.cu create mode 100644 cpp/src/community/k_truss_mg_v32_e64.cu create mode 100644 cpp/src/community/k_truss_mg_v64_e64.cu create mode 100644 cpp/tests/c_api/mg_k_truss_test.c create mode 100644 cpp/tests/community/mg_k_truss_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 61ec34c3319..441627cabce 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -284,6 +284,9 @@ set(CUGRAPH_SOURCES src/community/k_truss_sg_v64_e64.cu src/community/k_truss_sg_v32_e32.cu src/community/k_truss_sg_v32_e64.cu + src/community/k_truss_mg_v64_e64.cu + src/community/k_truss_mg_v32_e32.cu + src/community/k_truss_mg_v32_e64.cu src/lookup/lookup_src_dst_mg_v32_e32.cu src/lookup/lookup_src_dst_mg_v32_e64.cu src/lookup/lookup_src_dst_mg_v64_e64.cu diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp index 69d48098a5d..7dffcce298a 100644 --- a/cpp/include/cugraph/detail/shuffle_wrappers.hpp +++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp @@ -53,7 +53,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -86,14 +87,15 @@ shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( * (exclusive) vertex ID. * * @return Tuple of vectors storing shuffled major vertices, minor vertices and optional weights, - * edge ids and edge types + * edge ids and edge types and rx counts */ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, diff --git a/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp b/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp index 63d7fd9685e..61ad833a529 100644 --- a/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp +++ b/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp @@ -251,7 +251,8 @@ class per_device_edgelist_t { store_transposed ? src_[0] : dst_[0], tmp_wgt, tmp_edge_id, - tmp_edge_type) = + tmp_edge_type, + std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle.raft_handle(), store_transposed ? std::move(dst_[0]) : std::move(src_[0]), diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp index 91371b988b3..df741a349d2 100644 --- a/cpp/src/c_api/graph_functions.cpp +++ b/cpp/src/c_api/graph_functions.cpp @@ -72,7 +72,7 @@ struct create_vertex_pairs_functor : public cugraph::c_api::abstract_functor { second_copy.data(), second_->as_type(), second_->size_, handle_.get_stream()); if constexpr (multi_gpu) { - std::tie(first_copy, second_copy, std::ignore, std::ignore, std::ignore) = + std::tie(first_copy, second_copy, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 22ceea3f629..cc4acd31743 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -167,7 +167,8 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { store_transposed ? edgelist_srcs : edgelist_dsts, edgelist_weights, edgelist_edge_ids, - edgelist_edge_types) = + edgelist_edge_types, + std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle_, std::move(store_transposed ? edgelist_dsts : edgelist_srcs), diff --git a/cpp/src/c_api/k_truss.cpp b/cpp/src/c_api/k_truss.cpp index 18e256b022a..37a0672676e 100644 --- a/cpp/src/c_api/k_truss.cpp +++ b/cpp/src/c_api/k_truss.cpp @@ -60,10 +60,7 @@ struct k_truss_functor : public cugraph::c_api::abstract_functor { { if constexpr (!cugraph::is_candidate::value) { unsupported(); - } else if constexpr (multi_gpu) { - unsupported(); } else { - // k_truss expects store_transposed == false if constexpr (store_transposed) { error_code_ = cugraph::c_api:: transpose_storage( diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 99fc1cd6fae..272e3d71f83 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -627,6 +627,7 @@ refine_clustering( store_transposed ? d_srcs : d_dsts, d_weights, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index c4277e240be..225687c4cf0 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -250,7 +250,7 @@ edge_property_t, edge_t> edge_t handle.get_stream()); // There are still multiple copies here but is it worth sorting and reducing again? - std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore) = + std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore, std::ignore) = shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning +#include #include #include #include @@ -41,351 +43,6 @@ namespace cugraph { -template -struct unroll_edge { - size_t num_valid_edges{}; - raft::device_span num_triangles{}; - EdgeIterator edge_to_unroll_first{}; - EdgeIterator transposed_valid_edge_first{}; - EdgeIterator transposed_valid_edge_last{}; - EdgeIterator transposed_invalid_edge_last{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - // edges are sorted with destination as key so reverse the edge when looking it - auto pair = thrust::make_tuple(thrust::get<1>(*(edge_to_unroll_first + i)), - thrust::get<0>(*(edge_to_unroll_first + i))); - // Find its position in either partition of the transposed edgelist - // An edge can be in found in either of the two partitions (valid or invalid) - auto itr = thrust::lower_bound( - thrust::seq, transposed_valid_edge_last, transposed_invalid_edge_last, pair); - size_t idx{}; - if (itr != transposed_invalid_edge_last && *itr == pair) { - idx = - static_cast(thrust::distance(transposed_valid_edge_last, itr) + num_valid_edges); - } else { - // The edge must be in the first boundary - itr = thrust::lower_bound( - thrust::seq, transposed_valid_edge_first, transposed_valid_edge_last, pair); - assert(*itr == pair); - idx = thrust::distance(transposed_valid_edge_first, itr); - } - cuda::atomic_ref atomic_counter(num_triangles[idx]); - auto r = atomic_counter.fetch_sub(edge_t{1}, cuda::std::memory_order_relaxed); - } -}; - -// FIXME: May re-locate this function as a general utility function for graph algorithm -// implementations. -template -rmm::device_uvector compute_prefix_sum(raft::handle_t const& handle, - raft::device_span sorted_vertices, - raft::device_span query_vertices) -{ - rmm::device_uvector prefix_sum(query_vertices.size() + 1, handle.get_stream()); - - auto count_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_t{0}), - cuda::proclaim_return_type( - [query_vertices, - num_edges = sorted_vertices.size(), - sorted_vertices = sorted_vertices.begin()] __device__(size_t idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, sorted_vertices, sorted_vertices + num_edges, query_vertices[idx]); - - auto itr_upper = thrust::upper_bound( - thrust::seq, itr_lower, sorted_vertices + num_edges, query_vertices[idx]); - vertex_t dist = thrust::distance(itr_lower, itr_upper); - - return dist; - })); - - thrust::exclusive_scan(handle.get_thrust_policy(), - count_first, - count_first + query_vertices.size() + 1, - prefix_sum.begin()); - - return prefix_sum; -} - -template -edge_t remove_overcompensating_edges(raft::handle_t const& handle, - size_t buffer_size, - EdgeIterator potential_closing_or_incoming_edges, - EdgeIterator incoming_or_potential_closing_edges, - raft::device_span invalid_edgelist_srcs, - raft::device_span invalid_edgelist_dsts) -{ - // To avoid over-compensating, check whether the 'potential_closing_edges' - // are within the invalid edges. If yes, the was already unrolled - auto edges_not_overcomp = thrust::remove_if( - handle.get_thrust_policy(), - thrust::make_zip_iterator(potential_closing_or_incoming_edges, - incoming_or_potential_closing_edges), - thrust::make_zip_iterator(potential_closing_or_incoming_edges + buffer_size, - incoming_or_potential_closing_edges + buffer_size), - [num_invalid_edges = invalid_edgelist_dsts.size(), - invalid_first = - thrust::make_zip_iterator(invalid_edgelist_dsts.begin(), invalid_edgelist_srcs.begin()), - invalid_last = thrust::make_zip_iterator(invalid_edgelist_dsts.end(), - invalid_edgelist_srcs.end())] __device__(auto e) { - auto potential_edge = thrust::get<0>(e); - auto transposed_potential_or_incoming_edge = - thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); - auto itr = thrust::lower_bound( - thrust::seq, invalid_first, invalid_last, transposed_potential_or_incoming_edge); - return (itr != invalid_last && *itr == transposed_potential_or_incoming_edge); - }); - - auto dist = thrust::distance(thrust::make_zip_iterator(potential_closing_or_incoming_edges, - incoming_or_potential_closing_edges), - edges_not_overcomp); - - return dist; -} - -template -void unroll_p_r_or_q_r_edges(raft::handle_t const& handle, - graph_view_t& graph_view, - size_t num_invalid_edges, - size_t num_valid_edges, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts, - raft::device_span num_triangles) -{ - auto prefix_sum_valid = compute_prefix_sum( - handle, - raft::device_span(edgelist_dsts.data(), num_valid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)); - - auto prefix_sum_invalid = compute_prefix_sum( - handle, - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)); - - auto potential_closing_edges = allocate_dataframe_buffer>( - prefix_sum_valid.back_element(handle.get_stream()) + - prefix_sum_invalid.back_element(handle.get_stream()), - handle.get_stream()); - - auto incoming_edges_to_r = allocate_dataframe_buffer>( - prefix_sum_valid.back_element(handle.get_stream()) + - prefix_sum_invalid.back_element(handle.get_stream()), - handle.get_stream()); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_invalid_edges), - [num_valid_edges, - num_invalid_edges, - invalid_dst_first = edgelist_dsts.begin() + num_valid_edges, - invalid_src_first = edgelist_srcs.begin() + num_valid_edges, - valid_src_first = edgelist_srcs.begin(), - valid_dst_first = edgelist_dsts.begin(), - prefix_sum_valid = prefix_sum_valid.data(), - prefix_sum_invalid = prefix_sum_invalid.data(), - potential_closing_edges = get_dataframe_buffer_begin(potential_closing_edges), - incoming_edges_to_r = get_dataframe_buffer_begin(incoming_edges_to_r)] __device__(auto idx) { - auto src = invalid_src_first[idx]; - auto dst = invalid_dst_first[idx]; - auto dst_array_end_valid = valid_dst_first + num_valid_edges; - - auto itr_lower_valid = - thrust::lower_bound(thrust::seq, valid_dst_first, dst_array_end_valid, dst); - auto idx_lower_valid = thrust::distance( - valid_dst_first, - itr_lower_valid); // Need a binary search to find the begining of the range - - auto invalid_end_dst = invalid_dst_first + num_invalid_edges; - - auto itr_lower_invalid = - thrust::lower_bound(thrust::seq, invalid_dst_first, invalid_end_dst, dst); - auto idx_lower_invalid = thrust::distance( - invalid_dst_first, - itr_lower_invalid); // Need a binary search to find the begining of the range - - auto incoming_edges_to_r_first_valid = thrust::make_zip_iterator( - valid_src_first + idx_lower_valid, thrust::make_constant_iterator(dst)); - thrust::copy( - thrust::seq, - incoming_edges_to_r_first_valid, - incoming_edges_to_r_first_valid + (prefix_sum_valid[idx + 1] - prefix_sum_valid[idx]), - incoming_edges_to_r + prefix_sum_valid[idx] + prefix_sum_invalid[idx]); - - auto incoming_edges_to_r_first_invalid = thrust::make_zip_iterator( - invalid_src_first + idx_lower_invalid, thrust::make_constant_iterator(dst)); - thrust::copy( - thrust::seq, - incoming_edges_to_r_first_invalid, - incoming_edges_to_r_first_invalid + (prefix_sum_invalid[idx + 1] - prefix_sum_invalid[idx]), - - incoming_edges_to_r + prefix_sum_invalid[idx] + prefix_sum_valid[idx + 1]); - - if constexpr (is_q_r_edge) { - auto potential_closing_edges_first_valid = thrust::make_zip_iterator( - valid_src_first + idx_lower_valid, thrust::make_constant_iterator(src)); - thrust::copy( - thrust::seq, - potential_closing_edges_first_valid, - potential_closing_edges_first_valid + (prefix_sum_valid[idx + 1] - prefix_sum_valid[idx]), - potential_closing_edges + prefix_sum_valid[idx] + prefix_sum_invalid[idx]); - - auto potential_closing_edges_first_invalid = thrust::make_zip_iterator( - invalid_src_first + idx_lower_invalid, thrust::make_constant_iterator(src)); - thrust::copy(thrust::seq, - potential_closing_edges_first_invalid, - potential_closing_edges_first_invalid + - (prefix_sum_invalid[idx + 1] - prefix_sum_invalid[idx]), - potential_closing_edges + prefix_sum_invalid[idx] + prefix_sum_valid[idx + 1]); - - } else { - auto potential_closing_edges_first_valid = thrust::make_zip_iterator( - thrust::make_constant_iterator(src), valid_src_first + idx_lower_valid); - thrust::copy( - thrust::seq, - potential_closing_edges_first_valid, - potential_closing_edges_first_valid + (prefix_sum_valid[idx + 1] - prefix_sum_valid[idx]), - potential_closing_edges + prefix_sum_valid[idx] + prefix_sum_invalid[idx]); - - auto potential_closing_edges_first_invalid = thrust::make_zip_iterator( - thrust::make_constant_iterator(src), invalid_src_first + idx_lower_invalid); - thrust::copy( - thrust::seq, - potential_closing_edges_first_invalid, - potential_closing_edges_first_invalid + - (prefix_sum_invalid[idx + 1] - prefix_sum_invalid[idx]), - potential_closing_edges + prefix_sum_invalid[idx] + (prefix_sum_valid[idx + 1])); - } - }); - - auto edges_exist = graph_view.has_edge( - handle, - raft::device_span(std::get<0>(potential_closing_edges).data(), - std::get<0>(potential_closing_edges).size()), - raft::device_span(std::get<1>(potential_closing_edges).data(), - std::get<1>(potential_closing_edges).size())); - - auto edge_to_existance = thrust::make_zip_iterator( - thrust::make_zip_iterator(get_dataframe_buffer_begin(potential_closing_edges), - get_dataframe_buffer_begin(incoming_edges_to_r)), - edges_exist.begin()); - - auto has_edge_last = thrust::remove_if(handle.get_thrust_policy(), - edge_to_existance, - edge_to_existance + edges_exist.size(), - [] __device__(auto e) { - auto edge_exists = thrust::get<1>(e); - return edge_exists == 0; - }); - - auto num_edge_exists = thrust::distance(edge_to_existance, has_edge_last); - - // After pushing the non-existant edges to the second partition, - // remove them by resizing both vertex pair buffer - resize_dataframe_buffer(potential_closing_edges, num_edge_exists, handle.get_stream()); - resize_dataframe_buffer(incoming_edges_to_r, num_edge_exists, handle.get_stream()); - - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - num_edge_exists, - get_dataframe_buffer_begin(potential_closing_edges), - get_dataframe_buffer_begin(incoming_edges_to_r), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)); - - // After pushing the non-existant edges to the second partition, - // remove them by resizing both vertex pair buffer - resize_dataframe_buffer(potential_closing_edges, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(incoming_edges_to_r, num_edges_not_overcomp, handle.get_stream()); - - // Extra check for 'incoming_edges_to_r' - if constexpr (!is_q_r_edge) { - // Exchange the arguments (incoming_edges_to_r, num_edges_not_overcomp) order - // To also check if the 'incoming_edges_to_r' belong the the invalid_edgelist - num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - num_edges_not_overcomp, - get_dataframe_buffer_begin(incoming_edges_to_r), - get_dataframe_buffer_begin(potential_closing_edges), - raft::device_span(edgelist_srcs.data() + num_valid_edges, - num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_invalid_edges)); - - resize_dataframe_buffer(potential_closing_edges, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(incoming_edges_to_r, num_edges_not_overcomp, handle.get_stream()); - } - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_zip_iterator(get_dataframe_buffer_begin(potential_closing_edges), - get_dataframe_buffer_begin(incoming_edges_to_r)), - thrust::make_zip_iterator( - get_dataframe_buffer_begin(potential_closing_edges) + num_edges_not_overcomp, - get_dataframe_buffer_begin(incoming_edges_to_r) + num_edges_not_overcomp), - [num_triangles = num_triangles.begin(), - num_valid_edges, - invalid_first = thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, - edgelist_srcs.begin() + num_valid_edges), - invalid_last = thrust::make_zip_iterator( - edgelist_dsts.end(), edgelist_srcs.end())] __device__(auto potential_or_incoming_e) { - auto potential_e = thrust::get<0>(potential_or_incoming_e); - auto incoming_e_to_r = thrust::get<1>(potential_or_incoming_e); - // thrust::tuple> transposed_invalid_edge_; - auto transposed_invalid_edge = - thrust::make_tuple(thrust::get<1>(incoming_e_to_r), thrust::get<1>(potential_e)); - - if constexpr (!is_q_r_edge) { - transposed_invalid_edge = - thrust::make_tuple(thrust::get<1>(incoming_e_to_r), thrust::get<0>(potential_e)); - } - auto itr = - thrust::lower_bound(thrust::seq, invalid_first, invalid_last, transposed_invalid_edge); - if (itr != invalid_last) { assert(*itr == transposed_invalid_edge); } - auto dist = thrust::distance(invalid_first, itr) + num_valid_edges; - - cuda::atomic_ref atomic_counter(num_triangles[dist]); - auto r = atomic_counter.fetch_sub(edge_t{1}, cuda::std::memory_order_relaxed); - }); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_edges_not_overcomp), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(potential_closing_edges), - thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()), - thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, - edgelist_srcs.begin() + num_valid_edges), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_srcs.end())}); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_edges_not_overcomp), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(incoming_edges_to_r), - thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()), - thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, - edgelist_srcs.begin() + num_valid_edges), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_srcs.end())}); -} - namespace { template @@ -434,28 +91,6 @@ struct extract_low_to_high_degree_edges_t { } }; -template -struct generate_p_r_or_q_r_from_p_q { - size_t chunk_start{}; - raft::device_span intersection_offsets{}; - raft::device_span intersection_indices{}; - raft::device_span invalid_srcs{}; - raft::device_span invalid_dsts{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - auto itr = thrust::upper_bound( - thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - - if constexpr (generate_p_r) { - return thrust::make_tuple(invalid_srcs[chunk_start + idx], intersection_indices[i]); - - } else { - return thrust::make_tuple(invalid_dsts[chunk_start + idx], intersection_indices[i]); - } - } -}; } // namespace template @@ -470,8 +105,6 @@ k_truss(raft::handle_t const& handle, { // 1. Check input arguments. - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - CUGRAPH_EXPECTS(graph_view.is_symmetric(), "Invalid input arguments: K-truss currently supports undirected graphs only."); CUGRAPH_EXPECTS(!graph_view.is_multigraph(), @@ -497,7 +130,7 @@ k_truss(raft::handle_t const& handle, exclude_self_loop_t{}); if constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning core_number_span{core_numbers.data(), core_numbers.size()}; - rmm::device_uvector srcs{0, handle.get_stream()}; - rmm::device_uvector dsts{0, handle.get_stream()}; - std::tie(srcs, dsts, wgts) = k_core(handle, - cur_graph_view, - edge_weight_view, - k - 1, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); + auto [srcs, dsts, wgts] = k_core(handle, + cur_graph_view, + edge_weight_view, + k - 1, + std::make_optional(k_core_degree_type_t::OUT), + std::make_optional(core_number_span)); if constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(srcs), std::move(dsts), std::nullopt, std::nullopt, std::nullopt); + handle, std::move(srcs), std::move(dsts), std::move(wgts), std::nullopt, std::nullopt); } std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = create_graph_from_edgelist( handle, @@ -566,7 +197,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); @@ -577,10 +208,11 @@ k_truss(raft::handle_t const& handle, (*renumber_map).data(), *vertex_partition_range_lasts); } + renumber_map = std::move(tmp_renumber_map); } - // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. + // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -625,7 +257,7 @@ k_truss(raft::handle_t const& handle, } if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore) = + std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::optional> num_triangles{std::nullopt}; - std::optional> edgelist_wgts{std::nullopt}; edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - auto prop_num_triangles = edge_triangle_count(handle, cur_graph_view); - - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles, std::ignore) = - decompress_to_edgelist( - handle, - cur_graph_view, - edge_weight_view, - // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values - std::make_optional(prop_num_triangles.view()), - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - auto transposed_edge_first = - thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); + cugraph::edge_property_t edge_mask(handle, cur_graph_view); + cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); - auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + while (true) { + // FIXME: This approach is very expensive when invalidating only few edges per iteration + // and should be address. + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view); - auto transposed_edge_triangle_count_pair_first = - thrust::make_zip_iterator(transposed_edge_first, (*num_triangles).begin()); + // Mask all the edges that have k - 2 count - thrust::sort_by_key(handle.get_thrust_policy(), - transposed_edge_first, - transposed_edge_first + edgelist_srcs.size(), - (*num_triangles).begin()); + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); - cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), true); - cur_graph_view.attach_edge_mask(edge_mask.view()); - - while (true) { - // 'invalid_transposed_edge_triangle_count_first' marks the beginning of the edges to be - // removed 'invalid_transposed_edge_triangle_count_first' + edgelist_srcs.size() marks the end - // of the edges to be removed 'edge_triangle_count_pair_first' marks the begining of the valid - // edges. - auto invalid_transposed_edge_triangle_count_first = - thrust::stable_partition(handle.get_thrust_policy(), - transposed_edge_triangle_count_pair_first, - transposed_edge_triangle_count_pair_first + edgelist_srcs.size(), - [k] __device__(auto e) { - auto num_triangles = thrust::get<1>(e); - return num_triangles >= k - 2; - }); - auto num_invalid_edges = static_cast( - thrust::distance(invalid_transposed_edge_triangle_count_first, - transposed_edge_triangle_count_pair_first + edgelist_srcs.size())); - - if (num_invalid_edges == 0) { break; } - - auto num_valid_edges = edgelist_srcs.size() - num_invalid_edges; - - // case 1. For the (p, q), find intersection 'r'. - - // nbr_intersection requires the edges to be sort by 'src' - // sort the invalid edges by src for nbr intersection - size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - - size_t prev_chunk_size = 0; - size_t chunk_num_invalid_edges = num_invalid_edges; - - auto num_chunks = - raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); - - for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - thrust::sort_by_key(handle.get_thrust_policy(), - edge_first + num_valid_edges, - edge_first + edgelist_srcs.size(), - (*num_triangles).begin() + num_valid_edges); - - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first + num_valid_edges + prev_chunk_size, - edge_first + num_valid_edges + prev_chunk_size + chunk_size, - std::array{true, true}, - do_expensive_check); - - // Update the number of triangles of each (p, q) edges by looking at their intersection - // size. - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(chunk_size), - [chunk_start = prev_chunk_size, - num_triangles = raft::device_span((*num_triangles).data() + num_valid_edges, - num_invalid_edges), - intersection_offsets = raft::device_span( - intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { - num_triangles[chunk_start + i] -= - (intersection_offsets[i + 1] - intersection_offsets[i]); - }); - - // FIXME: Find a way to not have to maintain a dataframe_buffer - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_invalid_edges)}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_invalid_edges)}); - - // Unrolling the edges require the edges to be sorted by destination - // re-sort the invalid edges by 'dst' - thrust::sort_by_key(handle.get_thrust_policy(), - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size(), - (*num_triangles).begin() + num_valid_edges); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span((*num_triangles).data(), (*num_triangles).size()), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span((*num_triangles).data(), (*num_triangles).size()), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; - } - // case 2: unroll (q, r) - // For each (q, r) edges to unroll, find the incoming edges to 'r' let's say from 'p' and - // create the pair (p, q) - cugraph::unroll_p_r_or_q_r_edges( - handle, - cur_graph_view, - num_invalid_edges, - num_valid_edges, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - raft::device_span((*num_triangles).data(), (*num_triangles).size())); - - // case 3: unroll (p, r) - cugraph::unroll_p_r_or_q_r_edges( + cugraph::transform_e( handle, cur_graph_view, - num_invalid_edges, - num_valid_edges, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - raft::device_span((*num_triangles).data(), (*num_triangles).size())); - - // Remove edges that have a triangle count of zero. Those should not be accounted - // for during the unroling phase. - auto edges_with_triangle_last = thrust::stable_partition( - handle.get_thrust_policy(), - transposed_edge_triangle_count_pair_first, - transposed_edge_triangle_count_pair_first + (*num_triangles).size(), - [] __device__(auto e) { - auto num_triangles = thrust::get<1>(e); - return num_triangles > 0; - }); - - auto num_edges_with_triangles = static_cast( - thrust::distance(transposed_edge_triangle_count_pair_first, edges_with_triangle_last)); - - thrust::sort(handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_srcs.begin() + num_edges_with_triangles, - edgelist_dsts.begin() + num_edges_with_triangles), - thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end())); - - cugraph::edge_bucket_t edges_with_no_triangle(handle); - edges_with_no_triangle.insert(edgelist_srcs.begin() + num_edges_with_triangles, - edgelist_srcs.end(), - edgelist_dsts.begin() + num_edges_with_triangles); - - cur_graph_view.clear_edge_mask(); - if (edge_weight_view) { - cugraph::transform_e( - handle, - cur_graph_view, - edges_with_no_triangle, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - *edge_weight_view, - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { - return false; - }, - edge_mask.mutable_view(), - false); - } else { - cugraph::transform_e( - handle, - cur_graph_view, - edges_with_no_triangle, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__( - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - return false; - }, - edge_mask.mutable_view(), - false); - } + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [k] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count >= k - 2; + }, + edge_mask.mutable_view(), + false); + cur_graph_view.attach_edge_mask(edge_mask.view()); - edgelist_srcs.resize(num_edges_with_triangles, handle.get_stream()); - edgelist_dsts.resize(num_edges_with_triangles, handle.get_stream()); - (*num_triangles).resize(num_edges_with_triangles, handle.get_stream()); + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_wgts{std::nullopt}; + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -923,7 +340,8 @@ k_truss(raft::handle_t const& handle, edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::optional>(std::nullopt)); + std::make_optional( + raft::device_span((*renumber_map).data(), (*renumber_map).size()))); std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, diff --git a/cpp/src/community/k_truss_mg_v32_e32.cu b/cpp/src/community/k_truss_mg_v32_e32.cu new file mode 100644 index 00000000000..4feb69f6098 --- /dev/null +++ b/cpp/src/community/k_truss_mg_v32_e32.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "community/k_truss_impl.cuh" + +namespace cugraph { + +// MG instantiation + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t k, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/community/k_truss_mg_v32_e64.cu b/cpp/src/community/k_truss_mg_v32_e64.cu new file mode 100644 index 00000000000..b07f9382612 --- /dev/null +++ b/cpp/src/community/k_truss_mg_v32_e64.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "community/k_truss_impl.cuh" + +namespace cugraph { + +// MG instantiation + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/community/k_truss_mg_v64_e64.cu b/cpp/src/community/k_truss_mg_v64_e64.cu new file mode 100644 index 00000000000..1c730fe272d --- /dev/null +++ b/cpp/src/community/k_truss_mg_v64_e64.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "community/k_truss_impl.cuh" + +namespace cugraph { + +// MG instantiation + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/community/triangle_count_impl.cuh b/cpp/src/community/triangle_count_impl.cuh index 0b453cfe262..e902901cd36 100644 --- a/cpp/src/community/triangle_count_impl.cuh +++ b/cpp/src/community/triangle_count_impl.cuh @@ -439,7 +439,7 @@ void triangle_count(raft::handle_t const& handle, extract_low_to_high_degree_edges_t{}); if constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning(edge_buffer), std::ignore, std::ignore, + std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/src/cores/k_core_impl.cuh b/cpp/src/cores/k_core_impl.cuh index 06402cc3382..2c5bf987a47 100644 --- a/cpp/src/cores/k_core_impl.cuh +++ b/cpp/src/cores/k_core_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,6 @@ k_core(raft::handle_t const& handle, std::optional> core_numbers, bool do_expensive_check) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - rmm::device_uvector computed_core_numbers(0, handle.get_stream()); if (!core_numbers) { diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 487f31e5e03..b39895129dc 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -408,7 +408,7 @@ all_pairs_similarity(raft::handle_t const& handle, // shuffle vertex pairs auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - std::tie(v1, v2, std::ignore, std::ignore, std::ignore) = + std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_vertex_pairs_with_values_by_gpu_id_impl( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -178,25 +179,27 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( handle.get_stream()); handle.sync_stream(); + std::vector rx_counts{}; + if (mem_frugal_flag) { // trade-off potential parallelism to lower peak memory - std::tie(majors, std::ignore) = + std::tie(majors, rx_counts) = shuffle_values(comm, majors.begin(), h_tx_value_counts, handle.get_stream()); - std::tie(minors, std::ignore) = + std::tie(minors, rx_counts) = shuffle_values(comm, minors.begin(), h_tx_value_counts, handle.get_stream()); if (weights) { - std::tie(weights, std::ignore) = + std::tie(weights, rx_counts) = shuffle_values(comm, (*weights).begin(), h_tx_value_counts, handle.get_stream()); } if (edge_ids) { - std::tie(edge_ids, std::ignore) = + std::tie(edge_ids, rx_counts) = shuffle_values(comm, (*edge_ids).begin(), h_tx_value_counts, handle.get_stream()); } if (edge_types) { - std::tie(edge_types, std::ignore) = + std::tie(edge_types, rx_counts) = shuffle_values(comm, (*edge_types).begin(), h_tx_value_counts, handle.get_stream()); } } else { @@ -204,7 +207,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( if (edge_ids) { if (edge_types) { std::forward_as_tuple(std::tie(majors, minors, weights, edge_ids, edge_types), - std::ignore) = + rx_counts) = shuffle_values(comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), @@ -214,7 +217,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors, weights, edge_ids), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors, weights, edge_ids), rx_counts) = shuffle_values(comm, thrust::make_zip_iterator( majors.begin(), minors.begin(), weights->begin(), edge_ids->begin()), @@ -223,14 +226,14 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( } } else { if (edge_types) { - std::forward_as_tuple(std::tie(majors, minors, weights, edge_types), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors, weights, edge_types), rx_counts) = shuffle_values(comm, thrust::make_zip_iterator( majors.begin(), minors.begin(), weights->begin(), edge_types->begin()), h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors, weights), std::ignore) = shuffle_values( + std::forward_as_tuple(std::tie(majors, minors, weights), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), weights->begin()), h_tx_value_counts, @@ -240,7 +243,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( } else { if (edge_ids) { if (edge_types) { - std::forward_as_tuple(std::tie(majors, minors, edge_ids, edge_types), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors, edge_ids, edge_types), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator( @@ -248,7 +251,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors, edge_ids), std::ignore) = shuffle_values( + std::forward_as_tuple(std::tie(majors, minors, edge_ids), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), edge_ids->begin()), h_tx_value_counts, @@ -256,13 +259,13 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( } } else { if (edge_types) { - std::forward_as_tuple(std::tie(majors, minors, edge_types), std::ignore) = shuffle_values( + std::forward_as_tuple(std::tie(majors, minors, edge_types), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), edge_types->begin()), h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors), rx_counts) = shuffle_values(comm, thrust::make_zip_iterator(majors.begin(), minors.begin()), h_tx_value_counts, @@ -276,7 +279,8 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( std::move(minors), std::move(weights), std::move(edge_ids), - std::move(edge_types)); + std::move(edge_types), + std::move(rx_counts)); } } // namespace @@ -288,7 +292,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -320,7 +325,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -366,7 +372,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& edge_srcs, rmm::device_uvector&& edge_dsts, diff --git a/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e32.cu b/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e32.cu index 7943ebcd5f4..db5a7c0e9bd 100644 --- a/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e32.cu +++ b/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e32.cu @@ -36,7 +36,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -49,7 +50,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -62,7 +64,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -76,7 +79,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -92,7 +96,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -104,7 +109,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, diff --git a/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e64.cu b/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e64.cu index 230fce435f9..d79b2379224 100644 --- a/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e64.cu +++ b/cpp/src/utilities/shuffle_vertex_pairs_mg_v32_e64.cu @@ -35,7 +35,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -48,7 +49,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -61,7 +63,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -75,7 +78,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -91,7 +95,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -103,7 +108,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, diff --git a/cpp/src/utilities/shuffle_vertex_pairs_mg_v64_e64.cu b/cpp/src/utilities/shuffle_vertex_pairs_mg_v64_e64.cu index 8134c41696b..605976f7076 100644 --- a/cpp/src/utilities/shuffle_vertex_pairs_mg_v64_e64.cu +++ b/cpp/src/utilities/shuffle_vertex_pairs_mg_v64_e64.cu @@ -36,7 +36,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -49,7 +50,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -62,7 +64,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -76,7 +79,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -92,7 +96,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -104,7 +109,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, diff --git a/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu b/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu index 0c91eb546d6..db7be5a3031 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu @@ -40,6 +40,13 @@ shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& d_values, std::vector const& vertex_partition_range_lasts); +template std::tuple, rmm::device_uvector> +shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values, + std::vector const& vertex_partition_range_lasts); + template rmm::device_uvector shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, rmm::device_uvector&& d_vertices); diff --git a/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu b/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu index 5abce7c0783..7d968006bc7 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu @@ -35,6 +35,13 @@ shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& d_values, std::vector const& vertex_partition_range_lasts); +template std::tuple, rmm::device_uvector> +shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values, + std::vector const& vertex_partition_range_lasts); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 892ba91af86..52d257b9bea 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -500,7 +500,7 @@ ConfigureTest(CORE_NUMBER_TEST cores/core_number_test.cpp) ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) ################################################################################################### -# - K-truss tests --------------------------------------------------------------------------------- +# - K-truss tests -------------------------------------------------------------------------- ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) ################################################################################################### @@ -619,6 +619,10 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG EDGE TRIANGLE COUNT tests -------------------------------------------------------------- ConfigureTestMG(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) + ############################################################################################### + # - MG K-TRUSS tests -------------------------------------------------------------------------- + ConfigureTestMG(MG_K_TRUSS_TEST community/mg_k_truss_test.cpp) + ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST @@ -783,6 +787,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_COUNT_MULTI_EDGES c_api/mg_count_multi_edges_test.c) ConfigureCTestMG(MG_CAPI_EGONET_TEST c_api/mg_egonet_test.c) ConfigureCTestMG(MG_CAPI_TWO_HOP_NEIGHBORS_TEST c_api/mg_two_hop_neighbors_test.c) + ConfigureCTestMG(MG_CAPI_K_TRUSS c_api/mg_k_truss_test.c) rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing_mg DESTINATION bin/gtests/libcugraph_mg) diff --git a/cpp/tests/c_api/mg_k_truss_test.c b/cpp/tests/c_api/mg_k_truss_test.c new file mode 100644 index 00000000000..e406eb330a7 --- /dev/null +++ b/cpp/tests/c_api/mg_k_truss_test.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +/* + * Simple check of creating a graph from a COO on device memory. + */ +int generic_k_truss_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_edges, + size_t num_results, + size_t k, + bool_t store_transposed, + vertex_t* h_result_src, + vertex_t* h_result_dst, + weight_t* h_result_wgt) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* graph = NULL; + + cugraph_induced_subgraph_result_t* result = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t size_t_tid = SIZE_T; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_k_truss_subgraph(handle, graph, k, FALSE, &result, &ret_error); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_k_truss failed."); + + cugraph_type_erased_device_array_view_t* k_truss_src; + cugraph_type_erased_device_array_view_t* k_truss_dst; + cugraph_type_erased_device_array_view_t* k_truss_wgt; + + k_truss_src = cugraph_induced_subgraph_get_sources(result); + k_truss_dst = cugraph_induced_subgraph_get_destinations(result); + k_truss_wgt = cugraph_induced_subgraph_get_edge_weights(result); + + size_t k_truss_size = cugraph_type_erased_device_array_view_size(k_truss_src); + + vertex_t h_k_truss_src[k_truss_size]; + vertex_t h_k_truss_dst[k_truss_size]; + weight_t h_k_truss_wgt[k_truss_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_k_truss_src, k_truss_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_k_truss_dst, k_truss_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_k_truss_wgt, k_truss_wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (size_t i = 0; (i < k_truss_size) && (test_ret_value == 0); ++i) { + bool_t found = FALSE; + for (size_t j = 0; (j < num_results) && !found; ++j) { + if ((h_k_truss_src[i] == h_result_src[j]) && (h_k_truss_dst[i] == h_result_dst[j]) && + (h_k_truss_wgt[i] == h_result_wgt[j])) + found = TRUE; + } + TEST_ASSERT(test_ret_value, found, "k_truss subgraph has an edge that doesn't match"); + } + + cugraph_induced_subgraph_result_free(result); + cugraph_mg_graph_free(graph); + cugraph_error_free(ret_error); + return test_ret_value; +} + +int test_k_truss_subgraph(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 14; + size_t num_vertices = 7; + size_t num_results = 6; + size_t k = 3; + + vertex_t h_src[] = {0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6}; + vertex_t h_dst[] = {1, 2, 5, 0, 2, 3, 4, 6, 0, 1, 1, 1, 0, 1}; + weight_t h_wgt[] = { + 1.2f, 1.3f, 1.6f, 1.2f, 2.3f, 2.4f, 2.5f, 2.7f, 1.3f, 2.3f, 2.4f, 2.5f, 1.6f, 2.7f}; + + vertex_t h_result_src[] = {0, 2, 2, 1, 1, 0}; + vertex_t h_result_dst[] = {1, 1, 0, 0, 2, 2}; + weight_t h_result_wgt[] = {1.2f, 2.3f, 1.3f, 1.2f, 2.3f, 1.3f}; + + return generic_k_truss_test(handle, + h_src, + h_dst, + h_wgt, + num_edges, + num_results, + k, + FALSE, + h_result_src, + h_result_dst, + h_result_wgt); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_k_truss_subgraph, handle); + + cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp new file mode 100644 index 00000000000..a1624949007 --- /dev/null +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/device_comm_wrapper.hpp" +#include "utilities/mg_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/thrust_wrapper.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include + +struct KTruss_Usecase { + int32_t k_{3}; + bool test_weighted_{false}; + bool edge_masking_{false}; + bool check_correctness_{true}; +}; + +template +class Tests_MGKTruss + : public ::testing::TestWithParam> { + public: + Tests_MGKTruss() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running KTruss on multiple GPUs to that of a single-GPU run + template + void run_current_test(KTruss_Usecase const& k_truss_usecase, input_usecase_t const& input_usecase) + { + using weight_t = float; + + HighResTimer hr_timer{}; + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + auto [mg_graph, edge_weight, mg_renumber_map] = + cugraph::test::construct_graph( + *handle_, input_usecase, k_truss_usecase.test_weighted_, true, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + + std::optional> edge_mask{std::nullopt}; + if (k_truss_usecase.edge_masking_) { + edge_mask = cugraph::test::generate::edge_property( + *handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + // 2. run MG KTruss + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG KTruss"); + } + + auto mg_edge_weight_view = + edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt; + auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = + cugraph::k_truss( + *handle_, mg_graph_view, mg_edge_weight_view, k_truss_usecase.k_, false); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. Compare SG & MG results + + if (k_truss_usecase.check_correctness_) { + cugraph::unrenumber_int_vertices( + *handle_, + d_cugraph_srcs.data(), + d_cugraph_srcs.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + + cugraph::unrenumber_int_vertices( + *handle_, + d_cugraph_dsts.data(), + d_cugraph_dsts.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + + auto global_d_cugraph_srcs = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_cugraph_srcs.data(), d_cugraph_srcs.size())); + + auto global_d_cugraph_dsts = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_cugraph_dsts.data(), d_cugraph_srcs.size())); + + rmm::device_uvector d_sorted_cugraph_srcs{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_cugraph_dsts{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_cugraph_wgts{0, handle_->get_stream()}; + + if (edge_weight) { + auto global_d_cugraph_wgts = cugraph::test::device_gatherv( + *handle_, + raft::device_span((*d_cugraph_wgts).data(), (*d_cugraph_wgts).size())); + + std::tie(d_sorted_cugraph_srcs, d_sorted_cugraph_dsts, d_sorted_cugraph_wgts) = + cugraph::test::sort_by_key( + *handle_, global_d_cugraph_srcs, global_d_cugraph_dsts, global_d_cugraph_wgts); + + } else { + std::tie(d_sorted_cugraph_srcs, d_sorted_cugraph_dsts) = + cugraph::test::sort(*handle_, global_d_cugraph_srcs, global_d_cugraph_dsts); + } + + // 3-1. Convert to SG graph + auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + + if (handle_->get_comms().get_rank() == int{0}) { + auto sg_graph_view = sg_graph.view(); + + // 3-2. Run SG KTruss + auto [ref_d_cugraph_srcs, ref_d_cugraph_dsts, ref_d_cugraph_wgts] = + cugraph::k_truss( + *handle_, sg_graph_view, sg_edge_weight_view, k_truss_usecase.k_, false); + + rmm::device_uvector d_sorted_ref_cugraph_srcs{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_ref_cugraph_dsts{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_ref_cugraph_wgts{0, handle_->get_stream()}; + + if (edge_weight) { + std::tie( + d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts, d_sorted_ref_cugraph_wgts) = + cugraph::test::sort_by_key( + *handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts, *ref_d_cugraph_wgts); + + } else { + std::tie(d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts) = + cugraph::test::sort(*handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts); + } + + // 3-3. Compare + auto h_cugraph_srcs = cugraph::test::to_host(*handle_, d_sorted_cugraph_srcs); + auto h_cugraph_dsts = cugraph::test::to_host(*handle_, d_sorted_cugraph_dsts); + auto ref_h_cugraph_srcs = cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_srcs); + auto ref_h_cugraph_dsts = cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_dsts); + + ASSERT_TRUE( + std::equal(h_cugraph_srcs.begin(), h_cugraph_srcs.end(), ref_h_cugraph_srcs.begin())); + + ASSERT_TRUE( + std::equal(h_cugraph_dsts.begin(), h_cugraph_dsts.end(), ref_h_cugraph_dsts.begin())); + + if (edge_weight) { + auto ref_h_cugraph_wgts = cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_wgts); + + auto h_cugraph_wgts = cugraph::test::to_host(*handle_, d_sorted_cugraph_wgts); + + ASSERT_TRUE( + std::equal(h_cugraph_wgts.begin(), h_cugraph_wgts.end(), ref_h_cugraph_wgts.begin())); + } + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGKTruss::handle_ = nullptr; + +using Tests_MGKTruss_File = Tests_MGKTruss; +using Tests_MGKTruss_Rmat = Tests_MGKTruss; + +TEST_P(Tests_MGKTruss_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGKTruss_Rmat, CheckInt64Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_tests, + Tests_MGKTruss_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(KTruss_Usecase{4, false, true, true}, KTruss_Usecase{5, true, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGKTruss_Rmat, + ::testing::Combine( + ::testing::Values(KTruss_Usecase{4, false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGKTruss_Rmat, + ::testing::Combine( + ::testing::Values(KTruss_Usecase{4, false, false, false}, + KTruss_Usecase{5, false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_prediction/mg_similarity_test.cpp b/cpp/tests/link_prediction/mg_similarity_test.cpp index 8f674e6a6de..3bcabb6b6df 100644 --- a/cpp/tests/link_prediction/mg_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_similarity_test.cpp @@ -106,7 +106,7 @@ class Tests_MGSimilarity auto d_v1 = cugraph::test::to_device(*handle_, h_v1); auto d_v2 = std::move(two_hop_nbrs); - std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = + std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp index 192caa5227e..730a3ac8f08 100644 --- a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ -108,7 +108,7 @@ class Tests_MGSimilarity auto d_v1 = cugraph::test::to_device(*handle_, h_v1); auto d_v2 = std::move(two_hop_nbrs); - std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = + std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 681a7d8e6ff..fc6369ec721 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -149,6 +149,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::get<1>(mg_vertex_pair_buffer), std::ignore, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 48bbc6176d8..06a23880d81 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -175,6 +175,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::get<1>(mg_vertex_pair_buffer), std::ignore, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp index 3d3d881fb23..b8ad06dd18b 100644 --- a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp +++ b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp @@ -123,6 +123,7 @@ class Tests_MGHasEdgeAndComputeMultiplicity store_transposed ? d_mg_edge_srcs : d_mg_edge_dsts, std::ignore, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index b6898fbaf78..0a706d1cf80 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -335,6 +335,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { store_transposed ? tmp_src_v : tmp_dst_v, tmp_weights_v, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t,