From bfc4ac6217e8b45455223e9c69c81187a55fbf9d Mon Sep 17 00:00:00 2001 From: Md Naim Date: Wed, 14 Jun 2023 13:59:00 -0700 Subject: [PATCH 01/22] Debug- pass around pointer to weights --- .../cugraph/edge_partition_device_view.cuh | 17 +- cpp/src/link_prediction/similarity_impl.cuh | 1 + cpp/src/prims/detail/nbr_intersection.cuh | 410 ++++++++- ..._v_pair_transform_dst_nbr_intersection.cuh | 40 +- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 1 + cpp/tests/CMakeLists.txt | 6 + ...r_v_pair_transform_dst_nbr_intersection.cu | 2 + ...transform_dst_nbr_weighted_intersection.cu | 839 ++++++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 17 +- 9 files changed, 1324 insertions(+), 9 deletions(-) create mode 100644 cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 02b931fbde6..163bc0a709c 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -111,8 +111,23 @@ class edge_partition_device_view_base_t { // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC __device__ thrust::tuple local_edges( - vertex_t major_idx) const noexcept + vertex_t major_idx, bool debug = false) const noexcept { + if (debug && major_idx == 0) { + printf("offsets_.size(): %d\n", static_cast(offsets_.size())); + + printf("offsets_: "); + for (size_t k = 0; k < offsets_.size(); k++) { + printf("%d ", static_cast(offsets_[k])); + } + printf("\n"); + + printf("indices_: "); + for (size_t k = 0; k < indices_.size(); k++) { + printf("%d ", static_cast(indices_[k])); + } + printf("\n"); + } auto edge_offset = offsets_[major_idx]; auto local_degree = offsets_[major_idx + 1] - edge_offset; auto indices = indices_.data() + edge_offset; diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 
97c8017c668..5a7704f5b1e 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -74,6 +74,7 @@ rmm::device_uvector similarity( vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs, out_degrees.begin(), + cugraph::edge_dummy_property_t{}.view(), [functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) { return functor.compute_score(static_cast(v1_degree), static_cast(v2_degree), diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 2d0d0a876e6..da29a169313 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -168,12 +169,16 @@ struct update_rx_major_local_degree_t { } }; -template +template struct update_rx_major_local_nbrs_t { int major_comm_size{}; int minor_comm_size{}; edge_partition_device_view_t edge_partition{}; + edge_partition_e_input_device_view_t edge_partition_e_value_input{}; size_t reordered_idx_first{}; size_t local_edge_partition_idx{}; @@ -185,6 +190,9 @@ struct update_rx_major_local_nbrs_t { raft::device_span local_nbrs_for_rx_majors{}; + raft::device_span + local_nbrs_weights_for_rx_majors{}; + __device__ void operator()(size_t idx) const { auto it = thrust::upper_bound( @@ -222,6 +230,21 @@ struct update_rx_major_local_nbrs_t { local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]); + + if constexpr (!std::is_same_v) { + thrust::transform( + thrust::seq, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(local_degree), + local_nbrs_weights_for_rx_majors.begin() + + local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + + local_edge_partition_idx] + + offset_in_local_edge_partition], + [&] __device__(auto edge_weight_idx) { + return 
edge_partition_e_value_input.get(edge_offset + edge_weight_idx); + }); + } } }; @@ -317,32 +340,56 @@ template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; raft::device_span first_element_offsets{}; raft::device_span first_element_indices{nullptr}; + raft::device_span + first_element_weights{nullptr}; SecondElementToIdxMap second_element_to_idx_map{}; raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; + raft::device_span + second_element_weights{nullptr}; edge_partition_device_view_t edge_partition{}; + edge_partition_e_input_device_view_t edge_partition_e_value_input{}; VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; + raft::device_span + nbr_intersection_weights{nullptr}; vertex_t invalid_id{}; + int rank{1000}; + size_t edge_partition_idx{1000}; + __device__ edge_t operator()(size_t i) const { + using weight_t = typename edge_partition_e_input_device_view_t::value_type; + auto pair = *(vertex_pair_first + i); + printf( + "-----rank=%d, edge_partition_idx=%d------\n", rank, static_cast(edge_partition_idx)); + + if constexpr (!std::is_same_v) { + printf("called with FirstElementToIdxMap********\n"); + } + if constexpr (!std::is_same_v) { + printf("called with SecondElementToIdxMap---------\n"); + } + vertex_t const* indices0{nullptr}; [[maybe_unused]] edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { + printf("element0 from edge_partition\n"); vertex_t major = thrust::get<0>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -363,16 +410,48 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } } else { + printf("element0 from first_element_to_idx_map******\n"); auto idx = 
first_element_to_idx_map.find(thrust::get<0>(pair)); local_degree0 = static_cast(first_element_offsets[idx + 1] - first_element_offsets[idx]); indices0 = first_element_indices.begin() + first_element_offsets[idx]; + + local_edge_offset0 = first_element_offsets[idx]; } + vertex_t element0 = thrust::get<0>(pair); + printf("element0 %d, local_degree0 %d local_edge_offset0 %d\n", + static_cast(element0), + static_cast(local_degree0), + static_cast(local_edge_offset0)); + + for (edge_t k = 0; k < local_degree0; k++) { + if constexpr (std::is_same_v) { + if constexpr (!std::is_same_v) { + weight_t ew{}; + ew = edge_partition_e_value_input.get(local_edge_offset0 + k); + printf("(%d %.2f)* ", static_cast(indices0[k]), static_cast(ew)); + } else { + printf("%d* ", static_cast(indices0[k])); + } + + } else { + if constexpr (!std::is_same_v) { + weight_t ew = *(first_element_weights.begin() + local_edge_offset0 + k); + printf("(%d %.2f)* ", static_cast(indices0[k]), static_cast(ew)); + } else { + printf("%d* ", static_cast(indices0[k])); + } + } + } + + printf("\n"); + vertex_t const* indices1{nullptr}; [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { + printf("element1 from edge_partition\n"); vertex_t major = thrust::get<1>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -393,12 +472,42 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } } else { + printf("element1 from second_element_to_idx_map----\n"); auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_degree1 = static_cast(second_element_offsets[idx + 1] - second_element_offsets[idx]); indices1 = second_element_indices.begin() + second_element_offsets[idx]; + + local_edge_offset1 = second_element_offsets[idx]; } + vertex_t element1 = thrust::get<1>(pair); + printf("element1 %d, local_degree1 %d local_edge_offset1 
%d\n", + static_cast(element1), + static_cast(local_degree1), + static_cast(local_edge_offset1)); + + for (edge_t k = 0; k < local_degree1; k++) { + if constexpr (std::is_same_v) { + if constexpr (!std::is_same_v) { + weight_t ew{}; + ew = edge_partition_e_value_input.get(local_edge_offset1 + k); + printf("(%d %.2f)- ", static_cast(indices1[k]), static_cast(ew)); + } else { + printf("%d- ", static_cast(indices1[k])); + } + + } else { + if constexpr (!std::is_same_v) { + weight_t ew = *(second_element_weights.begin() + local_edge_offset1 + k); + printf("(%d %.2f)- ", static_cast(indices1[k]), static_cast(ew)); + } else { + printf("%d- ", static_cast(indices1[k])); + } + } + } + printf("\n"); + // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) @@ -553,18 +662,29 @@ size_t count_invalid_vertex_pairs(raft::handle_t const& handle, // thrust::distance(vertex_pair_first, vertex_pair_last) should be comparable across the global // communicator. If we need to build the neighbor lists, grouping based on applying "vertex ID % // number of groups" is recommended for load-balancing. 
-template +template std::tuple, rmm::device_uvector> nbr_intersection(raft::handle_t const& handle, GraphViewType const& graph_view, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, + EdgeValueInputWrapper edge_value_input, std::array intersect_dst_nbr, bool do_expensive_check = false) { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; + using edge_partition_e_input_device_view_t = std::conditional_t< + std::is_same_v, + detail::edge_partition_edge_dummy_property_device_view_t, + detail::edge_partition_edge_property_device_view_t< + edge_t, + typename EdgeValueInputWrapper::value_iterator, + typename EdgeValueInputWrapper::value_type>>; + + using weight_t = typename edge_partition_e_input_device_view_t::value_type; + static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -601,6 +721,8 @@ nbr_intersection(raft::handle_t const& handle, std::optional> major_nbr_offsets{std::nullopt}; std::optional> major_nbr_indices{std::nullopt}; + std::optional> major_nbr_weights{std::nullopt}; + if constexpr (GraphViewType::is_multi_gpu) { if (intersect_minor_nbr[1]) { auto& comm = handle.get_comms(); @@ -613,6 +735,8 @@ nbr_intersection(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); + auto const comm_rank = handle.get_comms().get_rank(); + // 2.1 Find unique second pair element majors rmm::device_uvector unique_majors(input_size, handle.get_stream()); @@ -663,6 +787,20 @@ nbr_intersection(raft::handle_t const& handle, } } + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector( + "unique_majors", unique_majors.data(), unique_majors.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + 
// 2.2 Send majors and group (major_comm_rank, edge_partition_idx) counts rmm::device_uvector rx_majors(0, handle.get_stream()); @@ -702,6 +840,26 @@ nbr_intersection(raft::handle_t const& handle, size_t{0}); } + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector( + "d_tx_group_counts", d_tx_group_counts.data(), d_tx_group_counts.size(), std::cout); + + std::cout << "tx_counts:"; + std::copy(tx_counts.data(), + tx_counts.data() + tx_counts.size(), + std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + std::tie(rx_majors, rx_major_counts) = shuffle_values(major_comm, unique_majors.begin(), tx_counts, handle.get_stream()); @@ -712,10 +870,35 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("rx_majors", rx_majors.data(), rx_majors.size(), std::cout); + + std::cout << "rx_major_counts:"; + std::copy(rx_major_counts.data(), + rx_major_counts.data() + rx_major_counts.size(), + std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; + + raft::print_device_vector( + "rx_group_counts", rx_group_counts.data(), rx_group_counts.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + // 2.3. 
Enumerate degrees and neighbors for the received majors rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); + rmm::device_uvector local_nbrs_weights_for_rx_majors(size_t{0}, + handle.get_stream()); + std::vector local_nbr_counts{}; { rmm::device_uvector rx_reordered_group_counts( @@ -730,6 +913,22 @@ nbr_intersection(raft::handle_t const& handle, raft::device_span( rx_group_counts.data(), rx_group_counts.size())}); + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("rx_reordered_group_counts", + rx_reordered_group_counts.data(), + rx_reordered_group_counts.size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + rmm::device_uvector d_rx_reordered_group_lasts(rx_reordered_group_counts.size(), handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), @@ -749,6 +948,20 @@ nbr_intersection(raft::handle_t const& handle, rx_group_counts.end(), rx_group_firsts.begin()); + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector( + "rx_group_firsts", rx_group_firsts.data(), rx_group_firsts.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + local_degrees_for_rx_majors.resize(rx_majors.size(), handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = @@ -776,6 +989,22 @@ nbr_intersection(raft::handle_t const& handle, local_degrees_for_rx_majors.size())}); } + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + 
RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("local_degrees_for_rx_majors", + local_degrees_for_rx_majors.data(), + local_degrees_for_rx_majors.size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + rmm::device_uvector local_nbr_offsets_for_rx_majors( local_degrees_for_rx_majors.size() + 1, handle.get_stream()); local_nbr_offsets_for_rx_majors.set_element_to_zero_async(size_t{0}, handle.get_stream()); @@ -788,10 +1017,33 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); + + local_nbrs_weights_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), + handle.get_stream()); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("local_nbr_offsets_for_rx_majors", + local_nbr_offsets_for_rx_majors.data(), + local_nbr_offsets_for_rx_majors.size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + + auto edge_partition_e_value_input = + edge_partition_e_input_device_view_t(edge_value_input, i); auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? 
size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -801,10 +1053,14 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(reordered_idx_first), thrust::make_counting_iterator(reordered_idx_last), - update_rx_major_local_nbrs_t{ + update_rx_major_local_nbrs_t{ major_comm_size, minor_comm_size, edge_partition, + edge_partition_e_value_input, reordered_idx_first, i, raft::device_span( @@ -814,7 +1070,25 @@ nbr_intersection(raft::handle_t const& handle, raft::device_span(local_nbr_offsets_for_rx_majors.data(), local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), - local_nbrs_for_rx_majors.size())}); + local_nbrs_for_rx_majors.size()), + raft::device_span(local_nbrs_weights_for_rx_majors.data(), + local_nbrs_weights_for_rx_majors.size())}); + } + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("local_nbrs_for_rx_majors", + local_nbrs_for_rx_majors.data(), + local_nbrs_for_rx_majors.size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -838,6 +1112,22 @@ nbr_intersection(raft::handle_t const& handle, d_local_nbr_counts.size(), handle.get_stream()); handle.sync_stream(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("d_local_nbr_counts", + d_local_nbr_counts.data(), + d_local_nbr_counts.size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } } // 2.4 Send the degrees and neighbors back @@ -857,9 +1147,56 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_offsets).begin() + 1); } 
+ for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("(*major_nbr_offsets)", + (*major_nbr_offsets).data(), + (*major_nbr_offsets).size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + std::tie(*major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); + if constexpr (!std::is_same_v) { + std::tie(*major_nbr_weights, std::ignore) = + shuffle_values(major_comm, + local_nbrs_weights_for_rx_majors.begin(), + local_nbr_counts, + handle.get_stream()); + } + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("(*major_nbr_indices)", + (*major_nbr_indices).data(), + (*major_nbr_indices).size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*major_nbr_weights)", + (*major_nbr_weights).data(), + (*major_nbr_weights).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + major_to_idx_map_ptr = std::make_unique>( unique_majors.begin(), unique_majors.end(), @@ -887,6 +1224,9 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); + + rmm::device_uvector nbr_intersection_weights(size_t{0}, handle.get_stream()); + if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_rank = minor_comm.get_rank(); @@ -944,6 +1284,10 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_sizes(size_t{0}, 
handle.get_stream()); rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); + + rmm::device_uvector rx_v_pair_nbr_intersection_weights(size_t{0}, + handle.get_stream()); + std::vector rx_v_pair_nbr_intersection_index_tx_counts(size_t{0}); { auto vertex_pair_buffer = allocate_dataframe_buffer>( @@ -966,6 +1310,9 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + + auto edge_partition_e_value_input = + edge_partition_e_input_device_view_t(edge_value_input, i); auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); rx_v_pair_nbr_intersection_sizes.resize( @@ -1003,7 +1350,38 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_indices.resize( rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); + + rx_v_pair_nbr_intersection_weights.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("(*major_nbr_indices)", + (*major_nbr_indices).data(), + (*major_nbr_indices).size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*major_nbr_weights)", + (*major_nbr_weights).data(), + (*major_nbr_weights).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + auto second_element_to_idx_map = detail::kv_cuco_store_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( @@ -1016,21 +1394,30 @@ nbr_intersection(raft::handle_t const& handle, 
decltype(get_dataframe_buffer_begin(vertex_pair_buffer)), vertex_t, edge_t, + edge_partition_e_input_device_view_t, true>{nullptr, raft::device_span(), raft::device_span(), + raft::device_span(), second_element_to_idx_map, raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), raft::device_span((*major_nbr_indices).data(), (*major_nbr_indices).size()), + raft::device_span((*major_nbr_weights).data(), + (*major_nbr_weights).size()), edge_partition, + edge_partition_e_value_input, get_dataframe_buffer_begin(vertex_pair_buffer), raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), rx_v_pair_nbr_intersection_offsets.size()), raft::device_span(rx_v_pair_nbr_intersection_indices.data(), rx_v_pair_nbr_intersection_indices.size()), - invalid_vertex_id::value}); + raft::device_span(rx_v_pair_nbr_intersection_weights.data(), + rx_v_pair_nbr_intersection_weights.size()), + invalid_vertex_id::value, + handle.get_comms().get_rank(), + i}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1222,6 +1609,7 @@ nbr_intersection(raft::handle_t const& handle, num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); + nbr_intersection_weights.resize(nbr_intersection_indices.size(), handle.get_stream()); size_t size_offset{0}; size_t index_offset{0}; for (size_t i = 0; i < edge_partition_nbr_intersection_sizes.size(); ++i) { @@ -1249,6 +1637,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_device_view_t( graph_view.local_edge_partition_view(size_t{0})); + auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0); rmm::device_uvector nbr_intersection_sizes( input_size, handle.get_stream()); // initially store minimum degrees (upper bound for intersection sizes) @@ -1278,6 +1667,7 @@ nbr_intersection(raft::handle_t const& handle, 
nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); + nbr_intersection_weights.resize(nbr_intersection_indices.size(), handle.get_stream()); if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { thrust::tabulate( handle.get_thrust_policy(), @@ -1288,20 +1678,28 @@ nbr_intersection(raft::handle_t const& handle, decltype(vertex_pair_first), vertex_t, edge_t, + edge_partition_e_input_device_view_t, false>{ nullptr, raft::device_span(), raft::device_span(), + raft::device_span(), nullptr, raft::device_span(), raft::device_span(), + raft::device_span(), edge_partition, + edge_partition_e_value_input, vertex_pair_first, raft::device_span(nbr_intersection_offsets.data(), nbr_intersection_offsets.size()), raft::device_span(nbr_intersection_indices.data(), nbr_intersection_indices.size()), - invalid_vertex_id::value}); + raft::device_span(nbr_intersection_weights.data(), + nbr_intersection_weights.size()), + invalid_vertex_id::value, + 0, + 0}); } else { CUGRAPH_FAIL("unimplemented."); } diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index f30de0750e3..b1d0f971630 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include @@ -149,6 +149,19 @@ struct call_intersection_op_t { src_prop = *(vertex_property_first + src_offset); dst_prop = *(vertex_property_first + dst_offset); } + + printf("(%d <-> %d) %d %d %d\n", + static_cast(src), + static_cast(dst), + static_cast(src_prop), + static_cast(dst_prop), + static_cast(intersection.size())); + + for (size_t k = 0; k < intersection.size(); k++) { + printf("%d ", static_cast(*(intersection.data() + k))); + } + printf("\n"); + *(major_minor_pair_value_output_first + index) = intersection_op(src, dst, src_prop, dst_prop, 
intersection); } @@ -166,6 +179,7 @@ struct call_intersection_op_t { * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexPairIterator Type of the iterator for input vertex pairs. * @tparam VertexValueInputWrapper Type of the wrapper for vertex property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. * @tparam IntersectionOp Type of the quinary per intersection operator. * @tparam VertexPairValueOutputIterator Type of the iterator for vertex pair output property * variables. @@ -176,6 +190,10 @@ struct call_intersection_op_t { * @param vertex_pair_last Iterator pointing to the last (exclusive) input vertex pair. * @param vertex_src_value_input Wrapper used to access vertex input property values (for the * vertices assigned to this process in multi-GPU). + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p intersection_op + * needs to access edge property values) or cugraph::edge_dummy_property_t::view() (if @p + * intersection_op does not access edge property values). 
* @param intersection_op quinary operator takes first vertex of the pair, second vertex of the * pair, property values for the first vertex, property values for the second vertex, and a list of * vertices in the intersection of the first & second vertices' destination neighbors and returns an @@ -188,6 +206,7 @@ struct call_intersection_op_t { template void per_v_pair_transform_dst_nbr_intersection( @@ -196,6 +215,7 @@ void per_v_pair_transform_dst_nbr_intersection( VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, VertexValueInputIterator vertex_value_input_first, + EdgeValueInputWrapper edge_value_input, IntersectionOp intersection_op, VertexPairValueOutputIterator vertex_pair_value_output_first, bool do_expensive_check = false) @@ -297,6 +317,23 @@ void per_v_pair_transform_dst_nbr_intersection( size_t{0}); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << ", edge partittion idx = " << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); @@ -352,6 +389,7 @@ void per_v_pair_transform_dst_nbr_intersection( graph_view, chunk_vertex_pair_first, chunk_vertex_pair_first + this_chunk_size, + edge_value_input, std::array{true, true}, do_expensive_check); diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index 4823c1febf4..0913c9eb28f 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ 
b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -342,6 +342,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( graph_view, chunk_vertex_pair_first, chunk_vertex_pair_first + this_chunk_size, + cugraph::edge_dummy_property_t{}.view(), std::array{true, true}, do_expensive_check); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 7d4a2181af1..b9192e913fc 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -577,6 +577,12 @@ if(BUILD_CUGRAPH_MG_TESTS) prims/mg_per_v_pair_transform_dst_nbr_intersection.cu) target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST PRIVATE cuco::cuco) + ############################################################################################### + # - MG PRIMS PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION tests ------------------------- + ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST + prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu) + target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST PRIVATE cuco::cuco) + ############################################################################################### # - MG NBR SAMPLING tests --------------------------------------------------------------------- ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cu) diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 0ff0a041a71..b9a95fb20be 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -163,6 +163,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), mg_out_degrees.begin(), + cugraph::edge_dummy_property_t{}.view(), 
intersection_op_t{}, cugraph::get_dataframe_buffer_begin(mg_result_buffer)); @@ -231,6 +232,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection mg_aggregate_vertex_pair_buffer /* now unrenumbered */), cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered */), sg_out_degrees.begin(), + cugraph::edge_dummy_property_t{}.view(), intersection_op_t{}, cugraph::get_dataframe_buffer_begin(sg_result_buffer)); diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu new file mode 100644 index 00000000000..ed0d4a077f2 --- /dev/null +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -0,0 +1,839 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +template +struct intersection_op_t { + __device__ thrust::tuple operator()( + vertex_t v0, + vertex_t v1, + edge_t v0_prop /* out degree */, + edge_t v1_prop /* out degree */, + raft::device_span intersection) const + { + // printf("\n%d %d %d %d %d\n", + // static_cast(v0), + // static_cast(v1), + // static_cast(v0_prop), + // static_cast(v1_prop), + // static_cast(intersection.size())); + return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); + } +}; + +struct Prims_Usecase { + size_t num_vertex_pairs{0}; + bool check_correctness{true}; +}; + +template +class Tests_MGPerVPairTransformDstNbrIntersection + : public ::testing::TestWithParam> { + public: + Tests_MGPerVPairTransformDstNbrIntersection() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. 
create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + cugraph::graph_t mg_graph(*handle_); + std::optional> mg_renumber_map{std::nullopt}; + + // std::tie(mg_graph, std::ignore, mg_renumber_map) = + // cugraph::test::construct_graph( + // *handle_, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + std::string file_path = "/home/nfs/mnaim/csv/grid.csv"; + + constexpr bool store_transposed = false; + constexpr bool multi_gpu = true; + + std::optional< + cugraph::edge_property_t, + weight_t>> + edge_weights{std::nullopt}; + + /* + /// + // + // Create decision graph from edgelist + // + + // using DecisionGraphViewType = cugraph::graph_view_t; + + // cugraph::graph_t decision_graph(*handle_); + + // std::optional> renumber_map{std::nullopt}; + // std::optional> + coarse_edge_weights{ + // std::nullopt}; + + vertex_t N = 4; + vertex_t nr_valid_tuples = N * N - N; + + std::vector h_srcs(nr_valid_tuples); + std::vector h_dsts(nr_valid_tuples); + std::vector h_weights(nr_valid_tuples); + + // rmm::device_uvector d_srcs(nr_valid_tuples, handle_->get_stream()); + // rmm::device_uvector d_dsts(nr_valid_tuples, handle_->get_stream()); + // std::optional> d_weights = + // std::make_optional(rmm::device_uvector(nr_valid_tuples, + handle_->get_stream())); + + auto& comm = handle_->get_comms(); + auto& major_comm = handle_->get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle_->get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto gpu_id_key_func = 
cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ + comm_size, major_comm_size, minor_comm_size}; + std::srand(comm_rank); + + int edge_counter = 0; + for (vertex_t i = 0; i < N; i++) { + for (vertex_t j = 0; j < N; j++) { + if (i != j) { + h_srcs[edge_counter] = i; + h_dsts[edge_counter] = j; + h_weights[edge_counter] = std::max(i, j) * 10 + std::min(i, j); + edge_counter++; + } + } + } + + comm.barrier(); + if (comm_rank == 0) + for (int i = 0; i < edge_counter; i++) { + std::cout << "(" << h_srcs[i] << "," << h_dsts[i] << ") => " + << gpu_id_key_func(h_srcs[i], h_dsts[i]) << std::endl; + } + comm.barrier(); + + auto d_srcs = cugraph::test::to_device(*handle_, h_srcs); + auto d_dsts = cugraph::test::to_device(*handle_, h_dsts); + auto d_weights = std::make_optional(cugraph::test::to_device(*handle_, h_weights)); + + if (multi_gpu) { + auto& comm = handle_->get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::cout << "Rank :" << comm_rank << std::endl; + + std::cout << " d_srcs.size(): " << d_srcs.size() << " d_dsts.size(): " << d_dsts.size() + << " (*d_weights).size(): " << (*d_weights).size() << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + raft::print_device_vector("d_srcs: ", d_srcs.data(), d_srcs.size(), std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + raft::print_device_vector("d_dsts: ", d_dsts.data(), d_dsts.size(), std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + raft::print_device_vector( + "(*d_weights): ", (*d_weights).data(), (*d_weights).size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + + std::tie(store_transposed ? d_dsts : d_srcs, + store_transposed ? 
d_srcs : d_dsts, + d_weights, + std::ignore, + std::ignore) = + cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + vertex_t, + weight_t, + int32_t>(*handle_, + store_transposed ? std::move(d_dsts) : std::move(d_srcs), + store_transposed ? std::move(d_srcs) : std::move(d_dsts), + std::move(d_weights), + std::nullopt, + std::nullopt); + + cugraph::test::sort_and_remove_multi_edges(*handle_, d_srcs, d_dsts, d_weights); + + if (multi_gpu) { + auto& comm = handle_->get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::cout << "Rank :" << comm_rank << std::endl; + + std::cout << " d_srcs.size(): " << d_srcs.size() << " d_dsts.size(): " << d_dsts.size() + << " (*d_weights).size(): " << (*d_weights).size() << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + raft::print_device_vector("d_srcs: ", d_srcs.data(), d_srcs.size(), std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + raft::print_device_vector("d_dsts: ", d_dsts.data(), d_dsts.size(), std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + raft::print_device_vector( + "(*d_weights): ", (*d_weights).data(), (*d_weights).size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + + std::cout << "Before create_graph_from_edgelist ... 
" << std::endl; + std::tie(mg_graph, edge_weights, std::ignore, std::ignore, mg_renumber_map) = + cugraph::create_graph_from_edgelist(*handle_, + std::nullopt, + std::move(d_srcs), + std::move(d_dsts), + std::move(d_weights), + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{true, false}, + true, + true); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::cout << "Returned from create_graph_from_edgelist" << std::endl; + // auto decision_graph_view = decision_graph.view(); + + /// + */ + + bool test_weighted = true; + bool renumber = true; + std::tie(mg_graph, edge_weights, mg_renumber_map) = + cugraph::test::read_graph_from_csv_file( + *handle_, file_path, test_weighted, renumber); + + auto mg_graph_view = mg_graph.view(); + auto edge_weight_view = (*edge_weights).view(); + + using GraphViewType = decltype(mg_graph.view()); + + if (GraphViewType::is_multi_gpu) { + auto vertex_partitions_range_lasts = + cugraph::test::to_device(*handle_, mg_graph_view.vertex_partition_range_lasts()); + + auto& comm = handle_->get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + auto& major_comm = handle_->get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto const major_comm_rank = major_comm.get_rank(); + + auto& minor_comm = handle_->get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::cout << "Rank :" << comm_rank << std::endl; + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + std::cout << "(" << major_comm_size << minor_comm_size << ")" << std::endl; + + std::cout << major_comm_rank << minor_comm.get_rank() << std::endl; + + raft::print_device_vector("vertex_partitions_range_lasts:", + vertex_partitions_range_lasts.data(), + vertex_partitions_range_lasts.size(), + 
std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle_->get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + /* + std::vector h_major_range_lasts(mg_graph_view.number_of_local_edge_partitions()); + for (size_t i = 0; i < mg_graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + cugraph::edge_partition_device_view_t( + mg_graph_view.local_edge_partition_view(i)); + h_major_range_lasts[i] = edge_partition.major_range_last(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::cout << "(rank = " << comm_rank << ", edge partittion idx = " << i + << ") : " << edge_partition.major_range_first() << " -- " + << edge_partition.major_range_last() << std::endl; + } + comm.barrier(); + } + } + + rmm::device_uvector d_major_range_lasts(h_major_range_lasts.size(), + handle_->get_stream()); + raft::update_device(d_major_range_lasts.data(), + h_major_range_lasts.data(), + h_major_range_lasts.size(), + handle_->get_stream()); + handle_->sync_stream(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("d_major_range_lasts: ", + d_major_range_lasts.data(), + d_major_range_lasts.size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + */ + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + std::cout << "edge_counts: "; + std::copy(edge_weight_view.edge_counts().begin(), + edge_weight_view.edge_counts().end(), + std::ostream_iterator(std::cout, " ")); + 
std::cout << std::endl; + edge_t num_edges = std::reduce(edge_weight_view.edge_counts().begin(), + edge_weight_view.edge_counts().end()); + + std::cout << std::endl << "num_edges: " << num_edges << std::endl; + + for (size_t i = 0; i < mg_graph_view.number_of_local_edge_partitions(); ++i) { + std::cout << "partition " << i << " weights"; + raft::print_device_vector(":", + edge_weight_view.value_firsts()[i], + edge_weight_view.edge_counts()[i], + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("(*mg_renumber_map): ", + (*mg_renumber_map).data(), + (*mg_renumber_map).size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + for (size_t i = 0; i < mg_graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + cugraph::edge_partition_device_view_t( + mg_graph_view.local_edge_partition_view(i)); + + auto edge_partition_weight_view = + cugraph::detail::edge_partition_edge_property_device_view_t( + edge_weight_view, i); + + auto edge_partition_weight_value_ptr = edge_partition_weight_view.value_first(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << ", edge partittion idx = " << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("edge_weight_view: ", + edge_weight_view.value_firsts()[i], + edge_weight_view.edge_counts()[i], + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + std::cout << "rank = " << comm_rank << ", edge partittion 
idx = " << i << " : " + << edge_partition.major_range_first() << "--" + << edge_partition.major_range_last() << std::endl; + + thrust::for_each( + handle_->get_thrust_policy(), + thrust::make_counting_iterator(edge_partition.major_range_first()), + thrust::make_counting_iterator(edge_partition.major_range_last()), + [edge_partition, edge_partition_weight_value_ptr] __device__(vertex_t major) { + vertex_t major_idx{}; + auto major_hypersparse_first = edge_partition.major_hypersparse_first(); + if (major_hypersparse_first) { + if (major < *major_hypersparse_first) { + major_idx = edge_partition.major_offset_from_major_nocheck(major); + } else { + auto major_hypersparse_idx = + edge_partition.major_hypersparse_idx_from_major_nocheck(major); + if (!major_hypersparse_idx) { return true; } + major_idx = + edge_partition.major_offset_from_major_nocheck(*major_hypersparse_first) + + *major_hypersparse_idx; + } + } else { + major_idx = edge_partition.major_offset_from_major_nocheck(major); + } + vertex_t const* indices{nullptr}; + edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = + edge_partition.local_edges(major_idx, true); + + // std::optional> edge_weight_view; + + auto number_of_edges = edge_partition.number_of_edges(); + + printf("vertex = %d offset_idx = %d deg= %d number_of_edges=%d\n", + major, + edge_offset, + local_degree, + number_of_edges); + for (edge_t nbr_idx = 0; nbr_idx < local_degree; nbr_idx++) { + // printf("%d ", indices[nbr_idx]); + printf("%d %d %.2f \n", + major, + indices[nbr_idx], + *(edge_partition_weight_value_ptr + edge_offset + nbr_idx)); + } + printf("\n"); + }); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + } // end of loop over edge partitions + } + + /* + + if (multi_gpu) { + auto& comm = handle_->get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if 
(comm_rank == k) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::cout << "Rank :" << comm_rank << std::endl; + + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + + */ + + // #if 0 + // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive + + ASSERT_TRUE( + mg_graph_view.number_of_vertices() > + vertex_t{0}); // the code below to generate vertex pairs is invalid for an empty graph. + + auto mg_vertex_pair_buffer = + cugraph::allocate_dataframe_buffer>( + prims_usecase.num_vertex_pairs / comm_size + + (static_cast(comm_rank) < prims_usecase.num_vertex_pairs % comm_size ? 1 : 0), + handle_->get_stream()); + thrust::tabulate( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), + [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) { + cuco::detail::MurmurHash3_32 + hash_func{}; // use hash_func to generate arbitrary vertex pairs + auto v0 = 2; // static_cast(hash_func(i + comm_rank) % num_vertices); + auto v1 = + 3; // static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); + return thrust::make_tuple(v0, v1); + }); + + auto h_vertex_partition_range_lasts = mg_graph_view.vertex_partition_range_lasts(); + std::tie(std::get<0>(mg_vertex_pair_buffer), + std::get<1>(mg_vertex_pair_buffer), + std::ignore, + std::ignore, + std::ignore) = + cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + edge_t, + weight_t, + int32_t>(*handle_, + std::move(std::get<0>(mg_vertex_pair_buffer)), + std::move(std::get<1>(mg_vertex_pair_buffer)), + std::nullopt, + std::nullopt, + std::nullopt, + h_vertex_partition_range_lasts); + + for (int k = 0; k < comm_size; k++) { + auto& comm = handle_->get_comms(); + + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + 
raft::print_device_vector("std::get<0>(mg_vertex_pair_buffer)", + std::get<0>(mg_vertex_pair_buffer).data(), + std::get<0>(mg_vertex_pair_buffer).size(), + std::cout); + + raft::print_device_vector("std::get<1>(mg_vertex_pair_buffer)", + std::get<1>(mg_vertex_pair_buffer).data(), + std::get<1>(mg_vertex_pair_buffer).size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); + auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG per_v_pair_transform_dst_nbr_intersection"); + } + + cugraph::per_v_pair_transform_dst_nbr_intersection( + *handle_, + mg_graph_view, + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), + mg_out_degrees.begin(), + edge_weight_view, + intersection_op_t{}, + cugraph::get_dataframe_buffer_begin(mg_result_buffer)); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + for (int k = 0; k < comm_size; k++) { + auto& comm = handle_->get_comms(); + + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("std::get<0>(mg_result_buffer)", + std::get<0>(mg_result_buffer).data(), + std::get<0>(mg_result_buffer).size(), + std::cout); + + raft::print_device_vector("std::get<1>(mg_result_buffer)", + std::get<1>(mg_result_buffer).data(), + std::get<1>(mg_result_buffer).size(), + std::cout); + + std::cout << "------------------" << std::endl; + } + 
comm.barrier(); + } + + // 3. validate MG results + + if (prims_usecase.check_correctness) { + cugraph::unrenumber_int_vertices( + *handle_, + std::get<0>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + (*mg_renumber_map).data(), + h_vertex_partition_range_lasts); + cugraph::unrenumber_int_vertices( + *handle_, + std::get<1>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + (*mg_renumber_map).data(), + h_vertex_partition_range_lasts); + + auto mg_aggregate_vertex_pair_buffer = + cugraph::allocate_dataframe_buffer>( + 0, handle_->get_stream()); + std::get<0>(mg_aggregate_vertex_pair_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<0>(mg_vertex_pair_buffer).data(), + std::get<0>(mg_vertex_pair_buffer).size()); + std::get<1>(mg_aggregate_vertex_pair_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<1>(mg_vertex_pair_buffer).data(), + std::get<1>(mg_vertex_pair_buffer).size()); + + auto mg_aggregate_result_buffer = + cugraph::allocate_dataframe_buffer>(0, handle_->get_stream()); + std::get<0>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( + *handle_, std::get<0>(mg_result_buffer).data(), std::get<0>(mg_result_buffer).size()); + std::get<1>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( + *handle_, std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size()); + + cugraph::graph_t sg_graph(*handle_); + std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_graph_view = sg_graph.view(); + + auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); + auto sg_out_degrees = 
sg_graph_view.compute_out_degrees(*handle_); + + // cugraph::per_v_pair_transform_dst_nbr_intersection( + // *handle_, + // sg_graph_view, + // cugraph::get_dataframe_buffer_begin( + // mg_aggregate_vertex_pair_buffer /* now unrenumbered */), + // cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered + // */), sg_out_degrees.begin(), edge_weight_view, intersection_op_t{}, + // cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + // bool valid = thrust::equal(handle_->get_thrust_policy(), + // cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), + // cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), + // cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + // ASSERT_TRUE(valid); + } + } + // #endif + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr + Tests_MGPerVPairTransformDstNbrIntersection::handle_ = nullptr; + +using Tests_MGPerVPairTransformDstNbrIntersection_File = + Tests_MGPerVPairTransformDstNbrIntersection; +using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = + Tests_MGPerVPairTransformDstNbrIntersection; + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>(std::get<0>(param), + std::get<1>(param)); +} +/* +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + 
std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} +*/ + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGPerVPairTransformDstNbrIntersection_File, + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1}, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +// INSTANTIATE_TEST_SUITE_P(rmat_small_test, +// Tests_MGPerVPairTransformDstNbrIntersection_Rmat, +// ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, +// true}), +// ::testing::Values(cugraph::test::Rmat_Usecase( +// 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +// INSTANTIATE_TEST_SUITE_P( +// rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with +// --gtest_filter to select only the rmat_benchmark_test with a specific +// vertex & edge type combination) by command line arguments and do not +// include more than one Rmat_Usecase that differ only in scale or edge +// factor (to avoid running same benchmarks more than once) */ +// 
Tests_MGPerVPairTransformDstNbrIntersection_Rmat, +// ::testing::Combine( +// ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), +// ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, +// false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 615522a863b..f062da5e060 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -154,6 +154,21 @@ read_edgelist_from_csv_file(raft::handle_t const& handle, bool store_transposed, bool multi_gpu); +template +std::tuple, + std::optional< + cugraph::edge_property_t, + weight_t>>, + std::optional>> +read_graph_from_csv_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + // alias for easy customization for debug purposes: // template @@ -517,7 +532,7 @@ mg_vertex_property_values_to_sg_vertex_property_values( std::optional> sg_renumber_map, // std::nullopt if the SG graph is not renumbered std::optional> - mg_vertices, // std::nullopt if the entire local vertex partition range is assumed + mg_vertices, // std::nullopt if the entire local vertex partition range is assumed raft::device_span mg_values); template From f4d5e513944279b91f9be32efb0937780d2ddd7c Mon Sep 17 00:00:00 2001 From: Md Naim Date: Thu, 13 Jul 2023 16:52:53 -0700 Subject: [PATCH 02/22] Changes for weighted similarity, with debug statements --- cpp/src/link_prediction/similarity_impl.cuh | 49 +- cpp/src/prims/detail/nbr_intersection.cuh | 1065 ++++++++++++++--- ..._v_pair_transform_dst_nbr_intersection.cuh | 134 ++- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 48 +- .../link_prediction/mg_similarity_test.cpp | 9 +- ...r_v_pair_transform_dst_nbr_intersection.cu | 16 +- ...transform_dst_nbr_weighted_intersection.cu | 60 +- 7 files changed, 1139 insertions(+), 242 deletions(-) diff --git 
a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 5a7704f5b1e..b48f685c064 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -58,8 +58,7 @@ rmm::device_uvector similarity( // max weight((u,a), (a,v)). // Use these to compute weighted score // - CUGRAPH_FAIL("weighted similarity computations are not supported in this release"); - } else { + rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); // @@ -71,11 +70,55 @@ rmm::device_uvector similarity( per_v_pair_transform_dst_nbr_intersection( handle, graph_view, + *edge_weight_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs, out_degrees.begin(), + [functor] __device__(auto v1, + auto v2, + auto v1_degree, + auto v2_degree, + auto intersection, + auto properties0, + auto properties1) { + for (size_t k = 0; k < intersection.size(); k++) { + printf("=> %d %f %f\n", + static_cast(intersection[k]), + static_cast(properties0[k]), + static_cast(properties1[k])); + } + + weight_t weight_a = 1; + weight_t weight_b = 1; + weight_t min_weight_a_intersect_b = 1; + return functor.compute_score(static_cast(weight_a), + static_cast(weight_b), + static_cast(min_weight_a_intersect_b)); + }, + similarity_score.begin(), + do_expensive_check); + + return similarity_score; + + // CUGRAPH_FAIL("weighted similarity computations are not supported in this release"); + } else { + rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); + + // + // Compute vertex_degree for all vertices, then distribute to each GPU. 
+ // Need to use this instead of the dummy properties below + // + auto out_degrees = graph_view.compute_out_degrees(handle); + + per_v_pair_transform_dst_nbr_intersection( + handle, + graph_view, cugraph::edge_dummy_property_t{}.view(), - [functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) { + vertex_pairs_begin, + vertex_pairs_begin + num_vertex_pairs, + out_degrees.begin(), + [functor] __device__( + auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection, auto, auto) { return functor.compute_score(static_cast(v1_degree), static_cast(v2_degree), static_cast(intersection.size())); diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index da29a169313..32eb09d97c0 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -171,6 +171,7 @@ struct update_rx_major_local_degree_t { template struct update_rx_major_local_nbrs_t { @@ -190,10 +191,9 @@ struct update_rx_major_local_nbrs_t { raft::device_span local_nbrs_for_rx_majors{}; - raft::device_span - local_nbrs_weights_for_rx_majors{}; + raft::device_span local_nbrs_properties_for_rx_majors{}; - __device__ void operator()(size_t idx) const + __device__ void operator()(size_t idx) { auto it = thrust::upper_bound( thrust::seq, rx_reordered_group_lasts.begin(), rx_reordered_group_lasts.end(), idx); @@ -222,28 +222,21 @@ struct update_rx_major_local_nbrs_t { // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) + + size_t pos = local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + + local_edge_partition_idx] + + offset_in_local_edge_partition]; thrust::copy( - thrust::seq, - indices, - indices + local_degree, - local_nbrs_for_rx_majors.begin() + - local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + 
- local_edge_partition_idx] + - offset_in_local_edge_partition]); - - if constexpr (!std::is_same_v) { - thrust::transform( - thrust::seq, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(local_degree), - local_nbrs_weights_for_rx_majors.begin() + - local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + - local_edge_partition_idx] + - offset_in_local_edge_partition], - [&] __device__(auto edge_weight_idx) { - return edge_partition_e_value_input.get(edge_offset + edge_weight_idx); - }); + thrust::seq, indices, indices + local_degree, local_nbrs_for_rx_majors.begin() + pos); + + if constexpr (!std::is_same_v) { + auto nbrs_properties_start = local_nbrs_properties_for_rx_majors.begin() + pos; + + auto eddge_property_start = edge_partition_e_value_input.value_first() + edge_offset; + + for (size_t k = 0; k < local_degree; k++) { + nbrs_properties_start[k] = *(eddge_property_start + k); + } } } }; @@ -340,20 +333,19 @@ template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; raft::device_span first_element_offsets{}; raft::device_span first_element_indices{nullptr}; - raft::device_span - first_element_weights{nullptr}; + raft::device_span first_element_properties{nullptr}; SecondElementToIdxMap second_element_to_idx_map{}; raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; - raft::device_span - second_element_weights{nullptr}; + raft::device_span second_element_properties{nullptr}; edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; @@ -361,20 +353,19 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; - raft::device_span - nbr_intersection_weights{nullptr}; + raft::device_span 
nbr_intersection_properties0{nullptr}; + raft::device_span nbr_intersection_properties1{nullptr}; vertex_t invalid_id{}; int rank{1000}; size_t edge_partition_idx{1000}; - __device__ edge_t operator()(size_t i) const + __device__ edge_t operator()(size_t i) { - using weight_t = typename edge_partition_e_input_device_view_t::value_type; - auto pair = *(vertex_pair_first + i); +#if 1 printf( "-----rank=%d, edge_partition_idx=%d------\n", rank, static_cast(edge_partition_idx)); @@ -384,12 +375,18 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { if constexpr (!std::is_same_v) { printf("called with SecondElementToIdxMap---------\n"); } +#endif vertex_t const* indices0{nullptr}; + EdgeProperty_t const* property0{nullptr}; + [[maybe_unused]] edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { +#if 1 printf("element0 from edge_partition\n"); +#endif + vertex_t major = thrust::get<0>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -410,15 +407,35 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } } else { +#if 1 printf("element0 from first_element_to_idx_map******\n"); +#endif + auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); local_degree0 = static_cast(first_element_offsets[idx + 1] - first_element_offsets[idx]); - indices0 = first_element_indices.begin() + first_element_offsets[idx]; - + indices0 = first_element_indices.begin() + first_element_offsets[idx]; local_edge_offset0 = first_element_offsets[idx]; } + if constexpr (std::is_same_v) { + if constexpr (!std::is_same_v) { + property0 = edge_partition_e_value_input.value_first() + local_edge_offset0; + + } else { + // nothing + } + + } else { + if constexpr (!std::is_same_v) { + property0 = first_element_properties.begin() + local_edge_offset0; + + } else { + // nothing + } + } + +#if 1 vertex_t element0 = 
thrust::get<0>(pair); printf("element0 %d, local_degree0 %d local_edge_offset0 %d\n", static_cast(element0), @@ -427,31 +444,43 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { for (edge_t k = 0; k < local_degree0; k++) { if constexpr (std::is_same_v) { - if constexpr (!std::is_same_v) { - weight_t ew{}; - ew = edge_partition_e_value_input.get(local_edge_offset0 + k); - printf("(%d %.2f)* ", static_cast(indices0[k]), static_cast(ew)); + if constexpr (!std::is_same_v) { + EdgeProperty_t ep{}; + ep = edge_partition_e_value_input.get(local_edge_offset0 + k); + printf("( %d %d %.2f %.2f)* ", + static_cast(k), + static_cast(indices0[k]), + static_cast(ep), + static_cast(property0[k])); } else { - printf("%d* ", static_cast(indices0[k])); + printf("%d %d* ", static_cast(k), static_cast(indices0[k])); } } else { - if constexpr (!std::is_same_v) { - weight_t ew = *(first_element_weights.begin() + local_edge_offset0 + k); - printf("(%d %.2f)* ", static_cast(indices0[k]), static_cast(ew)); + if constexpr (!std::is_same_v) { + EdgeProperty_t ep = *(first_element_properties.begin() + local_edge_offset0 + k); + printf("(%d %d %.2f %.2f)* ", + static_cast(k), + static_cast(indices0[k]), + static_cast(ep), + static_cast(property0[k])); } else { - printf("%d* ", static_cast(indices0[k])); + printf("%d %d* ", static_cast(k), static_cast(indices0[k])); } } } - printf("\n"); +#endif vertex_t const* indices1{nullptr}; + EdgeProperty_t const* property1{nullptr}; [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { +#if 1 printf("element1 from edge_partition\n"); +#endif + vertex_t major = thrust::get<1>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -472,7 +501,10 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } } else { +#if 1 printf("element1 from 
second_element_to_idx_map----\n"); +#endif + auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_degree1 = static_cast(second_element_offsets[idx + 1] - second_element_offsets[idx]); @@ -481,6 +513,24 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { local_edge_offset1 = second_element_offsets[idx]; } + if constexpr (std::is_same_v) { + if constexpr (!std::is_same_v) { + property1 = edge_partition_e_value_input.value_first() + local_edge_offset1; + + } else { + // nothing + } + + } else { + if constexpr (!std::is_same_v) { + property1 = second_element_properties.begin() + local_edge_offset1; + + } else { + // nothing + } + } + +#if 1 vertex_t element1 = thrust::get<1>(pair); printf("element1 %d, local_degree1 %d local_edge_offset1 %d\n", static_cast(element1), @@ -489,43 +539,154 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { for (edge_t k = 0; k < local_degree1; k++) { if constexpr (std::is_same_v) { - if constexpr (!std::is_same_v) { - weight_t ew{}; - ew = edge_partition_e_value_input.get(local_edge_offset1 + k); - printf("(%d %.2f)- ", static_cast(indices1[k]), static_cast(ew)); + if constexpr (!std::is_same_v) { + EdgeProperty_t ep{}; + ep = edge_partition_e_value_input.get(local_edge_offset1 + k); + printf("(%d %d %.2f %.2f)- ", + static_cast(k), + static_cast(indices1[k]), + static_cast(ep), + static_cast(property1[k])); } else { - printf("%d- ", static_cast(indices1[k])); + printf("%d %d- ", static_cast(k), static_cast(indices1[k])); } } else { - if constexpr (!std::is_same_v) { - weight_t ew = *(second_element_weights.begin() + local_edge_offset1 + k); - printf("(%d %.2f)- ", static_cast(indices1[k]), static_cast(ew)); + if constexpr (!std::is_same_v) { + EdgeProperty_t ep = *(second_element_properties.begin() + local_edge_offset1 + k); + printf("(%d %d %.2f %.2f)- ", + static_cast(k), + static_cast(indices1[k]), + static_cast(ep), + static_cast(property1[k])); } else { - printf("%d- ", 
static_cast(indices1[k])); + printf("%d %d- ", static_cast(k), static_cast(indices1[k])); } } } printf("\n"); +#endif // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - auto it = - thrust::set_intersection(thrust::seq, - indices0, - indices0 + local_degree0, - indices1, - indices1 + local_degree1, - nbr_intersection_indices.begin() + nbr_intersection_offsets[i]); + auto inbr_start = nbr_intersection_indices.begin() + nbr_intersection_offsets[i]; + + auto it = thrust::set_intersection(thrust::seq, + indices0, + indices0 + local_degree0, + indices1, + indices1 + local_degree1, + inbr_start); thrust::fill(thrust::seq, it, nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], invalid_id); - return static_cast( - thrust::distance(nbr_intersection_indices.begin() + nbr_intersection_offsets[i], it)); + // + auto insection_size = static_cast(thrust::distance(inbr_start, it)); + + printf( + "rank = %d insection_size=%d\n", static_cast(rank), static_cast(insection_size)); + + printf("\n"); + for (size_t k = 0; k < insection_size; k++) { + printf("k = %d inbr = %d ", static_cast(k), static_cast(inbr_start[k])); + } + printf("\n"); + + if constexpr (!std::is_same_v) { + auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; + +#if 1 + printf("\n"); + for (size_t k = 0; k < local_degree0; k++) { + printf("k = %d p0 = %d\n", static_cast(k), static_cast(property0[k])); + } + + printf("\n"); + for (size_t k = 0; k < local_degree1; k++) { + printf("k = %d p1 = %d\n", static_cast(k), static_cast(property1[k])); + } + printf("\n"); + +#endif + + // copy edge properties from first vertex to common neighbors + thrust::lower_bound(thrust::seq, + indices0, + indices0 + local_degree0, + inbr_start, + it, + ip0_start, // indices + thrust::less()); + + printf("\n"); + for (size_t k = 0; k < insection_size; k++) { + 
printf("k = %d idx = %d ", static_cast(k), static_cast(ip0_start[k])); + } + printf("\n"); + + auto myrank = rank; + thrust::transform(thrust::seq, + ip0_start, + ip0_start + insection_size, + ip0_start, + [property0, myrank] __device__(auto idx) { + printf( + "myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); + return property0[static_cast(idx)]; + }); + + /// + + printf("\n"); + for (size_t k = 0; k < insection_size; k++) { + printf("rank = %d inbrp0 = %d ", static_cast(rank), static_cast(ip0_start[k])); + } + printf("\n"); + + /// + + auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; + + // copy edge properties from second vertex to common neighbors + thrust::lower_bound(thrust::seq, + indices1, + indices1 + local_degree1, + inbr_start, + it, + ip1_start, // indices + thrust::less()); + + printf("\n"); + for (size_t k = 0; k < insection_size; k++) { + printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); + } + printf("\n"); + + thrust::transform(thrust::seq, + ip1_start, + ip1_start + insection_size, + ip1_start, + [property1, myrank] __device__(auto idx) { + printf( + "myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); + return property1[static_cast(idx)]; + }); + + /// + + printf("\n"); + for (size_t k = 0; k < insection_size; k++) { + printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); + } + printf("\n"); + + /// + } + return static_cast(thrust::distance(inbr_start, it)); } }; @@ -545,7 +706,7 @@ struct strided_accumulate_t { } }; -template +template struct gatherv_indices_t { size_t output_size{}; int minor_comm_size{}; @@ -553,9 +714,18 @@ struct gatherv_indices_t { raft::device_span gathered_intersection_offsets{}; raft::device_span gathered_intersection_indices{}; raft::device_span combined_nbr_intersection_offsets{}; - raft::device_span combined_nbr_intersection_indices{}; + std::optional> gathered_nbr_intersection_properties0{ + std::nullopt}; + 
std::optional> gathered_nbr_intersection_properties1{ + std::nullopt}; + + std::optional> combined_nbr_intersection_properties0{ + std::nullopt}; + std::optional> combined_nbr_intersection_properties1{ + std::nullopt}; + __device__ void operator()(size_t i) const { auto output_offset = combined_nbr_intersection_offsets[i]; @@ -563,15 +733,36 @@ struct gatherv_indices_t { // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree vertices // in a single warp (better optimize if this becomes a performance bottleneck) + auto zipped_gathered_begin = + thrust::make_zip_iterator(thrust::make_tuple(gathered_intersection_indices.begin(), + gathered_nbr_intersection_properties0->begin(), + gathered_nbr_intersection_properties1->begin())); + + auto zipped_combined_begin = + thrust::make_zip_iterator(thrust::make_tuple(combined_nbr_intersection_indices.begin(), + combined_nbr_intersection_properties0->begin(), + combined_nbr_intersection_properties1->begin())); + for (int j = 0; j < minor_comm_size; ++j) { - thrust::copy( - thrust::seq, - gathered_intersection_indices.begin() + gathered_intersection_offsets[output_size * j + i], - gathered_intersection_indices.begin() + - gathered_intersection_offsets[output_size * j + i + 1], - combined_nbr_intersection_indices.begin() + output_offset); - output_offset += gathered_intersection_offsets[output_size * j + i + 1] - - gathered_intersection_offsets[output_size * j + i]; + if constexpr (!std::is_same_v) { + thrust::copy(thrust::seq, + zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], + zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i + 1], + zipped_combined_begin + output_offset); + + output_offset += gathered_intersection_offsets[output_size * j + i + 1] - + gathered_intersection_offsets[output_size * j + i]; + + } else { + thrust::copy(thrust::seq, + gathered_intersection_indices.begin() + + gathered_intersection_offsets[output_size * j + i], + 
gathered_intersection_indices.begin() + + gathered_intersection_offsets[output_size * j + i + 1], + combined_nbr_intersection_indices.begin() + output_offset); + output_offset += gathered_intersection_offsets[output_size * j + i + 1] - + gathered_intersection_offsets[output_size * j + i]; + } } } }; @@ -663,12 +854,15 @@ size_t count_invalid_vertex_pairs(raft::handle_t const& handle, // communicator. If we need to build the neighbor lists, grouping based on applying "vertex ID % // number of groups" is recommended for load-balancing. template -std::tuple, rmm::device_uvector> +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>> nbr_intersection(raft::handle_t const& handle, GraphViewType const& graph_view, + EdgeValueInputWrapper edge_value_input, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, - EdgeValueInputWrapper edge_value_input, std::array intersect_dst_nbr, bool do_expensive_check = false) { @@ -683,7 +877,8 @@ nbr_intersection(raft::handle_t const& handle, typename EdgeValueInputWrapper::value_iterator, typename EdgeValueInputWrapper::value_type>>; - using weight_t = typename edge_partition_e_input_device_view_t::value_type; + // using EdgeProperty_t = typename edge_partition_e_input_device_view_t::value_type; + using EdgeProperty_t = typename EdgeValueInputWrapper::value_type; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -721,7 +916,7 @@ nbr_intersection(raft::handle_t const& handle, std::optional> major_nbr_offsets{std::nullopt}; std::optional> major_nbr_indices{std::nullopt}; - std::optional> major_nbr_weights{std::nullopt}; + std::optional> major_nbr_properties{std::nullopt}; if constexpr (GraphViewType::is_multi_gpu) { if (intersect_minor_nbr[1]) { @@ -896,8 +1091,15 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); - 
rmm::device_uvector local_nbrs_weights_for_rx_majors(size_t{0}, - handle.get_stream()); + + // rmm::device_uvector local_nbrs_properties_for_rx_majors(size_t{0}, + // handle.get_stream()); + std::optional> local_nbrs_properties_for_rx_majors{ + std::nullopt}; + if constexpr (!std::is_same_v) { + local_nbrs_properties_for_rx_majors = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + } std::vector local_nbr_counts{}; { @@ -1018,8 +1220,9 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); - local_nbrs_weights_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), - handle.get_stream()); + if (local_nbrs_properties_for_rx_majors) + (*local_nbrs_properties_for_rx_majors) + .resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); for (int k = 0; k < comm_size; k++) { comm.barrier(); @@ -1055,6 +1258,7 @@ nbr_intersection(raft::handle_t const& handle, thrust::make_counting_iterator(reordered_idx_last), update_rx_major_local_nbrs_t{ major_comm_size, @@ -1071,8 +1275,8 @@ nbr_intersection(raft::handle_t const& handle, local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), local_nbrs_for_rx_majors.size()), - raft::device_span(local_nbrs_weights_for_rx_majors.data(), - local_nbrs_weights_for_rx_majors.size())}); + raft::device_span((*local_nbrs_properties_for_rx_majors).data(), + (*local_nbrs_properties_for_rx_majors).size())}); } for (int k = 0; k < comm_size; k++) { @@ -1166,10 +1370,10 @@ nbr_intersection(raft::handle_t const& handle, std::tie(*major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); - if constexpr (!std::is_same_v) { - std::tie(*major_nbr_weights, std::ignore) = + if constexpr (!std::is_same_v) { + std::tie(*major_nbr_properties, std::ignore) = shuffle_values(major_comm, - 
local_nbrs_weights_for_rx_majors.begin(), + (*local_nbrs_properties_for_rx_majors).begin(), local_nbr_counts, handle.get_stream()); } @@ -1185,10 +1389,10 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_indices).size(), std::cout); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*major_nbr_weights)", - (*major_nbr_weights).data(), - (*major_nbr_weights).size(), + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*major_nbr_properties)", + (*major_nbr_properties).data(), + (*major_nbr_properties).size(), std::cout); } @@ -1225,7 +1429,19 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); - rmm::device_uvector nbr_intersection_weights(size_t{0}, handle.get_stream()); + // rmm::device_uvector nbr_intersection_properties0(size_t{0}, + // handle.get_stream()); rmm::device_uvector + // nbr_intersection_properties1(size_t{0}, handle.get_stream()); + + std::optional> nbr_intersection_properties0{std::nullopt}; + std::optional> nbr_intersection_properties1{std::nullopt}; + + if constexpr (!std::is_same_v) { + nbr_intersection_properties0 = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + nbr_intersection_properties1 = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + } if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); @@ -1269,6 +1485,17 @@ nbr_intersection(raft::handle_t const& handle, std::vector> edge_partition_nbr_intersection_indices{}; edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); + + std::vector> edge_partition_nbr_intersection_property0{}; + std::vector> edge_partition_nbr_intersection_property1{}; 
+ + if constexpr (!std::is_same_v) { + edge_partition_nbr_intersection_property0.reserve( + graph_view.number_of_local_edge_partitions()); + edge_partition_nbr_intersection_property1.reserve( + graph_view.number_of_local_edge_partitions()); + } + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto rx_v_pair_counts = host_scalar_allgather(minor_comm, input_counts[i], handle.get_stream()); @@ -1285,8 +1512,23 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); - rmm::device_uvector rx_v_pair_nbr_intersection_weights(size_t{0}, - handle.get_stream()); + // rmm::device_uvector (*rx_v_pair_nbr_intersection_properties0)( + // size_t{0}, handle.get_stream()); + + // rmm::device_uvector (*rx_v_pair_nbr_intersection_properties1)( + // size_t{0}, handle.get_stream()); + + std::optional> rx_v_pair_nbr_intersection_properties0{ + std::nullopt}; + std::optional> rx_v_pair_nbr_intersection_properties1{ + std::nullopt}; + + if constexpr (!std::is_same_v) { + rx_v_pair_nbr_intersection_properties0 = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + rx_v_pair_nbr_intersection_properties1 = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + } std::vector rx_v_pair_nbr_intersection_index_tx_counts(size_t{0}); { @@ -1351,9 +1593,12 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - rx_v_pair_nbr_intersection_weights.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - + if constexpr (!std::is_same_v) { + (*rx_v_pair_nbr_intersection_properties0) + .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties1) + .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto& comm 
= handle.get_comms(); auto const comm_rank = comm.get_rank(); @@ -1370,10 +1615,10 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_indices).size(), std::cout); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*major_nbr_weights)", - (*major_nbr_weights).data(), - (*major_nbr_weights).size(), + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*major_nbr_properties)", + (*major_nbr_properties).data(), + (*major_nbr_properties).size(), std::cout); } @@ -1394,42 +1639,145 @@ nbr_intersection(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer)), vertex_t, edge_t, + EdgeProperty_t, edge_partition_e_input_device_view_t, - true>{nullptr, - raft::device_span(), - raft::device_span(), - raft::device_span(), - second_element_to_idx_map, - raft::device_span((*major_nbr_offsets).data(), - (*major_nbr_offsets).size()), - raft::device_span((*major_nbr_indices).data(), - (*major_nbr_indices).size()), - raft::device_span((*major_nbr_weights).data(), - (*major_nbr_weights).size()), - edge_partition, - edge_partition_e_value_input, - get_dataframe_buffer_begin(vertex_pair_buffer), - raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), - rx_v_pair_nbr_intersection_offsets.size()), - raft::device_span(rx_v_pair_nbr_intersection_indices.data(), - rx_v_pair_nbr_intersection_indices.size()), - raft::device_span(rx_v_pair_nbr_intersection_weights.data(), - rx_v_pair_nbr_intersection_weights.size()), - invalid_vertex_id::value, - handle.get_comms().get_rank(), - i}); + true>{ + nullptr, + raft::device_span(), + raft::device_span(), + raft::device_span(), + second_element_to_idx_map, + raft::device_span((*major_nbr_offsets).data(), + (*major_nbr_offsets).size()), + raft::device_span((*major_nbr_indices).data(), + (*major_nbr_indices).size()), + raft::device_span((*major_nbr_properties).data(), + (*major_nbr_properties).size()), + edge_partition, + edge_partition_e_value_input, + 
get_dataframe_buffer_begin(vertex_pair_buffer), + raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), + rx_v_pair_nbr_intersection_offsets.size()), + raft::device_span(rx_v_pair_nbr_intersection_indices.data(), + rx_v_pair_nbr_intersection_indices.size()), + raft::device_span((*rx_v_pair_nbr_intersection_properties0).data(), + (*rx_v_pair_nbr_intersection_properties0).size()), + raft::device_span((*rx_v_pair_nbr_intersection_properties1).data(), + (*rx_v_pair_nbr_intersection_properties1).size()), + invalid_vertex_id::value, + handle.get_comms().get_rank(), + i}); + + // { + // auto& comm = handle.get_comms(); + // auto const comm_rank = comm.get_rank(); + // auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i + << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("rx_v_pair_nbr_intersection_indices", + rx_v_pair_nbr_intersection_indices.data(), + rx_v_pair_nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", + (*rx_v_pair_nbr_intersection_properties0).data(), + (*rx_v_pair_nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", + (*rx_v_pair_nbr_intersection_properties1).data(), + (*rx_v_pair_nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + // } + } else { CUGRAPH_FAIL("unimplemented."); } - rx_v_pair_nbr_intersection_indices.resize( - thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), - thrust::remove(handle.get_thrust_policy(), - rx_v_pair_nbr_intersection_indices.begin(), - rx_v_pair_nbr_intersection_indices.end(), - invalid_vertex_id::value)), - handle.get_stream()); - 
rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); + if constexpr (std::is_same_v) { + rx_v_pair_nbr_intersection_indices.resize( + thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), + thrust::remove(handle.get_thrust_policy(), + rx_v_pair_nbr_intersection_indices.begin(), + rx_v_pair_nbr_intersection_indices.end(), + invalid_vertex_id::value)), + handle.get_stream()); + rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); + } else { + auto common_nbr_and_properties_begin = thrust::make_zip_iterator( + thrust::make_tuple(rx_v_pair_nbr_intersection_indices.begin(), + (*rx_v_pair_nbr_intersection_properties0).begin(), + (*rx_v_pair_nbr_intersection_properties1).begin())); + + auto last = thrust::remove_if( + handle.get_thrust_policy(), + common_nbr_and_properties_begin, + common_nbr_and_properties_begin + rx_v_pair_nbr_intersection_indices.size(), + [] __device__(auto nbr_p0_p1) { + return thrust::get<0>(nbr_p0_p1) == invalid_vertex_id::value; + }); + + rx_v_pair_nbr_intersection_indices.resize( + thrust::distance(common_nbr_and_properties_begin, last), handle.get_stream()); + + rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); + + (*rx_v_pair_nbr_intersection_properties0) + .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties0).shrink_to_fit(handle.get_stream()); + + (*rx_v_pair_nbr_intersection_properties1) + .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties1).shrink_to_fit(handle.get_stream()); + } + + { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i + << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + 
raft::print_device_vector("rx_v_pair_nbr_intersection_indices", + rx_v_pair_nbr_intersection_indices.data(), + rx_v_pair_nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", + (*rx_v_pair_nbr_intersection_properties0).data(), + (*rx_v_pair_nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", + (*rx_v_pair_nbr_intersection_properties1).data(), + (*rx_v_pair_nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } thrust::inclusive_scan(handle.get_thrust_policy(), rx_v_pair_nbr_intersection_sizes.begin(), @@ -1546,6 +1894,24 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector combined_nbr_intersection_indices(size_t{0}, handle.get_stream()); + + // rmm::device_uvector combined_nbr_intersection_properties0( + // size_t{0}, handle.get_stream()); + // rmm::device_uvector combined_nbr_intersection_properties1( + // size_t{0}, handle.get_stream()); + + std::optional> combined_nbr_intersection_properties0{ + std::nullopt}; + std::optional> combined_nbr_intersection_properties1{ + std::nullopt}; + + if constexpr (!std::is_same_v) { + combined_nbr_intersection_properties0 = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + combined_nbr_intersection_properties1 = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + } + { std::vector ranks(minor_comm_size); std::iota(ranks.begin(), ranks.end(), int{0}); @@ -1581,26 +1947,192 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(), handle.get_stream()); - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - 
gatherv_indices_t{ - rx_v_pair_counts[minor_comm_rank], - minor_comm_size, - raft::device_span(gathered_nbr_intersection_offsets.data(), - gathered_nbr_intersection_offsets.size()), - raft::device_span(gathered_nbr_intersection_indices.data(), - gathered_nbr_intersection_indices.size()), - raft::device_span(combined_nbr_intersection_offsets.data(), - combined_nbr_intersection_offsets.size()), - raft::device_span(combined_nbr_intersection_indices.data(), - combined_nbr_intersection_indices.size())}); + // rmm::device_uvector gathered_nbr_intersection_properties0( + // rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + // handle.get_stream()); + + // rmm::device_uvector gathered_nbr_intersection_properties1( + // rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + // handle.get_stream()); + + std::optional> gathered_nbr_intersection_properties0{ + std::nullopt}; + std::optional> gathered_nbr_intersection_properties1{ + std::nullopt}; + + if constexpr (!std::is_same_v) { + gathered_nbr_intersection_properties0 = + std::make_optional(rmm::device_uvector( + rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + handle.get_stream())); + gathered_nbr_intersection_properties1 = + std::make_optional(rmm::device_uvector( + rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + handle.get_stream())); + } + + if constexpr (!std::is_same_v) { + device_multicast_sendrecv(minor_comm, + (*rx_v_pair_nbr_intersection_properties0).begin(), + rx_v_pair_nbr_intersection_index_tx_counts, + tx_displacements, + ranks, + (*gathered_nbr_intersection_properties0).begin(), + gathered_nbr_intersection_index_rx_counts, + rx_displacements, + ranks, + handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties0).resize(size_t{0}, handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties0).shrink_to_fit(handle.get_stream()); + + (*combined_nbr_intersection_properties0) + 
.resize((*gathered_nbr_intersection_properties0).size(), handle.get_stream()); + + device_multicast_sendrecv(minor_comm, + (*rx_v_pair_nbr_intersection_properties1).begin(), + rx_v_pair_nbr_intersection_index_tx_counts, + tx_displacements, + ranks, + (*gathered_nbr_intersection_properties1).begin(), + gathered_nbr_intersection_index_rx_counts, + rx_displacements, + ranks, + handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties1).resize(size_t{0}, handle.get_stream()); + (*rx_v_pair_nbr_intersection_properties1).shrink_to_fit(handle.get_stream()); + (*combined_nbr_intersection_properties1) + .resize((*gathered_nbr_intersection_properties1).size(), handle.get_stream()); + } + + { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-device_multicast_sendrecv) :" << comm_rank + << " partition index:" << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("gathered_nbr_intersection_indices", + gathered_nbr_intersection_indices.data(), + gathered_nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*gathered_nbr_intersection_properties0)", + (*gathered_nbr_intersection_properties0).data(), + (*gathered_nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*gathered_nbr_intersection_properties1)", + (*gathered_nbr_intersection_properties1).data(), + (*gathered_nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + + if constexpr (!std::is_same_v) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), + gatherv_indices_t{ + rx_v_pair_counts[minor_comm_rank], + 
minor_comm_size, + raft::device_span(gathered_nbr_intersection_offsets.data(), + gathered_nbr_intersection_offsets.size()), + raft::device_span(gathered_nbr_intersection_indices.data(), + gathered_nbr_intersection_indices.size()), + raft::device_span(combined_nbr_intersection_offsets.data(), + combined_nbr_intersection_offsets.size()), + raft::device_span(combined_nbr_intersection_indices.data(), + combined_nbr_intersection_indices.size()), + + raft::device_span((*gathered_nbr_intersection_properties0).data(), + (*gathered_nbr_intersection_properties0).size()), + raft::device_span((*gathered_nbr_intersection_properties1).data(), + (*gathered_nbr_intersection_properties1).size()), + + raft::device_span((*combined_nbr_intersection_properties0).data(), + (*combined_nbr_intersection_properties0).size()), + raft::device_span((*combined_nbr_intersection_properties1).data(), + (*combined_nbr_intersection_properties1).size()) + + }); + + } else { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), + gatherv_indices_t{ + rx_v_pair_counts[minor_comm_rank], + minor_comm_size, + raft::device_span(gathered_nbr_intersection_offsets.data(), + gathered_nbr_intersection_offsets.size()), + raft::device_span(gathered_nbr_intersection_indices.data(), + gathered_nbr_intersection_indices.size()), + raft::device_span(combined_nbr_intersection_offsets.data(), + combined_nbr_intersection_offsets.size()), + raft::device_span(combined_nbr_intersection_indices.data(), + combined_nbr_intersection_indices.size()) + + }); + } + + { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-gather) :" << comm_rank << " partition index:" << i + << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + 
raft::print_device_vector("combined_nbr_intersection_indices", + combined_nbr_intersection_indices.data(), + combined_nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*combined_nbr_intersection_properties0)", + (*combined_nbr_intersection_properties0).data(), + (*combined_nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*combined_nbr_intersection_properties1)", + (*combined_nbr_intersection_properties1).data(), + (*combined_nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } } edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes)); edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); + if constexpr (!std::is_same_v) { + edge_partition_nbr_intersection_property0.push_back( + std::move((*combined_nbr_intersection_properties0))); + edge_partition_nbr_intersection_property1.push_back( + std::move((*combined_nbr_intersection_properties1))); + } } rmm::device_uvector nbr_intersection_sizes(input_size, handle.get_stream()); @@ -1609,7 +2141,10 @@ nbr_intersection(raft::handle_t const& handle, num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); - nbr_intersection_weights.resize(nbr_intersection_indices.size(), handle.get_stream()); + if constexpr (!std::is_same_v) { + (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); + (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); + } size_t size_offset{0}; size_t index_offset{0}; for (size_t i = 0; i < edge_partition_nbr_intersection_sizes.size(); ++i) { @@ -1622,6 +2157,20 @@ nbr_intersection(raft::handle_t const& handle, 
edge_partition_nbr_intersection_indices[i].begin(), edge_partition_nbr_intersection_indices[i].end(), nbr_intersection_indices.begin() + index_offset); + + if constexpr (!std::is_same_v) { + thrust::copy(handle.get_thrust_policy(), + edge_partition_nbr_intersection_property0[i].begin(), + edge_partition_nbr_intersection_property0[i].end(), + (*nbr_intersection_properties0).begin() + index_offset); + + thrust::copy(handle.get_thrust_policy(), + edge_partition_nbr_intersection_property1[i].begin(), + edge_partition_nbr_intersection_property1[i].end(), + (*nbr_intersection_properties1).begin() + index_offset); + } + + // Need to copy to (*nbr_intersection_properties0) and (*nbr_intersection_properties1) index_offset += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_offsets.resize(nbr_intersection_sizes.size() + size_t{1}, handle.get_stream()); @@ -1632,6 +2181,7 @@ nbr_intersection(raft::handle_t const& handle, size_first, size_first + nbr_intersection_sizes.size(), nbr_intersection_offsets.begin() + 1); + ///<=========== to here } else { auto edge_partition = edge_partition_device_view_t( @@ -1667,7 +2217,12 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - nbr_intersection_weights.resize(nbr_intersection_indices.size(), handle.get_stream()); + + if constexpr (!std::is_same_v) { + (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); + (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); + } + if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { thrust::tabulate( handle.get_thrust_policy(), @@ -1678,16 +2233,17 @@ nbr_intersection(raft::handle_t const& handle, decltype(vertex_pair_first), vertex_t, edge_t, + EdgeProperty_t, edge_partition_e_input_device_view_t, false>{ nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + 
raft::device_span(), nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + raft::device_span(), edge_partition, edge_partition_e_value_input, vertex_pair_first, @@ -1695,8 +2251,10 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.size()), raft::device_span(nbr_intersection_indices.data(), nbr_intersection_indices.size()), - raft::device_span(nbr_intersection_weights.data(), - nbr_intersection_weights.size()), + raft::device_span((*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size()), + raft::device_span((*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size()), invalid_vertex_id::value, 0, 0}); @@ -1712,31 +2270,168 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.end(), detail::not_equal_t{invalid_vertex_id::value}), handle.get_stream()); + + // rmm::device_uvector tmp_properties0(size_t{0}, handle.get_stream()); + // rmm::device_uvector tmp_properties1(size_t{0}, handle.get_stream()); + // if constexpr (!std::is_same_v) { + // tmp_properties0.resize(tmp_indices.size(), handle.get_stream()); + // tmp_properties1.resize(tmp_indices.size(), handle.get_stream()); + // } + + std::optional> tmp_properties0{std::nullopt}; + std::optional> tmp_properties1{std::nullopt}; + + if constexpr (!std::is_same_v) { + tmp_properties0 = std::make_optional( + rmm::device_uvector(tmp_indices.size(), handle.get_stream())); + tmp_properties1 = std::make_optional( + rmm::device_uvector(tmp_indices.size(), handle.get_stream())); + } + + auto zipped_itr_to_indices_and_properties_begin = + thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), + (*nbr_intersection_properties0).begin(), + (*nbr_intersection_properties1).begin())); + + auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( + tmp_indices.begin(), (*tmp_properties0).begin(), (*tmp_properties1).begin())); + size_t num_copied{0}; size_t num_scanned{0}; + + { + 
auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (before while loop) :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("nbr_intersection_indices", + nbr_intersection_indices.data(), + nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*nbr_intersection_properties0)", + (*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*nbr_intersection_properties1)", + (*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + while (num_scanned < nbr_intersection_indices.size()) { size_t this_scan_size = std::min( size_t{1} << 30, static_cast(thrust::distance(nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.end()))); - num_copied += static_cast(thrust::distance( - tmp_indices.begin() + num_copied, - thrust::copy_if(handle.get_thrust_policy(), - nbr_intersection_indices.begin() + num_scanned, - nbr_intersection_indices.begin() + num_scanned + this_scan_size, - tmp_indices.begin() + num_copied, - detail::not_equal_t{invalid_vertex_id::value}))); + if constexpr (std::is_same_v) { + num_copied += static_cast(thrust::distance( + tmp_indices.begin() + num_copied, + thrust::copy_if(handle.get_thrust_policy(), + nbr_intersection_indices.begin() + num_scanned, + nbr_intersection_indices.begin() + num_scanned + this_scan_size, + tmp_indices.begin() + num_copied, + detail::not_equal_t{invalid_vertex_id::value}))); + } else { + num_copied += static_cast(thrust::distance( + zipped_itr_to_tmps_begin + num_copied, + thrust::copy_if(handle.get_thrust_policy(), + 
zipped_itr_to_indices_and_properties_begin + num_scanned, + zipped_itr_to_indices_and_properties_begin + num_scanned + this_scan_size, + zipped_itr_to_tmps_begin + num_copied, + [] __device__(auto nbr_p0_p1) { + auto nbr = thrust::get<0>(nbr_p0_p1); + auto p0 = thrust::get<1>(nbr_p0_p1); + auto p1 = thrust::get<2>(nbr_p0_p1); + printf("%d %d %d\n", + static_cast(nbr), + static_cast(p0), + static_cast(p1)); + return thrust::get<0>(nbr_p0_p1) != invalid_vertex_id::value; + }))); + } num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); + if constexpr (!std::is_same_v) { + nbr_intersection_properties0 = std::move(tmp_properties0); + nbr_intersection_properties1 = std::move(tmp_properties1); + } + + { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after while loop) :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("nbr_intersection_indices", + nbr_intersection_indices.data(), + nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*nbr_intersection_properties0)", + (*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*nbr_intersection_properties1)", + (*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + #else - nbr_intersection_indices.resize( - thrust::distance(nbr_intersection_indices.begin(), - thrust::remove(handle.get_thrust_policy(), - nbr_intersection_indices.begin(), - nbr_intersection_indices.end(), - invalid_vertex_id::value)), - handle.get_stream()); + + if constexpr (std::is_same_v) { + nbr_intersection_indices.resize( + 
thrust::distance(nbr_intersection_indices.begin(), + thrust::remove(handle.get_thrust_policy(), + nbr_intersection_indices.begin(), + nbr_intersection_indices.end(), + invalid_vertex_id::value)), + handle.get_stream()); + } else { + nbr_intersection_indices.resize( + thrust::distance(zipped_itr_to_indices_and_properties_begin, + thrust::remove_if(handle.get_thrust_policy(), + zipped_itr_to_indices_and_properties_begin, + zipped_itr_to_indices_and_properties_begin + + nbr_intersection_indices.size(), + [] __device__(auto nbr_p0_p1) { + return thrust::get<0>(nbr_p0_p1) == + invalid_vertex_id::value; + })), + handle.get_stream()); + + (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); + (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); + } #endif thrust::inclusive_scan(handle.get_thrust_policy(), @@ -1745,9 +2440,45 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.begin() + 1); } + { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("nbr_intersection_indices", + nbr_intersection_indices.data(), + nbr_intersection_indices.size(), + std::cout); + + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*nbr_intersection_properties0)", + (*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size(), + std::cout); + + raft::print_device_vector("(*nbr_intersection_properties1)", + (*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + } + // 5. 
Return - return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices)); + return std::make_tuple(std::move(nbr_intersection_offsets), + std::move(nbr_intersection_indices), + std::move(nbr_intersection_properties0), + std::move(nbr_intersection_properties1)); } } // namespace detail diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index b1d0f971630..3d183cf45f1 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -97,6 +97,7 @@ struct indirection_compare_less_t { template ::value_type; + using edge_property_value_t = + typename thrust::iterator_traits::value_type; auto index = *(major_minor_pair_index_first + i); auto pair = *(major_minor_pair_first + index); @@ -128,6 +133,17 @@ struct call_intersection_op_t { auto intersection = raft::device_span( nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); + auto properties0 = raft::device_span(); + auto properties1 = raft::device_span(); + + if constexpr (!std::is_same_v) { + properties0 = raft::device_span( + nbr_intersection_properties0 + nbr_offsets[i], + nbr_intersection_properties0 + +nbr_offsets[i + 1]); + properties1 = raft::device_span( + nbr_intersection_properties1 + nbr_offsets[i], + nbr_intersection_properties1 + +nbr_offsets[i + 1]); + } property_t src_prop{}; property_t dst_prop{}; if (unique_vertices) { @@ -162,8 +178,14 @@ struct call_intersection_op_t { } printf("\n"); + // if constexpr (std::is_same_v) { *(major_minor_pair_value_output_first + index) = - intersection_op(src, dst, src_prop, dst_prop, intersection); + intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1); + + // } else { + // *(major_minor_pair_value_output_first + index) = + // intersection_op(src, dst, src_prop, dst_prop, intersection.size()); + // } } }; @@ -212,10 +234,10 @@ template 
::value_type; - using result_t = typename thrust::iterator_traits::value_type; + using edge_property_value_t = typename EdgeValueInputWrapper::value_type; + using result_t = typename thrust::iterator_traits::value_type; CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -384,15 +407,59 @@ void per_v_pair_transform_dst_nbr_intersection( auto chunk_vertex_pair_first = thrust::make_transform_iterator( chunk_vertex_pair_index_first, detail::indirection_t{vertex_pair_first}); - auto [intersection_offsets, intersection_indices] = + auto [intersection_offsets, + intersection_indices, + r_nbr_intersection_properties0, + r_nbr_intersection_properties1] = detail::nbr_intersection(handle, graph_view, + edge_value_input, chunk_vertex_pair_first, chunk_vertex_pair_first + this_chunk_size, - edge_value_input, std::array{true, true}, do_expensive_check); + if constexpr (!std::is_same_v) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("intersection_offsets", + intersection_offsets.data(), + intersection_offsets.size(), + std::cout); + + raft::print_device_vector("intersection_indices", + intersection_indices.data(), + intersection_indices.size(), + std::cout); + + // if constexpr (!std::is_same_v) { + if (r_nbr_intersection_properties0) { + raft::print_device_vector("r_nbr_intersection_properties0", + r_nbr_intersection_properties0->data(), + r_nbr_intersection_properties0->size(), + std::cout); + } + if (r_nbr_intersection_properties1) { + raft::print_device_vector("r_nbr_intersection_properties1", + r_nbr_intersection_properties1->data(), + r_nbr_intersection_properties1->size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + 
comm.barrier(); + } + } + if (unique_vertices) { auto vertex_value_input_for_unique_vertices_first = get_dataframe_buffer_begin(*property_buffer_for_unique_vertices); @@ -400,12 +467,14 @@ void per_v_pair_transform_dst_nbr_intersection( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t{ + detail::call_intersection_op_t< + GraphViewType, + decltype(vertex_value_input_for_unique_vertices_first), + typename decltype(r_nbr_intersection_properties0)::value_type::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ edge_partition, thrust::make_optional>((*unique_vertices).data(), (*unique_vertices).size()), @@ -413,28 +482,35 @@ void per_v_pair_transform_dst_nbr_intersection( intersection_op, intersection_offsets.data(), intersection_indices.data(), + r_nbr_intersection_properties0 ? r_nbr_intersection_properties0->data() : nullptr, + r_nbr_intersection_properties1 ? 
r_nbr_intersection_properties1->data() : nullptr, chunk_vertex_pair_index_first, vertex_pair_first, vertex_pair_value_output_first}); } else { - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t{ - edge_partition, - thrust::optional>{thrust::nullopt}, - vertex_value_input_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t< + GraphViewType, + VertexValueInputIterator, + typename decltype(r_nbr_intersection_properties0)::value_type::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ + edge_partition, + thrust::optional>{thrust::nullopt}, + vertex_value_input_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + r_nbr_intersection_properties0 ? r_nbr_intersection_properties0->data() : nullptr, + r_nbr_intersection_properties1 ? 
r_nbr_intersection_properties1->data() : nullptr, + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } chunk_vertex_pair_index_first += this_chunk_size; diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index 0913c9eb28f..376b12ca1b2 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -65,6 +65,7 @@ struct compute_chunk_id_t { template struct call_intersection_op_t { @@ -77,6 +78,8 @@ struct call_intersection_op_t { IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; typename GraphViewType::vertex_type const* nbr_indices{nullptr}; + EdgeValueInputWrapper nbr_intersection_properties0{nullptr}; + EdgeValueInputWrapper nbr_intersection_properties1{nullptr}; VertexPairIterator major_minor_pair_first{}; __device__ auto operator()(size_t i) const @@ -337,12 +340,15 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( chunk_vertex_pair_first + this_chunk_size); // detail::nbr_intersection() requires the // input vertex pairs to be sorted. 
- auto [intersection_offsets, intersection_indices] = + auto [intersection_offsets, + intersection_indices, + nbr_intersection_properties0, + nbr_intersection_properties1] = detail::nbr_intersection(handle, graph_view, + cugraph::edge_dummy_property_t{}.view(), chunk_vertex_pair_first, chunk_vertex_pair_first + this_chunk_size, - cugraph::edge_dummy_property_t{}.view(), std::array{true, true}, do_expensive_check); @@ -355,21 +361,29 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( thrust::make_tuple(get_dataframe_buffer_begin(src_value_buffer), get_dataframe_buffer_begin(dst_value_buffer), get_dataframe_buffer_begin(intersection_value_buffer))); - thrust::tabulate(handle.get_thrust_policy(), - triplet_first, - triplet_first + this_chunk_size, - detail::call_intersection_op_t{ - edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - chunk_vertex_pair_first}); + thrust::tabulate( + handle.get_thrust_policy(), + triplet_first, + triplet_first + this_chunk_size, + detail::call_intersection_op_t< + GraphViewType, + edge_partition_src_input_device_view_t, + edge_partition_dst_input_device_view_t, + // typename decltype(nbr_intersection_properties0)::value_type::const_pointer, + std::nullptr_t, + IntersectionOp, + decltype(chunk_vertex_pair_first)>{ + edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + // nbr_intersection_properties0? nbr_intersection_properties0->data(): nullptr, + // nbr_intersection_properties1? 
nbr_intersection_properties1->data(): nullptr, + nullptr, + nullptr, + chunk_vertex_pair_first}); rmm::device_uvector endpoint_vertices(size_t{0}, handle.get_stream()); auto endpoint_value_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); diff --git a/cpp/tests/link_prediction/mg_similarity_test.cpp b/cpp/tests/link_prediction/mg_similarity_test.cpp index c2a0b23c6d7..73b5a08827d 100644 --- a/cpp/tests/link_prediction/mg_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_similarity_test.cpp @@ -258,9 +258,10 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}), ::testing::Values(Similarity_Usecase{false, true, 20}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); - + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx") + // , cugraph::test::File_Usecase("test/datasets/netscience.mtx") + ))); +#if 0 INSTANTIATE_TEST_SUITE_P( rmat_small_test, Tests_MGSimilarity_Rmat, @@ -283,5 +284,5 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs ::testing::Values(Similarity_Usecase{false, false, 20}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); - +#endif CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index b9a95fb20be..aaf64000261 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -43,14 +43,16 @@ #include -template +template struct intersection_op_t { __device__ thrust::tuple operator()( vertex_t v0, vertex_t v1, edge_t v0_prop /* out degree */, edge_t v1_prop /* out degree */, - raft::device_span intersection) const + 
raft::device_span intersection, + raft::device_span intersection_p0, + raft::device_span intersection_p1) const { return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); } @@ -160,11 +162,11 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, + cugraph::edge_dummy_property_t{}.view(), cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), mg_out_degrees.begin(), - cugraph::edge_dummy_property_t{}.view(), - intersection_op_t{}, + intersection_op_t{}, cugraph::get_dataframe_buffer_begin(mg_result_buffer)); if (cugraph::test::g_perf) { @@ -228,12 +230,14 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, sg_graph_view, + cugraph::edge_dummy_property_t{}.view(), cugraph::get_dataframe_buffer_begin( mg_aggregate_vertex_pair_buffer /* now unrenumbered */), cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered */), sg_out_degrees.begin(), - cugraph::edge_dummy_property_t{}.view(), - intersection_op_t{}, + intersection_op_t{}, cugraph::get_dataframe_buffer_begin(sg_result_buffer)); bool valid = thrust::equal(handle_->get_thrust_policy(), diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index ed0d4a077f2..4bb94aaadde 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -48,14 +48,16 @@ #include -template +template struct intersection_op_t { __device__ thrust::tuple operator()( vertex_t v0, vertex_t v1, edge_t v0_prop /* out degree */, edge_t v1_prop /* out degree */, - raft::device_span intersection) const + raft::device_span intersection, + raft::device_span intersection_p0, + raft::device_span 
intersection_p1) const { // printf("\n%d %d %d %d %d\n", // static_cast(v0), @@ -326,7 +328,8 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::cout << "(" << major_comm_size << minor_comm_size << ")" << std::endl; - std::cout << major_comm_rank << minor_comm.get_rank() << std::endl; + std::cout << "(major_rank, minor_rank): " << major_comm_rank << minor_comm.get_rank() + << std::endl; raft::print_device_vector("vertex_partitions_range_lasts:", vertex_partitions_range_lasts.data(), @@ -478,22 +481,35 @@ class Tests_MGPerVPairTransformDstNbrIntersection thrust::make_counting_iterator(edge_partition.major_range_first()), thrust::make_counting_iterator(edge_partition.major_range_last()), [edge_partition, edge_partition_weight_value_ptr] __device__(vertex_t major) { + printf("major -> %d\n", major); + vertex_t major_idx{}; auto major_hypersparse_first = edge_partition.major_hypersparse_first(); if (major_hypersparse_first) { + printf("*major_hypersparse_first = %d\n", + static_cast(*major_hypersparse_first)); + if (major < *major_hypersparse_first) { major_idx = edge_partition.major_offset_from_major_nocheck(major); } else { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); - if (!major_hypersparse_idx) { return true; } + if (!major_hypersparse_idx) { + printf("No major_hypersparse_idx\n"); + return true; + } major_idx = edge_partition.major_offset_from_major_nocheck(*major_hypersparse_first) + *major_hypersparse_idx; } } else { + printf("No major_hypersparse_first\n"); + major_idx = edge_partition.major_offset_from_major_nocheck(major); } + + printf("==> major_idx = %d\n", major_idx); + vertex_t const* indices{nullptr}; edge_t edge_offset{}; edge_t local_degree{}; @@ -504,11 +520,12 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto number_of_edges = edge_partition.number_of_edges(); - printf("vertex = %d offset_idx = %d deg= %d number_of_edges=%d\n", - major, - edge_offset, - local_degree, - number_of_edges); 
+ printf( + "major = %d edge_offset = %d local_degree= %d nr_edges_of_edge_partition=%d\n", + major, + edge_offset, + local_degree, + number_of_edges); for (edge_t nbr_idx = 0; nbr_idx < local_degree; nbr_idx++) { // printf("%d ", indices[nbr_idx]); printf("%d %d %.2f \n", @@ -561,6 +578,13 @@ class Tests_MGPerVPairTransformDstNbrIntersection prims_usecase.num_vertex_pairs / comm_size + (static_cast(comm_rank) < prims_usecase.num_vertex_pairs % comm_size ? 1 : 0), handle_->get_stream()); + + std::cout << "Rank: " << comm_rank + << " prims_usecase.num_vertex_pairs:" << prims_usecase.num_vertex_pairs << std::endl; + + std::cout << "Rank: " << comm_rank << " cugraph::size_dataframe_buffer(mg_vertex_pair_buffer): " + << cugraph::size_dataframe_buffer(mg_vertex_pair_buffer) << std::endl; + thrust::tabulate( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), @@ -568,9 +592,12 @@ class Tests_MGPerVPairTransformDstNbrIntersection [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) { cuco::detail::MurmurHash3_32 hash_func{}; // use hash_func to generate arbitrary vertex pairs - auto v0 = 2; // static_cast(hash_func(i + comm_rank) % num_vertices); - auto v1 = - 3; // static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); + auto v0 = static_cast(hash_func(i + comm_rank) % num_vertices); + auto v1 = static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); + printf("comm_rank=%d v0= %d, v1=%d\n", + static_cast(comm_rank), + static_cast(v0), + static_cast(v1)); return thrust::make_tuple(v0, v1); }); @@ -628,11 +655,11 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, + edge_weight_view, cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), mg_out_degrees.begin(), - edge_weight_view, - intersection_op_t{}, + intersection_op_t{}, 
cugraph::get_dataframe_buffer_begin(mg_result_buffer)); if (cugraph::test::g_perf) { @@ -719,10 +746,11 @@ class Tests_MGPerVPairTransformDstNbrIntersection // cugraph::per_v_pair_transform_dst_nbr_intersection( // *handle_, // sg_graph_view, + // edge_weight_view, // cugraph::get_dataframe_buffer_begin( // mg_aggregate_vertex_pair_buffer /* now unrenumbered */), // cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered - // */), sg_out_degrees.begin(), edge_weight_view, intersection_op_t{}, + // */), sg_out_degrees.begin(), intersection_op_t{}, // cugraph::get_dataframe_buffer_begin(sg_result_buffer)); // bool valid = thrust::equal(handle_->get_thrust_policy(), @@ -814,7 +842,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1}, true}), + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{5}, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); // INSTANTIATE_TEST_SUITE_P(rmat_small_test, From f4dd84911f48003373d482809afe1cf6cc9a35c5 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Wed, 19 Jul 2023 07:28:20 -0700 Subject: [PATCH 03/22] weighted jaccard, sorsen and overlap tests, with debugging statements --- .../all_cuda-118_arch-x86_64.yaml | 90 +- cpp/src/link_prediction/jaccard_impl.cuh | 35 +- cpp/src/link_prediction/overlap_impl.cuh | 33 +- cpp/src/link_prediction/similarity_impl.cuh | 63 +- cpp/src/link_prediction/sorensen_impl.cuh | 33 +- cpp/src/prims/detail/nbr_intersection.cuh | 841 +++++++++--------- ..._v_pair_transform_dst_nbr_intersection.cuh | 79 +- cpp/tests/CMakeLists.txt | 8 + .../mg_weighted_similarity_test.cpp | 400 +++++++++ .../link_prediction/similarity_compare.cpp | 232 ++++- .../link_prediction/similarity_compare.hpp | 31 +- .../weighted_similarity_test.cpp | 333 +++++++ 
...transform_dst_nbr_weighted_intersection.cu | 9 +- cpp/tests/utilities/test_graphs.hpp | 1 + 14 files changed, 1574 insertions(+), 614 deletions(-) create mode 100644 cpp/tests/link_prediction/mg_weighted_similarity_test.cpp create mode 100644 cpp/tests/link_prediction/weighted_similarity_test.cpp diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0487fb53c07..a0fb4379291 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -1,68 +1,22 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- aiohttp -- c-compiler -- cmake>=3.26.4 -- cudatoolkit=11.8 -- cudf==23.8.* -- cupy>=12.0.0 -- cxx-compiler -- cython>=0.29,<0.30 -- dask-core>=2023.5.1 -- dask-cuda==23.8.* -- dask-cudf==23.8.* -- dask>=2023.5.1 -- distributed>=2023.5.1 -- doxygen -- fsspec[http]>=0.6.0 -- gcc_linux-64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcudf==23.8.* -- libcugraphops==23.8.* -- libraft-headers==23.8.* -- libraft==23.8.* -- librmm==23.8.* -- nbsphinx -- nccl>=2.9.9 -- networkx>=2.5.1 -- ninja -- notebook>=0.5.0 -- numba>=0.57 -- numpy>=1.21 -- numpydoc -- nvcc_linux-64=11.8 -- openmpi -- pandas -- pre-commit -- pydata-sphinx-theme -- pylibcugraphops==23.8.* -- pylibraft==23.8.* -- pytest -- pytest-benchmark -- pytest-cov -- pytest-xdist -- python-louvain -- raft-dask==23.8.* -- recommonmark -- requests -- rmm==23.8.* -- scikit-build>=0.13.1 -- scikit-learn>=0.23.1 -- scipy -- sphinx-copybutton -- sphinx-markdown-tables -- sphinx<6 -- sphinxcontrib-websupport -- ucx-proc=*=gpu -- ucx-py==0.33.* -name: all_cuda-118_arch-x86_64 +#This file is generated by `rapids - dependency - file - generator`. 
+#To make changes, edit../../ dependencies.yaml and run `rapids - dependency - file - generator`. +channels : -rapidsai - rapidsai - nightly - dask / label / dev - conda - forge - + nvidia dependencies : -aiohttp - c - compiler - cmake >= + 3.26.4 - cudatoolkit = 11.8 - cudf == 23.8. * -cupy >= 12.0.0 - cxx - compiler - cython >= 0.29, + < 0.30 - dask - core >= 2023.5.1 - dask - cuda == 23.8. * -dask - cudf == + 23.8. * -dask >= 2023.5.1 - distributed >= 2023.5.1 - doxygen - fsspec[http] >= + 0.6.0 - gcc_linux - 64 = + 11. * -gmock >= 1.13.0 - graphviz - gtest >= 1.13.0 - ipython - libcudf == + 23.8. * -libcugraphops == 23.8. * -libraft - headers == 23.8. * -libraft == + 23.8. * -librmm == + 23.8. * -nbsphinx - nccl >= 2.9.9 - networkx >= 2.5.1 - ninja - notebook >= + 0.5.0 - numba >= 0.57 - numpy >= 1.21 - numpydoc - nvcc_linux - 64 = + 11.8 - openmpi - pandas - pre - commit - pydata - sphinx - theme - pylibcugraphops == + 23.8. * -pylibraft == + 23.8. * -pytest - pytest - benchmark - pytest - cov - pytest - xdist - python - + louvain - raft - dask == + 23.8. * -recommonmark - requests - rmm == + 23.8. * -scikit - build >= 0.13.1 - scikit - learn >= + 0.23.1 - scipy - sphinx - copybutton - sphinx - markdown - tables - sphinx < + 6 - sphinxcontrib - websupport - ucx - proc = *= + gpu - ucx - py == 0.33. 
* name : all_cuda - 118_arch - x86_64 diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index b9675e3a578..b67911afbb7 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -27,19 +27,10 @@ struct jaccard_functor_t { template weight_t __device__ compute_score(weight_t cardinality_a, weight_t cardinality_b, - weight_t cardinality_a_intersect_b) const + weight_t cardinality_a_intersect_b, + weight_t cardinality_a_union_b) const { - return cardinality_a_intersect_b / (cardinality_a + cardinality_b - cardinality_a_intersect_b); - } -}; - -struct weighted_jaccard_functor_t { - template - weight_t __device__ compute_score(weight_t weight_a, - weight_t weight_b, - weight_t min_weight_a_intersect_b) const - { - return min_weight_a_intersect_b / (weight_a + weight_b - min_weight_a_intersect_b); + return cardinality_a_intersect_b / cardinality_a_union_b; } }; @@ -55,20 +46,12 @@ rmm::device_uvector jaccard_coefficients( { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (!edge_weight_view) - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::jaccard_functor_t{}, - do_expensive_check); - else - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::weighted_jaccard_functor_t{}, - do_expensive_check); + return detail::similarity(handle, + graph_view, + edge_weight_view, + vertex_pairs, + detail::jaccard_functor_t{}, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index 4c001a8f243..b1f3d87b5e4 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -27,22 +27,13 @@ struct overlap_functor_t { template weight_t __device__ compute_score(weight_t cardinality_a, weight_t cardinality_b, - weight_t cardinality_a_intersect_b) const + weight_t 
cardinality_a_intersect_b, + weight_t cardinality_a_union_b) const { return cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b); } }; -struct weighted_overlap_functor_t { - template - weight_t __device__ compute_score(weight_t weight_a, - weight_t weight_b, - weight_t min_weight_a_intersect_b) const - { - return min_weight_a_intersect_b / std::min(weight_a, weight_b); - } -}; - } // namespace detail template @@ -55,20 +46,12 @@ rmm::device_uvector overlap_coefficients( { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (!edge_weight_view) - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::overlap_functor_t{}, - do_expensive_check); - else - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::weighted_overlap_functor_t{}, - do_expensive_check); + return detail::similarity(handle, + graph_view, + edge_weight_view, + vertex_pairs, + detail::overlap_functor_t{}, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index b48f685c064..ba70f83252e 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -65,35 +66,64 @@ rmm::device_uvector similarity( // Compute vertex_degree for all vertices, then distribute to each GPU. // Need to use this instead of the dummy properties below // - auto out_degrees = graph_view.compute_out_degrees(handle); + rmm::device_uvector weighted_out_degrees = + compute_out_weight_sums(handle, graph_view, *edge_weight_view); + + rmm::device_uvector vertex_weights = + compute_out_weight_sums(handle, graph_view, *edge_weight_view); + + std::cout << ">>>>>>> WITH WEIGHT .........." 
<< std::endl; per_v_pair_transform_dst_nbr_intersection( handle, graph_view, *edge_weight_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs, - out_degrees.begin(), + weighted_out_degrees.begin(), [functor] __device__(auto v1, auto v2, - auto v1_degree, - auto v2_degree, + auto weight_a, + auto weight_b, auto intersection, - auto properties0, - auto properties1) { + auto intersected_properties_a, + auto intersected_properties_b) { for (size_t k = 0; k < intersection.size(); k++) { printf("=> %d %f %f\n", static_cast(intersection[k]), - static_cast(properties0[k]), - static_cast(properties1[k])); + static_cast(intersected_properties_a[k]), + static_cast(intersected_properties_b[k])); } - weight_t weight_a = 1; - weight_t weight_b = 1; - weight_t min_weight_a_intersect_b = 1; + weight_t min_weight_a_intersect_b = weight_t{0}; + weight_t max_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_intersected_a = weight_t{0}; + weight_t sum_of_intersected_b = weight_t{0}; + + for (size_t k = 0; k < intersection.size(); k++) { + min_weight_a_intersect_b += + std::min(intersected_properties_a[k], intersected_properties_b[k]); + max_weight_a_intersect_b += + std::max(intersected_properties_a[k], intersected_properties_b[k]); + sum_of_intersected_a += intersected_properties_a[k]; + sum_of_intersected_b += intersected_properties_b[k]; + } + + weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; + weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b; + + max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; + + printf("=> v1= %d v2 = %d\n", static_cast(v1), static_cast(v2)); + printf("=>weight_a = %f\n", static_cast(weight_a)); + printf("=>weight_b = %f\n", static_cast(weight_b)); + printf("=>min_weight_a_intersect_b = %f\n", static_cast(min_weight_a_intersect_b)); + printf("=>max_weight_a_intersect_b = %f\n", static_cast(max_weight_a_intersect_b)); + return functor.compute_score(static_cast(weight_a), static_cast(weight_b), - 
static_cast(min_weight_a_intersect_b)); + static_cast(min_weight_a_intersect_b), + static_cast(max_weight_a_intersect_b)); }, similarity_score.begin(), do_expensive_check); @@ -102,6 +132,7 @@ rmm::device_uvector similarity( // CUGRAPH_FAIL("weighted similarity computations are not supported in this release"); } else { + std::cout << ">>>>>>> WITHOUT WEIGHT .........." << std::endl; rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); // @@ -119,9 +150,11 @@ rmm::device_uvector similarity( out_degrees.begin(), [functor] __device__( auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection, auto, auto) { - return functor.compute_score(static_cast(v1_degree), - static_cast(v2_degree), - static_cast(intersection.size())); + return functor.compute_score( + static_cast(v1_degree), + static_cast(v2_degree), + static_cast(intersection.size()), + static_cast(v1_degree + v2_degree - intersection.size())); }, similarity_score.begin(), do_expensive_check); diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index ac84358049a..0972b206c2a 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ b/cpp/src/link_prediction/sorensen_impl.cuh @@ -27,22 +27,13 @@ struct sorensen_functor_t { template weight_t __device__ compute_score(weight_t cardinality_a, weight_t cardinality_b, - weight_t cardinality_a_intersect_b) const + weight_t cardinality_a_intersect_b, + weight_t cardinality_a_union_b) const { return (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b); } }; -struct weighted_sorensen_functor_t { - template - weight_t __device__ compute_score(weight_t weight_a, - weight_t weight_b, - weight_t min_weight_a_intersect_b) const - { - return (2 * min_weight_a_intersect_b) / (weight_a + weight_b); - } -}; - } // namespace detail template @@ -55,20 +46,12 @@ rmm::device_uvector sorensen_coefficients( { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if 
(!edge_weight_view) - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::sorensen_functor_t{}, - do_expensive_check); - else - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::weighted_sorensen_functor_t{}, - do_expensive_check); + return detail::similarity(handle, + graph_view, + edge_weight_view, + vertex_pairs, + detail::sorensen_functor_t{}, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 32eb09d97c0..900ba5190e5 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -366,14 +366,15 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto pair = *(vertex_pair_first + i); #if 1 - printf( - "-----rank=%d, edge_partition_idx=%d------\n", rank, static_cast(edge_partition_idx)); + if (false) + printf( + "-----rank=%d, edge_partition_idx=%d------\n", rank, static_cast(edge_partition_idx)); if constexpr (!std::is_same_v) { - printf("called with FirstElementToIdxMap********\n"); + if (false) printf("called with FirstElementToIdxMap********\n"); } if constexpr (!std::is_same_v) { - printf("called with SecondElementToIdxMap---------\n"); + if (false) printf("called with SecondElementToIdxMap---------\n"); } #endif @@ -384,7 +385,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree0{0}; if constexpr (std::is_same_v) { #if 1 - printf("element0 from edge_partition\n"); + if (false) printf("element0 from edge_partition\n"); #endif vertex_t major = thrust::get<0>(pair); @@ -408,7 +409,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { #if 1 - printf("element0 from first_element_to_idx_map******\n"); + if (false) printf("element0 from first_element_to_idx_map******\n"); #endif auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); @@ -437,39 +438,42 @@ 
struct copy_intersecting_nbrs_and_update_intersection_size_t { #if 1 vertex_t element0 = thrust::get<0>(pair); - printf("element0 %d, local_degree0 %d local_edge_offset0 %d\n", - static_cast(element0), - static_cast(local_degree0), - static_cast(local_edge_offset0)); + if (false) + printf("element0 %d, local_degree0 %d local_edge_offset0 %d\n", + static_cast(element0), + static_cast(local_degree0), + static_cast(local_edge_offset0)); for (edge_t k = 0; k < local_degree0; k++) { if constexpr (std::is_same_v) { if constexpr (!std::is_same_v) { EdgeProperty_t ep{}; ep = edge_partition_e_value_input.get(local_edge_offset0 + k); - printf("( %d %d %.2f %.2f)* ", - static_cast(k), - static_cast(indices0[k]), - static_cast(ep), - static_cast(property0[k])); + if (false) + printf("( %d %d %.2f %.2f)* ", + static_cast(k), + static_cast(indices0[k]), + static_cast(ep), + static_cast(property0[k])); } else { - printf("%d %d* ", static_cast(k), static_cast(indices0[k])); + if (false) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); } } else { if constexpr (!std::is_same_v) { EdgeProperty_t ep = *(first_element_properties.begin() + local_edge_offset0 + k); - printf("(%d %d %.2f %.2f)* ", - static_cast(k), - static_cast(indices0[k]), - static_cast(ep), - static_cast(property0[k])); + if (false) + printf("(%d %d %.2f %.2f)* ", + static_cast(k), + static_cast(indices0[k]), + static_cast(ep), + static_cast(property0[k])); } else { - printf("%d %d* ", static_cast(k), static_cast(indices0[k])); + if (false) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); } } } - printf("\n"); + if (false) printf("\n"); #endif vertex_t const* indices1{nullptr}; @@ -478,7 +482,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree1{0}; if constexpr (std::is_same_v) { #if 1 - printf("element1 from edge_partition\n"); + if (false) printf("element1 from edge_partition\n"); #endif vertex_t major = thrust::get<1>(pair); @@ -502,7 +506,7 @@ struct 
copy_intersecting_nbrs_and_update_intersection_size_t { } } else { #if 1 - printf("element1 from second_element_to_idx_map----\n"); + if (false) printf("element1 from second_element_to_idx_map----\n"); #endif auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); @@ -532,39 +536,42 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { #if 1 vertex_t element1 = thrust::get<1>(pair); - printf("element1 %d, local_degree1 %d local_edge_offset1 %d\n", - static_cast(element1), - static_cast(local_degree1), - static_cast(local_edge_offset1)); + if (false) + printf("element1 %d, local_degree1 %d local_edge_offset1 %d\n", + static_cast(element1), + static_cast(local_degree1), + static_cast(local_edge_offset1)); for (edge_t k = 0; k < local_degree1; k++) { if constexpr (std::is_same_v) { if constexpr (!std::is_same_v) { EdgeProperty_t ep{}; ep = edge_partition_e_value_input.get(local_edge_offset1 + k); - printf("(%d %d %.2f %.2f)- ", - static_cast(k), - static_cast(indices1[k]), - static_cast(ep), - static_cast(property1[k])); + if (false) + printf("(%d %d %.2f %.2f)- ", + static_cast(k), + static_cast(indices1[k]), + static_cast(ep), + static_cast(property1[k])); } else { - printf("%d %d- ", static_cast(k), static_cast(indices1[k])); + if (false) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); } } else { if constexpr (!std::is_same_v) { EdgeProperty_t ep = *(second_element_properties.begin() + local_edge_offset1 + k); - printf("(%d %d %.2f %.2f)- ", - static_cast(k), - static_cast(indices1[k]), - static_cast(ep), - static_cast(property1[k])); + if (false) + printf("(%d %d %.2f %.2f)- ", + static_cast(k), + static_cast(indices1[k]), + static_cast(ep), + static_cast(property1[k])); } else { - printf("%d %d- ", static_cast(k), static_cast(indices1[k])); + if (false) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); } } } - printf("\n"); + if (false) printf("\n"); #endif // FIXME: this can lead to thread-divergence with a mix of 
high-degree and low-degree @@ -587,29 +594,30 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { // auto insection_size = static_cast(thrust::distance(inbr_start, it)); - printf( - "rank = %d insection_size=%d\n", static_cast(rank), static_cast(insection_size)); + if (false) + printf( + "rank = %d insection_size=%d\n", static_cast(rank), static_cast(insection_size)); - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - printf("k = %d inbr = %d ", static_cast(k), static_cast(inbr_start[k])); + if (false) printf("k = %d inbr = %d ", static_cast(k), static_cast(inbr_start[k])); } - printf("\n"); + if (false) printf("\n"); if constexpr (!std::is_same_v) { auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; #if 1 - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < local_degree0; k++) { - printf("k = %d p0 = %d\n", static_cast(k), static_cast(property0[k])); + if (false) printf("k = %d p0 = %d\n", static_cast(k), static_cast(property0[k])); } - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < local_degree1; k++) { - printf("k = %d p1 = %d\n", static_cast(k), static_cast(property1[k])); + if (false) printf("k = %d p1 = %d\n", static_cast(k), static_cast(property1[k])); } - printf("\n"); + if (false) printf("\n"); #endif @@ -622,30 +630,31 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { ip0_start, // indices thrust::less()); - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - printf("k = %d idx = %d ", static_cast(k), static_cast(ip0_start[k])); + if (false) printf("k = %d idx = %d ", static_cast(k), static_cast(ip0_start[k])); } - printf("\n"); + if (false) printf("\n"); auto myrank = rank; - thrust::transform(thrust::seq, - ip0_start, - ip0_start + insection_size, - ip0_start, - [property0, myrank] __device__(auto idx) { - printf( - "myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); - return 
property0[static_cast(idx)]; - }); + thrust::transform( + thrust::seq, + ip0_start, + ip0_start + insection_size, + ip0_start, + [property0, myrank] __device__(auto idx) { + if (false) printf("myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); + return property0[static_cast(idx)]; + }); /// - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - printf("rank = %d inbrp0 = %d ", static_cast(rank), static_cast(ip0_start[k])); + if (false) + printf("rank = %d inbrp0 = %d ", static_cast(rank), static_cast(ip0_start[k])); } - printf("\n"); + if (false) printf("\n"); /// @@ -660,29 +669,31 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { ip1_start, // indices thrust::less()); - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); + if (false) + printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); } - printf("\n"); - - thrust::transform(thrust::seq, - ip1_start, - ip1_start + insection_size, - ip1_start, - [property1, myrank] __device__(auto idx) { - printf( - "myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); - return property1[static_cast(idx)]; - }); + if (false) printf("\n"); + + thrust::transform( + thrust::seq, + ip1_start, + ip1_start + insection_size, + ip1_start, + [property1, myrank] __device__(auto idx) { + if (false) printf("myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); + return property1[static_cast(idx)]; + }); /// - printf("\n"); + if (false) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); + if (false) + printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); } - printf("\n"); + if (false) printf("\n"); /// } @@ -869,6 +880,8 @@ nbr_intersection(raft::handle_t const& handle, using vertex_t = typename 
GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; + bool DEBUG_CODE = false; + using edge_partition_e_input_device_view_t = std::conditional_t< std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, @@ -982,19 +995,20 @@ nbr_intersection(raft::handle_t const& handle, } } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector( - "unique_majors", unique_majors.data(), unique_majors.size(), std::cout); + raft::print_device_vector( + "unique_majors", unique_majors.data(), unique_majors.size(), std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } // 2.2 Send majors and group (major_comm_rank, edge_partition_idx) counts @@ -1035,25 +1049,26 @@ nbr_intersection(raft::handle_t const& handle, size_t{0}); } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector( - "d_tx_group_counts", d_tx_group_counts.data(), d_tx_group_counts.size(), std::cout); + raft::print_device_vector( + "d_tx_group_counts", d_tx_group_counts.data(), d_tx_group_counts.size(), std::cout); - std::cout << "tx_counts:"; - std::copy(tx_counts.data(), - tx_counts.data() + tx_counts.size(), - std::ostream_iterator(std::cout, " ")); - std::cout << std::endl; + std::cout << "tx_counts:"; + 
std::copy(tx_counts.data(), + tx_counts.data() + tx_counts.size(), + std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } std::tie(rx_majors, rx_major_counts) = shuffle_values(major_comm, unique_majors.begin(), tx_counts, handle.get_stream()); @@ -1065,27 +1080,28 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("rx_majors", rx_majors.data(), rx_majors.size(), std::cout); + raft::print_device_vector("rx_majors", rx_majors.data(), rx_majors.size(), std::cout); - std::cout << "rx_major_counts:"; - std::copy(rx_major_counts.data(), - rx_major_counts.data() + rx_major_counts.size(), - std::ostream_iterator(std::cout, " ")); - std::cout << std::endl; + std::cout << "rx_major_counts:"; + std::copy(rx_major_counts.data(), + rx_major_counts.data() + rx_major_counts.size(), + std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; - raft::print_device_vector( - "rx_group_counts", rx_group_counts.data(), rx_group_counts.size(), std::cout); + raft::print_device_vector( + "rx_group_counts", rx_group_counts.data(), rx_group_counts.size(), std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } // 2.3. 
Enumerate degrees and neighbors for the received majors @@ -1115,21 +1131,22 @@ nbr_intersection(raft::handle_t const& handle, raft::device_span( rx_group_counts.data(), rx_group_counts.size())}); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("rx_reordered_group_counts", - rx_reordered_group_counts.data(), - rx_reordered_group_counts.size(), - std::cout); + raft::print_device_vector("rx_reordered_group_counts", + rx_reordered_group_counts.data(), + rx_reordered_group_counts.size(), + std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } rmm::device_uvector d_rx_reordered_group_lasts(rx_reordered_group_counts.size(), handle.get_stream()); @@ -1150,19 +1167,20 @@ nbr_intersection(raft::handle_t const& handle, rx_group_counts.end(), rx_group_firsts.begin()); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector( - "rx_group_firsts", rx_group_firsts.data(), rx_group_firsts.size(), std::cout); + raft::print_device_vector( + "rx_group_firsts", rx_group_firsts.data(), rx_group_firsts.size(), std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } 
local_degrees_for_rx_majors.resize(rx_majors.size(), handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { @@ -1191,21 +1209,22 @@ nbr_intersection(raft::handle_t const& handle, local_degrees_for_rx_majors.size())}); } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("local_degrees_for_rx_majors", - local_degrees_for_rx_majors.data(), - local_degrees_for_rx_majors.size(), - std::cout); + raft::print_device_vector("local_degrees_for_rx_majors", + local_degrees_for_rx_majors.data(), + local_degrees_for_rx_majors.size(), + std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } rmm::device_uvector local_nbr_offsets_for_rx_majors( local_degrees_for_rx_majors.size() + 1, handle.get_stream()); @@ -1224,21 +1243,22 @@ nbr_intersection(raft::handle_t const& handle, (*local_nbrs_properties_for_rx_majors) .resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("local_nbr_offsets_for_rx_majors", - local_nbr_offsets_for_rx_majors.data(), - local_nbr_offsets_for_rx_majors.size(), - std::cout); + raft::print_device_vector("local_nbr_offsets_for_rx_majors", + local_nbr_offsets_for_rx_majors.data(), + 
local_nbr_offsets_for_rx_majors.size(), + std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = @@ -1279,21 +1299,22 @@ nbr_intersection(raft::handle_t const& handle, (*local_nbrs_properties_for_rx_majors).size())}); } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("local_nbrs_for_rx_majors", - local_nbrs_for_rx_majors.data(), - local_nbrs_for_rx_majors.size(), - std::cout); + raft::print_device_vector("local_nbrs_for_rx_majors", + local_nbrs_for_rx_majors.data(), + local_nbrs_for_rx_majors.size(), + std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); std::inclusive_scan( @@ -1317,21 +1338,22 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); handle.sync_stream(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("d_local_nbr_counts", - d_local_nbr_counts.data(), - d_local_nbr_counts.size(), - std::cout); + raft::print_device_vector("d_local_nbr_counts", + d_local_nbr_counts.data(), + 
d_local_nbr_counts.size(), + std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } } // 2.4 Send the degrees and neighbors back @@ -1351,26 +1373,30 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_offsets).begin() + 1); } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("(*major_nbr_offsets)", - (*major_nbr_offsets).data(), - (*major_nbr_offsets).size(), - std::cout); + raft::print_device_vector("(*major_nbr_offsets)", + (*major_nbr_offsets).data(), + (*major_nbr_offsets).size(), + std::cout); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } std::tie(*major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); if constexpr (!std::is_same_v) { + major_nbr_properties = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + std::tie(*major_nbr_properties, std::ignore) = shuffle_values(major_comm, (*local_nbrs_properties_for_rx_majors).begin(), @@ -1378,28 +1404,29 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("(*major_nbr_indices)", - (*major_nbr_indices).data(), - (*major_nbr_indices).size(), - std::cout); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if 
(comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*major_nbr_properties)", - (*major_nbr_properties).data(), - (*major_nbr_properties).size(), + raft::print_device_vector("(*major_nbr_indices)", + (*major_nbr_indices).data(), + (*major_nbr_indices).size(), std::cout); - } - std::cout << "------------------" << std::endl; + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*major_nbr_properties)", + (*major_nbr_properties).data(), + (*major_nbr_properties).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } major_to_idx_map_ptr = std::make_unique>( unique_majors.begin(), @@ -1604,28 +1631,29 @@ nbr_intersection(raft::handle_t const& handle, auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("(*major_nbr_indices)", - (*major_nbr_indices).data(), - (*major_nbr_indices).size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*major_nbr_properties)", - (*major_nbr_properties).data(), - (*major_nbr_properties).size(), + raft::print_device_vector("(*major_nbr_indices)", + (*major_nbr_indices).data(), + (*major_nbr_indices).size(), std::cout); - } - std::cout << "------------------" << std::endl; + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*major_nbr_properties)", + (*major_nbr_properties).data(), + (*major_nbr_properties).size(), + 
std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } auto second_element_to_idx_map = detail::kv_cuco_store_device_view_t((*major_to_idx_map_ptr)->view()); @@ -1673,34 +1701,35 @@ nbr_intersection(raft::handle_t const& handle, // auto const comm_rank = comm.get_rank(); // auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i - << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i + << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("rx_v_pair_nbr_intersection_indices", + rx_v_pair_nbr_intersection_indices.data(), + rx_v_pair_nbr_intersection_indices.size(), + std::cout); - raft::print_device_vector("rx_v_pair_nbr_intersection_indices", - rx_v_pair_nbr_intersection_indices.data(), - rx_v_pair_nbr_intersection_indices.size(), - std::cout); + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", + (*rx_v_pair_nbr_intersection_properties0).data(), + (*rx_v_pair_nbr_intersection_properties0).size(), + std::cout); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", - (*rx_v_pair_nbr_intersection_properties0).data(), - (*rx_v_pair_nbr_intersection_properties0).size(), - std::cout); + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", + (*rx_v_pair_nbr_intersection_properties1).data(), + (*rx_v_pair_nbr_intersection_properties1).size(), + std::cout); + } - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", - (*rx_v_pair_nbr_intersection_properties1).data(), - 
(*rx_v_pair_nbr_intersection_properties1).size(), - std::cout); + std::cout << "------------------" << std::endl; } - - std::cout << "------------------" << std::endl; + comm.barrier(); } - comm.barrier(); - } // } } else { @@ -1749,34 +1778,35 @@ nbr_intersection(raft::handle_t const& handle, auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i - << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i + << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("rx_v_pair_nbr_intersection_indices", + rx_v_pair_nbr_intersection_indices.data(), + rx_v_pair_nbr_intersection_indices.size(), + std::cout); - raft::print_device_vector("rx_v_pair_nbr_intersection_indices", - rx_v_pair_nbr_intersection_indices.data(), - rx_v_pair_nbr_intersection_indices.size(), - std::cout); + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", + (*rx_v_pair_nbr_intersection_properties0).data(), + (*rx_v_pair_nbr_intersection_properties0).size(), + std::cout); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", - (*rx_v_pair_nbr_intersection_properties0).data(), - (*rx_v_pair_nbr_intersection_properties0).size(), - std::cout); + raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", + (*rx_v_pair_nbr_intersection_properties1).data(), + (*rx_v_pair_nbr_intersection_properties1).size(), + std::cout); + } - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", - (*rx_v_pair_nbr_intersection_properties1).data(), - 
(*rx_v_pair_nbr_intersection_properties1).size(), - std::cout); + std::cout << "------------------" << std::endl; } - - std::cout << "------------------" << std::endl; + comm.barrier(); } - comm.barrier(); - } } thrust::inclusive_scan(handle.get_thrust_policy(), @@ -2009,34 +2039,35 @@ nbr_intersection(raft::handle_t const& handle, auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-device_multicast_sendrecv) :" << comm_rank - << " partition index:" << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-device_multicast_sendrecv) :" << comm_rank + << " partition index:" << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("gathered_nbr_intersection_indices", + gathered_nbr_intersection_indices.data(), + gathered_nbr_intersection_indices.size(), + std::cout); - raft::print_device_vector("gathered_nbr_intersection_indices", - gathered_nbr_intersection_indices.data(), - gathered_nbr_intersection_indices.size(), - std::cout); + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*gathered_nbr_intersection_properties0)", + (*gathered_nbr_intersection_properties0).data(), + (*gathered_nbr_intersection_properties0).size(), + std::cout); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*gathered_nbr_intersection_properties0)", - (*gathered_nbr_intersection_properties0).data(), - (*gathered_nbr_intersection_properties0).size(), - std::cout); + raft::print_device_vector("(*gathered_nbr_intersection_properties1)", + (*gathered_nbr_intersection_properties1).data(), + (*gathered_nbr_intersection_properties1).size(), + std::cout); + } - raft::print_device_vector("(*gathered_nbr_intersection_properties1)", - 
(*gathered_nbr_intersection_properties1).data(), - (*gathered_nbr_intersection_properties1).size(), - std::cout); + std::cout << "------------------" << std::endl; } - - std::cout << "------------------" << std::endl; + comm.barrier(); } - comm.barrier(); - } } if constexpr (!std::is_same_v) { @@ -2093,34 +2124,35 @@ nbr_intersection(raft::handle_t const& handle, auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-gather) :" << comm_rank << " partition index:" << i - << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after-gather) :" << comm_rank << " partition index:" << i + << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("combined_nbr_intersection_indices", + combined_nbr_intersection_indices.data(), + combined_nbr_intersection_indices.size(), + std::cout); - raft::print_device_vector("combined_nbr_intersection_indices", - combined_nbr_intersection_indices.data(), - combined_nbr_intersection_indices.size(), - std::cout); + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*combined_nbr_intersection_properties0)", + (*combined_nbr_intersection_properties0).data(), + (*combined_nbr_intersection_properties0).size(), + std::cout); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*combined_nbr_intersection_properties0)", - (*combined_nbr_intersection_properties0).data(), - (*combined_nbr_intersection_properties0).size(), - std::cout); + raft::print_device_vector("(*combined_nbr_intersection_properties1)", + (*combined_nbr_intersection_properties1).data(), + (*combined_nbr_intersection_properties1).size(), + std::cout); + } - raft::print_device_vector("(*combined_nbr_intersection_properties1)", - 
(*combined_nbr_intersection_properties1).data(), - (*combined_nbr_intersection_properties1).size(), - std::cout); + std::cout << "------------------" << std::endl; } - - std::cout << "------------------" << std::endl; + comm.barrier(); } - comm.barrier(); - } } } @@ -2299,38 +2331,39 @@ nbr_intersection(raft::handle_t const& handle, size_t num_copied{0}; size_t num_scanned{0}; - { + if constexpr (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (before while loop) :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("nbr_intersection_indices", - nbr_intersection_indices.data(), - nbr_intersection_indices.size(), - std::cout); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (before while loop) :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*nbr_intersection_properties0)", - (*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size(), + raft::print_device_vector("nbr_intersection_indices", + nbr_intersection_indices.data(), + nbr_intersection_indices.size(), std::cout); - raft::print_device_vector("(*nbr_intersection_properties1)", - (*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size(), - std::cout); - } + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*nbr_intersection_properties0)", + (*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size(), + std::cout); - std::cout << "------------------" << std::endl; + raft::print_device_vector("(*nbr_intersection_properties1)", + (*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size(), + std::cout); + } + + 
std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } } while (num_scanned < nbr_intersection_indices.size()) { @@ -2372,38 +2405,39 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_properties1 = std::move(tmp_properties1); } - { + if constexpr (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after while loop) :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("nbr_intersection_indices", - nbr_intersection_indices.data(), - nbr_intersection_indices.size(), - std::cout); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank (after while loop) :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*nbr_intersection_properties0)", - (*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size(), + raft::print_device_vector("nbr_intersection_indices", + nbr_intersection_indices.data(), + nbr_intersection_indices.size(), std::cout); - raft::print_device_vector("(*nbr_intersection_properties1)", - (*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size(), - std::cout); - } + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*nbr_intersection_properties0)", + (*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size(), + std::cout); - std::cout << "------------------" << std::endl; + raft::print_device_vector("(*nbr_intersection_properties1)", + (*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } } #else 
@@ -2440,37 +2474,38 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.begin() + 1); } - { + if constexpr (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("nbr_intersection_indices", - nbr_intersection_indices.data(), - nbr_intersection_indices.size(), - std::cout); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*nbr_intersection_properties0)", - (*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size(), + raft::print_device_vector("nbr_intersection_indices", + nbr_intersection_indices.data(), + nbr_intersection_indices.size(), std::cout); - raft::print_device_vector("(*nbr_intersection_properties1)", - (*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size(), - std::cout); - } + if constexpr (!std::is_same_v) { + raft::print_device_vector("(*nbr_intersection_properties0)", + (*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size(), + std::cout); - std::cout << "------------------" << std::endl; + raft::print_device_vector("(*nbr_intersection_properties1)", + (*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } } // 5. 
Return diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index 3d183cf45f1..2e3894e22e3 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -166,6 +166,7 @@ struct call_intersection_op_t { dst_prop = *(vertex_property_first + dst_offset); } + /* printf("(%d <-> %d) %d %d %d\n", static_cast(src), static_cast(dst), @@ -177,6 +178,7 @@ struct call_intersection_op_t { printf("%d ", static_cast(*(intersection.data() + k))); } printf("\n"); + */ // if constexpr (std::is_same_v) { *(major_minor_pair_value_output_first + index) = @@ -244,6 +246,7 @@ void per_v_pair_transform_dst_nbr_intersection( { static_assert(!GraphViewType::is_storage_transposed); + bool DEBUG_CODE = false; using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using property_t = typename thrust::iterator_traits::value_type; @@ -345,16 +348,17 @@ void per_v_pair_transform_dst_nbr_intersection( auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << ", edge partittion idx = " << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << ", edge partittion idx = " << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "------------------" << std::endl; + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } } auto edge_partition = @@ -424,40 +428,41 @@ void per_v_pair_transform_dst_nbr_intersection( auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if 
(comm_rank == k) { - std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); + if (DEBUG_CODE) + for (int k = 0; k < comm_size; k++) { + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("intersection_offsets", - intersection_offsets.data(), - intersection_offsets.size(), - std::cout); - - raft::print_device_vector("intersection_indices", - intersection_indices.data(), - intersection_indices.size(), - std::cout); - - // if constexpr (!std::is_same_v) { - if (r_nbr_intersection_properties0) { - raft::print_device_vector("r_nbr_intersection_properties0", - r_nbr_intersection_properties0->data(), - r_nbr_intersection_properties0->size(), + raft::print_device_vector("intersection_offsets", + intersection_offsets.data(), + intersection_offsets.size(), std::cout); - } - if (r_nbr_intersection_properties1) { - raft::print_device_vector("r_nbr_intersection_properties1", - r_nbr_intersection_properties1->data(), - r_nbr_intersection_properties1->size(), + + raft::print_device_vector("intersection_indices", + intersection_indices.data(), + intersection_indices.size(), std::cout); - } - std::cout << "------------------" << std::endl; + // if constexpr (!std::is_same_v) { + if (r_nbr_intersection_properties0) { + raft::print_device_vector("r_nbr_intersection_properties0", + r_nbr_intersection_properties0->data(), + r_nbr_intersection_properties0->size(), + std::cout); + } + if (r_nbr_intersection_properties1) { + raft::print_device_vector("r_nbr_intersection_properties1", + r_nbr_intersection_properties1->data(), + r_nbr_intersection_properties1->size(), + std::cout); + } + + std::cout << "------------------" << std::endl; + } + comm.barrier(); } - comm.barrier(); - } } if (unique_vertices) { diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 
8336fccdf9f..13ce6634c87 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -342,6 +342,10 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo # - SIMILARITY tests ------------------------------------------------------------------------------ ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cpp) +################################################################################################### +# - WEIGHTED_SIMILARITY tests ------------------------------------------------------------------------------ +ConfigureTest(WEIGHTED_SIMILARITY_TEST link_prediction/weighted_similarity_test.cpp) + ################################################################################################### # - RANDOM_WALKS tests ---------------------------------------------------------------------------- # FIXME: Rename to random_walks_test.cu once the legacy implementation is deleted @@ -592,6 +596,10 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG RANDOM_WALKS tests --------------------------------------------------------------------- ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp) + ############################################################################################### + # - MG WEIGHTED_SIMILARITY tests ----------------------------------------------------------------------- + ConfigureTestMG(MG_WEIGHTED_SIMILARITY_TEST link_prediction/mg_weighted_similarity_test.cpp) + ############################################################################################### # - MG SIMILARITY tests ----------------------------------------------------------------------- ConfigureTestMG(MG_SIMILARITY_TEST link_prediction/mg_similarity_test.cpp) diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp new file mode 100644 index 00000000000..ba5df6d6462 --- /dev/null +++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ -0,0 
+1,400 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +struct Similarity_Usecase { + bool use_weights{true}; + bool check_correctness{true}; + size_t max_seeds{std::numeric_limits::max()}; +}; + +template +class Tests_MGSimilarity + : public ::testing::TestWithParam> { + public: + Tests_MGSimilarity() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple param, + test_functor_t const& test_functor) + { + auto [similarity_usecase, input_usecase] = param; + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. 
create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + auto [mg_graph, mg_edge_weights, d_mg_renumber_map_labels] = + cugraph::test::construct_graph( + *handle_, input_usecase, true, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 2. run similarity + + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; + + rmm::device_uvector d_start_vertices( + std::min( + static_cast(mg_graph_view.local_vertex_partition_range_size()), + similarity_usecase.max_seeds / comm_size + + (static_cast(comm_rank) < similarity_usecase.max_seeds % comm_size ? 1 : 0)), + handle_->get_stream()); + + /* + for (int k = 0; k < comm_size; k++) { + auto& comm = handle_->get_comms(); + + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector( + "d_start_vertices", d_start_vertices.data(), d_start_vertices.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + */ + + cugraph::test::populate_vertex_ids( + *handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first()); + + /* + for (int k = 0; k < comm_size; k++) { + auto& comm = handle_->get_comms(); + + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector( + "d_start_vertices", d_start_vertices.data(), d_start_vertices.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + */ + + auto [d_offsets, 
two_hop_nbrs] = cugraph::k_hop_nbrs( + *handle_, + mg_graph_view, + raft::device_span(d_start_vertices.data(), d_start_vertices.size()), + 2); + + auto h_start_vertices = cugraph::test::to_host(*handle_, d_start_vertices); + auto h_offsets = cugraph::test::to_host(*handle_, d_offsets); + + std::vector h_v1(h_offsets.back()); + for (size_t i = 0; i < h_start_vertices.size(); ++i) { + std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]); + } + + auto d_v1 = cugraph::test::to_device(*handle_, h_v1); + auto d_v2 = std::move(two_hop_nbrs); + + /* + ////// + + bool test_weighted = true; + bool renumber = true; + std::string file_path = "/home/nfs/mnaim/csv/similarity.csv"; + std::tie(mg_graph, mg_edge_weights, d_mg_renumber_map_labels) = + cugraph::test::read_graph_from_csv_file( + *handle_, file_path, test_weighted, renumber); + + std::tie(mg_graph, mg_edge_weights, d_mg_renumber_map_labels) = cugraph::symmetrize_graph( + *handle_, + std::move(mg_graph), + std::move(mg_edge_weights), + d_mg_renumber_map_labels + ? 
std::optional>(std::move(*d_mg_renumber_map_labels)) + : std::nullopt, + false); + + mg_graph_view = mg_graph.view(); + mg_edge_weight_view = (*mg_edge_weights).view(); + + //// + + std::vector h_v1 = {}; + if (comm_rank==0){h_v1.push_back(2);} + auto d_v1 = cugraph::test::to_device(*handle_, h_v1); + + std::vector h_v2 = {}; + if (comm_rank==0){h_v2.push_back(3);} + auto d_v2 = cugraph::test::to_device(*handle_, h_v2); + + for (int k = 0; k < comm_size; k++) { + auto& comm = handle_->get_comms(); + + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("d_v1", d_v1.data(), d_v1.size(), std::cout); + + raft::print_device_vector("d_v2", d_v2.data(), d_v2.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + */ + std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = + cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + edge_t, + weight_t, + int32_t>(*handle_, + std::move(d_v1), + std::move(d_v2), + std::nullopt, + std::nullopt, + std::nullopt, + mg_graph_view.vertex_partition_range_lasts()); + + for (int k = 0; k < comm_size; k++) { + auto& comm = handle_->get_comms(); + + comm.barrier(); + if (comm_rank == k) { + std::cout << "Rank :" << comm_rank << std::endl; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("d_v1", d_v1.data(), d_v1.size(), std::cout); + + raft::print_device_vector("d_v2", d_v2.data(), d_v2.size(), std::cout); + + std::cout << "------------------" << std::endl; + } + comm.barrier(); + } + + std::tuple, raft::device_span> vertex_pairs{ + {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG similarity test"); + } + + auto result_score = 
test_functor.run( + *handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. compare SG & MG results + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = + cugraph::test::graph_to_host_coo(*handle_, mg_graph_view, mg_edge_weight_view); + + d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size()); + d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size()); + result_score = + cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size()); + + if (d_v1.size() > 0) { + auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1); + auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2); + auto h_result_score = cugraph::test::to_host(*handle_, result_score); + + if (wgt && similarity_usecase.use_weights) { + weighted_similarity_compare(mg_graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair1, h_vertex_pair2), + h_result_score, + test_functor); + } else { + similarity_compare(mg_graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair1, h_vertex_pair2), + h_result_score, + test_functor); + } + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGSimilarity::handle_ = nullptr; + +using Tests_MGSimilarity_File = Tests_MGSimilarity; +using Tests_MGSimilarity_Rmat = Tests_MGSimilarity; + +TEST_P(Tests_MGSimilarity_File, CheckInt32Int32FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int32FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + 
override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int64FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGSimilarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGSimilarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), 
cugraph::test::test_overlap_t{}); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGSimilarity_File, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}), + ::testing::Values(Similarity_Usecase{true, true, 20}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx") + // , cugraph::test::File_Usecase("test/datasets/netscience.mtx") + ))); +// #if 0 +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGSimilarity_Rmat, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{true, true, 20}, + // Similarity_Usecase{false, true, 20}), + ::testing::Values(Similarity_Usecase{true, true, 20}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGSimilarity_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + ::testing::Values(Similarity_Usecase{true, false, 20}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); +// #endif +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_prediction/similarity_compare.cpp b/cpp/tests/link_prediction/similarity_compare.cpp index f005b4ddcef..47b73311332 100644 --- a/cpp/tests/link_prediction/similarity_compare.cpp +++ b/cpp/tests/link_prediction/similarity_compare.cpp @@ -37,6 +37,178 @@ struct intersection_count_t { namespace cugraph { namespace test { 
+template +void weighted_similarity_compare( + vertex_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& similarity_score, + test_t const& test_functor) +{ + auto& [graph_src, graph_dst, graph_wgt] = edge_list; + auto& [v1, v2] = vertex_pairs; + + auto compare_pairs = [](thrust::tuple lhs, + thrust::tuple rhs) { + return ((thrust::get<0>(lhs) < thrust::get<0>(rhs)) || + ((thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) < thrust::get<1>(rhs)))); + }; + + std::sort(thrust::make_zip_iterator(graph_src.begin(), graph_dst.begin(), (*graph_wgt).begin()), + thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()), + compare_pairs); + + // FIXME: This only tests unweighted, weighted implementation needs to be different + std::vector vertex_degrees(static_cast(num_vertices), size_t{0}); + std::vector weighted_vertex_degrees(static_cast(num_vertices), weight_t{0}); + + std::for_each( + graph_src.begin(), graph_src.end(), [&vertex_degrees](auto v) { ++vertex_degrees[v]; }); + + std::for_each( + thrust::make_zip_iterator(graph_src.begin(), graph_dst.begin(), (*graph_wgt).begin()), + thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()), + [&weighted_vertex_degrees](thrust::tuple src_dst_wgt) { + auto src = thrust::get<0>(src_dst_wgt); + auto dst = thrust::get<1>(src_dst_wgt); + auto wgt = thrust::get<2>(src_dst_wgt); + + weighted_vertex_degrees[src] += wgt / weight_t{2}; + weighted_vertex_degrees[dst] += wgt / weight_t{2}; + }); + + auto compare_functor = cugraph::test::nearly_equal{ + weight_t{1e-3}, weight_t{(weight_t{1} / static_cast(num_vertices)) * weight_t{1e-3}}}; + + if (graph_wgt) { + assert(true); + } else { + assert(false); + } + + auto graph_wgt_first = (*graph_wgt).begin(); + + std::for_each(graph_wgt_first, graph_wgt_first + (*graph_wgt).size(), [](auto val) { + std::cout << "w: " << val << std::endl; + }); + + 
std::for_each( + thrust::make_zip_iterator(v1.begin(), v2.begin(), similarity_score.begin()), + thrust::make_zip_iterator(v1.end(), v2.end(), similarity_score.end()), + [compare_functor, + test_functor, + &vertex_degrees, + &weighted_vertex_degrees, + &graph_src, + &graph_dst, + &graph_wgt_first](auto tuple) { + auto v1 = thrust::get<0>(tuple); + auto v2 = thrust::get<1>(tuple); + auto score = thrust::get<2>(tuple); + + auto v1_begin = + std::distance(graph_src.begin(), std::lower_bound(graph_src.begin(), graph_src.end(), v1)); + auto v1_end = + std::distance(graph_src.begin(), std::upper_bound(graph_src.begin(), graph_src.end(), v1)); + + auto v2_begin = + std::distance(graph_src.begin(), std::lower_bound(graph_src.begin(), graph_src.end(), v2)); + auto v2_end = + std::distance(graph_src.begin(), std::upper_bound(graph_src.begin(), graph_src.end(), v2)); + + std::vector intersection(std::min((v1_end - v1_begin), (v2_end - v2_begin))); + + auto intersection_end = std::set_intersection(graph_dst.begin() + v1_begin, + graph_dst.begin() + v1_end, + graph_dst.begin() + v2_begin, + graph_dst.begin() + v2_end, + intersection.begin()); + + auto intersection_size = + static_cast(std::distance(intersection.begin(), intersection_end)); + + std::vector intersected_weights_v1(static_cast(intersection_size), + weight_t{0}); + + std::vector intersected_weights_v2(static_cast(intersection_size), + weight_t{0}); + + int intersected_weight_idx = 0; + + std::for_each( + intersection.begin(), + intersection_end, + [&graph_dst, + &graph_wgt_first, + &v1_begin, + &v1_end, + &v2_begin, + &v2_end, + &intersected_weights_v1, + &intersected_weights_v2, + &intersected_weight_idx](auto inbr) { + auto lower = + std::lower_bound(graph_dst.begin() + v1_begin, graph_dst.begin() + v1_end, inbr); + auto offset = std::distance(graph_dst.begin() + v1_begin, lower); + + intersected_weights_v1[intersected_weight_idx] = + static_cast(graph_wgt_first[v1_begin + offset]); + + lower = 
std::lower_bound(graph_dst.begin() + v2_begin, graph_dst.begin() + v2_end, inbr); + + offset = std::distance(graph_dst.begin() + v2_begin, lower); + + intersected_weights_v2[intersected_weight_idx] = + static_cast(graph_wgt_first[v2_begin + offset]); + + std::cout << "intersected_weights_v1: " << intersected_weights_v1[intersected_weight_idx] + << std::endl; + std::cout << "intersected_weights_v2: " << intersected_weights_v2[intersected_weight_idx] + << std::endl; + ++intersected_weight_idx; + }); + + weight_t sum_intersected_weights_v1 = + std::accumulate(intersected_weights_v1.begin(), intersected_weights_v1.end(), 0.0); + weight_t sum_intersected_weights_v2 = + std::accumulate(intersected_weights_v2.begin(), intersected_weights_v2.end(), 0.0); + + weight_t sum_of_uniq_weights_v1 = weighted_vertex_degrees[v1] - sum_intersected_weights_v1; + weight_t sum_of_uniq_weights_v2 = weighted_vertex_degrees[v2] - sum_intersected_weights_v2; + + weight_t min_weight_v1_intersect_v2 = weight_t{0}; + weight_t max_weight_v1_intersect_v2 = weight_t{0}; + + std::for_each( + thrust::make_zip_iterator(intersected_weights_v1.begin(), intersected_weights_v2.begin()), + thrust::make_zip_iterator(intersected_weights_v1.end(), intersected_weights_v2.end()), + [&min_weight_v1_intersect_v2, + &max_weight_v1_intersect_v2](thrust::tuple w1_w2) { + min_weight_v1_intersect_v2 += std::min(thrust::get<0>(w1_w2), thrust::get<1>(w1_w2)); + max_weight_v1_intersect_v2 += std::max(thrust::get<0>(w1_w2), thrust::get<1>(w1_w2)); + }); + + max_weight_v1_intersect_v2 += (sum_of_uniq_weights_v1 + sum_of_uniq_weights_v2); + + std::cout << "wdegs: " << weighted_vertex_degrees[v1] << " " << weighted_vertex_degrees[v2] + << std::endl; + std::cout << "min_i, max:" << min_weight_v1_intersect_v2 << " " << max_weight_v1_intersect_v2 + << std::endl; + + auto expected_score = test_functor.compute_score(weighted_vertex_degrees[v1], + weighted_vertex_degrees[v2], + min_weight_v1_intersect_v2, + 
max_weight_v1_intersect_v2); + + std::cout << "score: " << score << " expected_score: " << expected_score << std::endl; + + EXPECT_TRUE(compare_functor(score, expected_score)) + << "score mismatch, got " << score << ", expected " << expected_score; + }); +} + template void similarity_compare( vertex_t num_vertices, @@ -96,9 +268,13 @@ void similarity_compare( intersection.begin()); auto expected_score = test_functor.compute_score( - vertex_degrees[v1], - vertex_degrees[v2], - static_cast(std::distance(intersection.begin(), intersection_end))); + static_cast(vertex_degrees[v1]), + static_cast(vertex_degrees[v2]), + static_cast(std::distance(intersection.begin(), intersection_end)), + static_cast(vertex_degrees[v1] + vertex_degrees[v2] - + std::distance(intersection.begin(), intersection_end))); + + std::cout << "score: " << score << " expected_score: " << expected_score << std::endl; EXPECT_TRUE(compare_functor(score, expected_score)) << "score mismatch, got " << score << ", expected " << expected_score; @@ -153,5 +329,55 @@ template void similarity_compare( std::vector& result_score, test_overlap_t const& test_functor); +//// + +template void weighted_similarity_compare( + int32_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_jaccard_t const& test_functor); + +template void weighted_similarity_compare( + int32_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_sorensen_t const& test_functor); + +template void weighted_similarity_compare( + int32_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_overlap_t const& test_functor); + +template void weighted_similarity_compare( + int64_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + 
std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_jaccard_t const& test_functor); + +template void weighted_similarity_compare( + int64_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_sorensen_t const& test_functor); + +template void weighted_similarity_compare( + int64_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_overlap_t const& test_functor); + } // namespace test } // namespace cugraph diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp index 0fbb3b40b39..d0b192ff894 100644 --- a/cpp/tests/link_prediction/similarity_compare.hpp +++ b/cpp/tests/link_prediction/similarity_compare.hpp @@ -29,10 +29,12 @@ struct test_jaccard_t { std::string testname{"Jaccard"}; template - weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const + weight_t compute_score(weight_t weight_a, + weight_t weight_b, + weight_t a_intersect_b, + weight_t a_union_b) const { - return static_cast(intersection_count) / - static_cast(u_size + v_size - intersection_count); + return a_intersect_b / a_union_b; } template @@ -51,9 +53,12 @@ struct test_sorensen_t { std::string testname{"Sorensen"}; template - weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const + weight_t compute_score(weight_t weight_a, + weight_t weight_b, + weight_t a_intersect_b, + weight_t a_union_b) const { - return static_cast(2 * intersection_count) / static_cast(u_size + v_size); + return (2 * a_intersect_b) / (weight_a + weight_b); } template @@ -72,10 +77,12 @@ struct test_overlap_t { std::string testname{"Overlap"}; template - weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const + weight_t compute_score(weight_t weight_a, + 
weight_t weight_b, + weight_t a_intersect_b, + weight_t a_union_b) const { - return static_cast(intersection_count) / - static_cast(std::min(u_size, v_size)); + return a_intersect_b / std::min(weight_a, weight_b); } template @@ -99,5 +106,13 @@ void similarity_compare( std::vector& similarity_score, test_t const& test_functor); +template +void weighted_similarity_compare( + vertex_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& similarity_score, + test_t const& test_functor); } // namespace test } // namespace cugraph diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp new file mode 100644 index 00000000000..f41f35b0776 --- /dev/null +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +struct Similarity_Usecase { + bool use_weights{false}; + bool check_correctness{true}; + size_t max_seeds{std::numeric_limits::max()}; + size_t max_vertex_pairs_to_check{std::numeric_limits::max()}; +}; + +template +class Tests_Similarity + : public ::testing::TestWithParam> { + public: + Tests_Similarity() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param, + test_functor_t const& test_functor) + { + constexpr bool renumber = true; + auto [similarity_usecase, input_usecase] = param; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + // 2. create SG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, similarity_usecase.use_weights, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. run similarity + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Similarity test"); + } + + // + // FIXME: Don't currently have an MG implementation of 2-hop neighbors. 
+ // For now we'll do that on the CPU (really slowly, so keep max_seed + // small) + // + rmm::device_uvector d_v1(0, handle.get_stream()); + rmm::device_uvector d_v2(0, handle.get_stream()); + + { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t max_vertices = std::min(static_cast(graph_view.number_of_vertices()), + similarity_usecase.max_seeds); + std::vector h_v1; + std::vector h_v2; + std::vector one_hop_v1; + std::vector one_hop_v2; + + for (size_t seed = 0; seed < max_vertices; ++seed) { + std::for_each(thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end()), + [&one_hop_v1, &one_hop_v2, seed](auto t) { + auto u = thrust::get<0>(t); + auto v = thrust::get<1>(t); + if (u == seed) { + one_hop_v1.push_back(u); + one_hop_v2.push_back(v); + } + }); + } + + std::for_each(thrust::make_zip_iterator(one_hop_v1.begin(), one_hop_v2.begin()), + thrust::make_zip_iterator(one_hop_v1.end(), one_hop_v2.end()), + [&](auto t1) { + auto seed = thrust::get<0>(t1); + auto neighbor = thrust::get<1>(t1); + std::for_each(thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end()), + [&](auto t2) { + auto u = thrust::get<0>(t2); + auto v = thrust::get<1>(t2); + if (u == neighbor) { + h_v1.push_back(seed); + h_v2.push_back(v); + } + }); + }); + + std::sort(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), + thrust::make_zip_iterator(h_v1.end(), h_v2.end())); + + auto end_iter = std::unique(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), + thrust::make_zip_iterator(h_v1.end(), h_v2.end()), + [](auto t1, auto t2) { + return (thrust::get<0>(t1) == thrust::get<0>(t2)) && + (thrust::get<1>(t1) == thrust::get<1>(t2)); + }); + + h_v1.resize( + thrust::distance(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), end_iter)); + h_v2.resize(h_v1.size()); + + d_v1.resize(h_v1.size(), handle.get_stream()); + d_v2.resize(h_v2.size(), 
handle.get_stream()); + + raft::update_device(d_v1.data(), h_v1.data(), h_v1.size(), handle.get_stream()); + raft::update_device(d_v2.data(), h_v2.data(), h_v2.size(), handle.get_stream()); + } + + // FIXME: Need to add some tests that specify actual vertex pairs + // FIXME: Need to add a variation that calls the two hop neighbors function + // FIXME: Debugging state as of EOD 9/28: + // 1) Tested case of no vertex pairs... works great :-) + // 2) Don't have a 2-hop on GPU yet. Perhaps write a 2-hop on CPU + // for now? We could then use that for testing the 2-hop function + // later. + std::tuple, raft::device_span> vertex_pairs{ + {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}}; + + auto result_score = test_functor.run( + handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t check_size = std::min(d_v1.size(), similarity_usecase.max_vertex_pairs_to_check); + + // + // FIXME: Need to reorder here. thrust::shuffle on the tuples (vertex_pairs_1, + // vertex_pairs_2, result_score) would + // be sufficient. 
+ // + std::vector h_vertex_pair_1(check_size); + std::vector h_vertex_pair_2(check_size); + std::vector h_result_score(check_size); + + raft::update_host( + h_vertex_pair_1.data(), std::get<0>(vertex_pairs).data(), check_size, handle.get_stream()); + raft::update_host( + h_vertex_pair_2.data(), std::get<1>(vertex_pairs).data(), check_size, handle.get_stream()); + raft::update_host( + h_result_score.data(), result_score.data(), check_size, handle.get_stream()); + + similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } + } +}; + +using Tests_Similarity_File = Tests_Similarity; +// using Tests_Similarity_Rmat = Tests_Similarity; + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +// TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatJaccard) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +// } + +// TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatJaccard) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +// } + +// TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatJaccard) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +// } + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +// TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatSorensen) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +// } + +// TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatSorensen) +// { +// run_current_test( +// 
override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +// } + +// TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatSorensen) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +// } + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +// TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatOverlap) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +// } + +// TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatOverlap) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +// } + +// TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatOverlap) +// { +// run_current_test( +// override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +// } + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Similarity_File, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20, + // 100}), + ::testing::Values(Similarity_Usecase{false, true, 20, 100}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx") + // , + // cugraph::test::File_Usecase("test/datasets/dolphins.mtx") + ))); + +// INSTANTIATE_TEST_SUITE_P( +// rmat_small_test, +// Tests_Similarity_Rmat, +// ::testing::Combine( +// // enable correctness checks +// // Disable weighted computation testing in 22.10 +// //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, +// 20, +// // 100}), +// ::testing::Values(Similarity_Usecase{false, true, 20, 100}), +// ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 
0.57, 0.19, 0.19, 0, true, false)))); + +// INSTANTIATE_TEST_SUITE_P( +// file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with +// --gtest_filter to select only the file_benchmark_test with a specific +// vertex & edge type combination) by command line arguments and do not +// include more than one File_Usecase that differ only in filename +// (to avoid running same benchmarks more than once) */ +// Tests_Similarity_File, +// ::testing::Combine( +// // disable correctness checks +// // Disable weighted computation testing in 22.10 +// //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), +// ::testing::Values(Similarity_Usecase{false, false}), +// ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +// INSTANTIATE_TEST_SUITE_P( +// rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with +// --gtest_filter to select only the rmat_benchmark_test with a specific +// vertex & edge type combination) by command line arguments and do not +// include more than one Rmat_Usecase that differ only in scale or edge +// factor (to avoid running same benchmarks more than once) */ +// Tests_Similarity_Rmat, +// ::testing::Combine( +// // disable correctness checks for large graphs +// //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), +// ::testing::Values(Similarity_Usecase{false, false}), +// ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 4bb94aaadde..2d3a054a55d 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -118,7 +118,7 @@ class 
Tests_MGPerVPairTransformDstNbrIntersection hr_timer.display_and_clear(std::cout); } - std::string file_path = "/home/nfs/mnaim/csv/grid.csv"; + std::string file_path = "/home/nfs/mnaim/csv/similarity.csv"; constexpr bool store_transposed = false; constexpr bool multi_gpu = true; @@ -592,8 +592,9 @@ class Tests_MGPerVPairTransformDstNbrIntersection [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) { cuco::detail::MurmurHash3_32 hash_func{}; // use hash_func to generate arbitrary vertex pairs - auto v0 = static_cast(hash_func(i + comm_rank) % num_vertices); - auto v1 = static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); + auto v0 = 2; // static_cast(hash_func(i + comm_rank) % num_vertices); + auto v1 = + 3; // static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); printf("comm_rank=%d v0= %d, v1=%d\n", static_cast(comm_rank), static_cast(v0), @@ -842,7 +843,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{5}, true}), + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1}, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); // INSTANTIATE_TEST_SUITE_P(rmat_small_test, diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 16c9d3ed145..13afddd92bd 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -144,6 +144,7 @@ class File_Usecase : public detail::TranslateGraph_Usecase { std::optional> weights{}; std::optional> vertices{}; bool is_symmetric{}; + std::cout << "graph_file_full_path_: " << graph_file_full_path_ << std::endl; auto extension = graph_file_full_path_.substr(graph_file_full_path_.find_last_of(".") + 1); if (extension == "mtx") { std::tie(srcs, dsts, weights, vertices, is_symmetric) = From 
2df17e09369880f7a1e780696087338f52e9f495 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Thu, 20 Jul 2023 14:49:50 -0700 Subject: [PATCH 04/22] weighted jaccard, sorsen and overlap tests, with debugging statements --- cpp/src/link_prediction/jaccard_impl.cuh | 5 +- cpp/src/link_prediction/overlap_impl.cuh | 5 +- cpp/src/link_prediction/similarity_impl.cuh | 6 +- cpp/src/link_prediction/sorensen_impl.cuh | 5 +- cpp/src/prims/detail/nbr_intersection.cuh | 198 ++++++++++++------ .../mg_weighted_similarity_test.cpp | 8 +- .../link_prediction/similarity_compare.hpp | 33 ++- 7 files changed, 183 insertions(+), 77 deletions(-) diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index b67911afbb7..1324ff78c80 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -30,7 +30,10 @@ struct jaccard_functor_t { weight_t cardinality_a_intersect_b, weight_t cardinality_a_union_b) const { - return cardinality_a_intersect_b / cardinality_a_union_b; + return (fabs(static_cast(cardinality_a_union_b) - double{0}) < + double{2} / double{1 << 30}) + ? weight_t{0} + : cardinality_a_intersect_b / cardinality_a_union_b; } }; diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index b1f3d87b5e4..e0ab3e5c757 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -30,7 +30,10 @@ struct overlap_functor_t { weight_t cardinality_a_intersect_b, weight_t cardinality_a_union_b) const { - return cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b); + return (fabs(static_cast(cardinality_a_union_b) - double{0}) < + double{2} / double{1 << 30}) + ? 
weight_t{0} + : cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b); } }; diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index b4bed1eab53..1e60c1f5a23 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -91,7 +91,11 @@ rmm::device_uvector similarity( bool CODE_DEBUG = false; if (CODE_DEBUG) for (size_t k = 0; k < intersection.size(); k++) { - printf("=> %d %f %f\n", + printf("=> (v1 = %d v2 = %d wdeg(v1) = %f wdeg(v2) = %f) %d %f %f\n", + static_cast(v1), + static_cast(v2), + static_cast(weight_a), + static_cast(weight_b), static_cast(intersection[k]), static_cast(intersected_properties_a[k]), static_cast(intersected_properties_b[k])); diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index 0972b206c2a..d13620c0448 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ b/cpp/src/link_prediction/sorensen_impl.cuh @@ -30,7 +30,10 @@ struct sorensen_functor_t { weight_t cardinality_a_intersect_b, weight_t cardinality_a_union_b) const { - return (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b); + return (fabs(static_cast(cardinality_a_union_b) - double{0}) < + double{2} / double{1 << 30}) + ? 
weight_t{0} + : (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b); } }; diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index ef92932f7bd..9535d4d3f02 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -365,16 +365,17 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { { auto pair = *(vertex_pair_first + i); + bool debug = false; #if 1 - if (false) + if (debug) printf( "-----rank=%d, edge_partition_idx=%d------\n", rank, static_cast(edge_partition_idx)); if constexpr (!std::is_same_v) { - if (false) printf("called with FirstElementToIdxMap********\n"); + if (debug) printf("called with FirstElementToIdxMap********\n"); } if constexpr (!std::is_same_v) { - if (false) printf("called with SecondElementToIdxMap---------\n"); + if (debug) printf("called with SecondElementToIdxMap---------\n"); } #endif @@ -385,7 +386,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree0{0}; if constexpr (std::is_same_v) { #if 1 - if (false) printf("element0 from edge_partition\n"); + if (debug) printf("element0 from edge_partition\n"); #endif vertex_t major = thrust::get<0>(pair); @@ -409,7 +410,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { #if 1 - if (false) printf("element0 from first_element_to_idx_map******\n"); + if (debug) printf("element0 from first_element_to_idx_map******\n"); #endif auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); @@ -438,7 +439,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { #if 1 vertex_t element0 = thrust::get<0>(pair); - if (false) + if (debug) printf("element0 %d, local_degree0 %d local_edge_offset0 %d\n", static_cast(element0), static_cast(local_degree0), @@ -449,31 +450,31 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { if constexpr (!std::is_same_v) { EdgeProperty_t ep{}; ep = 
edge_partition_e_value_input.get(local_edge_offset0 + k); - if (false) + if (debug) printf("( %d %d %.2f %.2f)* ", static_cast(k), static_cast(indices0[k]), static_cast(ep), static_cast(property0[k])); } else { - if (false) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); + if (debug) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); } } else { if constexpr (!std::is_same_v) { EdgeProperty_t ep = *(first_element_properties.begin() + local_edge_offset0 + k); - if (false) + if (debug) printf("(%d %d %.2f %.2f)* ", static_cast(k), static_cast(indices0[k]), static_cast(ep), static_cast(property0[k])); } else { - if (false) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); + if (debug) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); } } } - if (false) printf("\n"); + if (debug) printf("\n"); #endif vertex_t const* indices1{nullptr}; @@ -482,7 +483,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree1{0}; if constexpr (std::is_same_v) { #if 1 - if (false) printf("element1 from edge_partition\n"); + if (debug) printf("element1 from edge_partition\n"); #endif vertex_t major = thrust::get<1>(pair); @@ -506,7 +507,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { #if 1 - if (false) printf("element1 from second_element_to_idx_map----\n"); + if (debug) printf("element1 from second_element_to_idx_map----\n"); #endif auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); @@ -536,7 +537,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { #if 1 vertex_t element1 = thrust::get<1>(pair); - if (false) + if (debug) printf("element1 %d, local_degree1 %d local_edge_offset1 %d\n", static_cast(element1), static_cast(local_degree1), @@ -547,31 +548,31 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { if constexpr (!std::is_same_v) { EdgeProperty_t ep{}; ep = edge_partition_e_value_input.get(local_edge_offset1 + k); - if (false) + if 
(debug) printf("(%d %d %.2f %.2f)- ", static_cast(k), static_cast(indices1[k]), static_cast(ep), static_cast(property1[k])); } else { - if (false) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); + if (debug) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); } } else { if constexpr (!std::is_same_v) { EdgeProperty_t ep = *(second_element_properties.begin() + local_edge_offset1 + k); - if (false) + if (debug) printf("(%d %d %.2f %.2f)- ", static_cast(k), static_cast(indices1[k]), static_cast(ep), static_cast(property1[k])); } else { - if (false) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); + if (debug) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); } } } - if (false) printf("\n"); + if (debug) printf("\n"); #endif // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree @@ -594,33 +595,69 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { // auto insection_size = static_cast(thrust::distance(inbr_start, it)); - if (false) + if (debug) printf( "rank = %d insection_size=%d\n", static_cast(rank), static_cast(insection_size)); - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - if (false) printf("k = %d inbr = %d ", static_cast(k), static_cast(inbr_start[k])); + if (debug) printf("k = %d inbr = %d ", static_cast(k), static_cast(inbr_start[k])); } - if (false) printf("\n"); + if (debug) printf("\n"); if constexpr (!std::is_same_v) { - auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; - #if 1 - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < local_degree0; k++) { - if (false) printf("k = %d p0 = %d\n", static_cast(k), static_cast(property0[k])); + if (debug) printf("k = %d p0 = %d\n", static_cast(k), static_cast(property0[k])); } - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < local_degree1; k++) { - if (false) printf("k = %d p1 = %d\n", 
static_cast(k), static_cast(property1[k])); + if (debug) printf("k = %d p1 = %d\n", static_cast(k), static_cast(property1[k])); } - if (false) printf("\n"); + if (debug) printf("\n"); #endif + auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; + + /* + //// new block + size_t multiedge_intersection_size0 = 0; + + for (size_t k = 0; k < insection_size; k++) { + auto myrank = rank; + + auto first_occurence = thrust::lower_bound( + thrust::seq, indices0, indices0 + local_degree0, inbr_start[k], thrust::less()); + + auto last_occurence = + thrust::upper_bound(thrust::seq, indices0, indices0 + local_degree0, inbr_start[k]); + + size_t nr_repeats = (last_occurence - first_occurence); + + auto offset = thrust::distance(indices0, first_occurence); + + printf("For vertex %d repeats = %d\n", + static_cast(inbr_start[k]), + static_cast(nr_repeats)); + + for (size_t m = 0; m < nr_repeats; m++) { + // ip0_start[multiedge_intersection_size0 + m] = property0[offset + m]; + + if (debug) + printf(">>myrank=%d offset+m=%d p=%f\n", + static_cast(myrank), + static_cast(offset + m), + static_cast(property0[offset + m])); + } + multiedge_intersection_size0 += nr_repeats; + } + + //// + */ + // copy edge properties from first vertex to common neighbors thrust::lower_bound(thrust::seq, indices0, @@ -630,36 +667,72 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { ip0_start, // indices thrust::less()); - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - if (false) printf("k = %d idx = %d ", static_cast(k), static_cast(ip0_start[k])); + if (debug) printf("k = %d idx = %d ", static_cast(k), static_cast(ip0_start[k])); } - if (false) printf("\n"); + if (debug) printf("\n"); auto myrank = rank; - thrust::transform( - thrust::seq, - ip0_start, - ip0_start + insection_size, - ip0_start, - [property0, myrank] __device__(auto idx) { - if (false) printf("myrank=%d idx=%d\n", static_cast(myrank), 
static_cast(idx)); - return property0[static_cast(idx)]; - }); + thrust::transform(thrust::seq, + ip0_start, + ip0_start + insection_size, + ip0_start, + [property0, myrank, debug] __device__(auto idx) { + if (debug) + printf("myrank=%d idx=%d p=%f\n", + static_cast(myrank), + static_cast(idx), + static_cast(property0[static_cast(idx)])); + return property0[static_cast(idx)]; + }); /// - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - if (false) + if (debug) printf("rank = %d inbrp0 = %d ", static_cast(rank), static_cast(ip0_start[k])); } - if (false) printf("\n"); + if (debug) printf("\n"); /// auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; + /* + //// new block + size_t multiedge_intersection_size_1 = 0; + + for (size_t k = 0; k < insection_size; k++) { + auto myrank = rank; + auto first_occurence = thrust::lower_bound( + thrust::seq, indices1, indices1 + local_degree1, inbr_start[k], thrust::less()); + + auto last_occurence = + thrust::upper_bound(thrust::seq, indices1, indices1 + local_degree1, inbr_start[k]); + + size_t nr_repeats = (last_occurence - first_occurence); + + auto offset = thrust::distance(indices1, first_occurence); + + printf("For vertex %d repeats = %d\n", + static_cast(inbr_start[k]), + static_cast(nr_repeats)); + + for (size_t m = 0; m < nr_repeats; m++) { + // ip1_start[multiedge_intersection_size_1 + m] = property1[offset + m]; + + if (debug) + printf(">>myrank=%d offset+m=%d p=%f\n", + static_cast(myrank), + static_cast(offset + m), + static_cast(property1[offset + m])); + } + multiedge_intersection_size_1 += nr_repeats; + } + //// + */ // copy edge properties from second vertex to common neighbors thrust::lower_bound(thrust::seq, indices1, @@ -669,31 +742,34 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { ip1_start, // indices thrust::less()); - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < 
insection_size; k++) { - if (false) + if (debug) printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); } - if (false) printf("\n"); - - thrust::transform( - thrust::seq, - ip1_start, - ip1_start + insection_size, - ip1_start, - [property1, myrank] __device__(auto idx) { - if (false) printf("myrank=%d idx=%d\n", static_cast(myrank), static_cast(idx)); - return property1[static_cast(idx)]; - }); + if (debug) printf("\n"); + + thrust::transform(thrust::seq, + ip1_start, + ip1_start + insection_size, + ip1_start, + [property1, myrank, debug] __device__(auto idx) { + if (debug) + printf("myrank=%d idx=%d p=%f\n", + static_cast(myrank), + static_cast(idx), + static_cast(property1[static_cast(idx)])); + return property1[static_cast(idx)]; + }); /// - if (false) printf("\n"); + if (debug) printf("\n"); for (size_t k = 0; k < insection_size; k++) { - if (false) + if (debug) printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); } - if (false) printf("\n"); + if (debug) printf("\n"); /// } diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp index 90827897cf8..a7a2be4bb03 100644 --- a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ -142,9 +142,9 @@ class Tests_MGSimilarity std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]); } - h_v1.resize(1); - two_hop_nbrs.resize(1, handle_->get_stream()); - two_hop_nbrs.shrink_to_fit(handle_->get_stream()); + // h_v1.resize(1); + // two_hop_nbrs.resize(1, handle_->get_stream()); + // two_hop_nbrs.shrink_to_fit(handle_->get_stream()); auto d_v1 = cugraph::test::to_device(*handle_, h_v1); auto d_v2 = std::move(two_hop_nbrs); @@ -326,6 +326,8 @@ class Tests_MGSimilarity auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2); auto h_result_score = cugraph::test::to_host(*handle_, 
result_score); + std::cout << "pari size: " << h_vertex_pair1.size() << " " << h_vertex_pair2.size() << std::endl; + if (wgt && similarity_usecase.use_weights) { weighted_similarity_compare(mg_graph_view.number_of_vertices(), std::tie(src, dst, wgt), diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp index d0b192ff894..5c312a768d0 100644 --- a/cpp/tests/link_prediction/similarity_compare.hpp +++ b/cpp/tests/link_prediction/similarity_compare.hpp @@ -31,10 +31,15 @@ struct test_jaccard_t { template weight_t compute_score(weight_t weight_a, weight_t weight_b, - weight_t a_intersect_b, - weight_t a_union_b) const + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return a_intersect_b / a_union_b; + if (std::abs(static_cast(weight_a_union_b) - double{0}) < + double{2} / std::numeric_limits::max()) { + return weight_t{0}; + } else { + return weight_a_intersect_b / weight_a_union_b; + } } template @@ -55,10 +60,15 @@ struct test_sorensen_t { template weight_t compute_score(weight_t weight_a, weight_t weight_b, - weight_t a_intersect_b, - weight_t a_union_b) const + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return (2 * a_intersect_b) / (weight_a + weight_b); + if (std::abs(static_cast(weight_a_union_b) - double{0}) < + double{2} / std::numeric_limits::max()) { + return weight_t{0}; + } else { + return (2 * weight_a_intersect_b) / (weight_a + weight_b); + } } template @@ -79,10 +89,15 @@ struct test_overlap_t { template weight_t compute_score(weight_t weight_a, weight_t weight_b, - weight_t a_intersect_b, - weight_t a_union_b) const + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return a_intersect_b / std::min(weight_a, weight_b); + if (std::abs(static_cast(weight_a_union_b) - double{0}) < + double{2} / std::numeric_limits::max()) { + return weight_t{0}; + } else { + return weight_a_intersect_b / std::min(weight_a, weight_b); + } } 
template From 23e91e573ae44d32f3eebcb8b6e5cea38193a605 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 07:18:13 -0700 Subject: [PATCH 05/22] Fix test for per_v_pair_transform_dst_nbr_intersection --- .../cugraph/edge_partition_device_view.cuh | 17 +- cpp/src/link_prediction/similarity_impl.cuh | 22 - cpp/src/prims/detail/nbr_intersection.cuh | 782 +----------------- ..._v_pair_transform_dst_nbr_intersection.cuh | 77 -- .../mg_weighted_similarity_test.cpp | 3 +- ...transform_dst_nbr_weighted_intersection.cu | 684 +++------------ 6 files changed, 149 insertions(+), 1436 deletions(-) diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 163bc0a709c..02b931fbde6 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -111,23 +111,8 @@ class edge_partition_device_view_base_t { // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC __device__ thrust::tuple local_edges( - vertex_t major_idx, bool debug = false) const noexcept + vertex_t major_idx) const noexcept { - if (debug && major_idx == 0) { - printf("offsets_.size(): %d\n", static_cast(offsets_.size())); - - printf("offsets_: "); - for (size_t k = 0; k < offsets_.size(); k++) { - printf("%d ", static_cast(offsets_[k])); - } - printf("\n"); - - printf("indices_: "); - for (size_t k = 0; k < indices_.size(); k++) { - printf("%d ", static_cast(indices_[k])); - } - printf("\n"); - } auto edge_offset = offsets_[major_idx]; auto local_degree = offsets_[major_idx + 1] - edge_offset; auto indices = indices_.data() + edge_offset; diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 1e60c1f5a23..21fbd9712f2 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -73,7 +73,6 @@ rmm::device_uvector similarity( rmm::device_uvector vertex_weights = 
compute_out_weight_sums(handle, graph_view, *edge_weight_view); - std::cout << ">>>>>>> WITH WEIGHT .........." << std::endl; per_v_pair_transform_dst_nbr_intersection( handle, graph_view, @@ -88,19 +87,6 @@ rmm::device_uvector similarity( auto intersection, auto intersected_properties_a, auto intersected_properties_b) { - bool CODE_DEBUG = false; - if (CODE_DEBUG) - for (size_t k = 0; k < intersection.size(); k++) { - printf("=> (v1 = %d v2 = %d wdeg(v1) = %f wdeg(v2) = %f) %d %f %f\n", - static_cast(v1), - static_cast(v2), - static_cast(weight_a), - static_cast(weight_b), - static_cast(intersection[k]), - static_cast(intersected_properties_a[k]), - static_cast(intersected_properties_b[k])); - } - weight_t min_weight_a_intersect_b = weight_t{0}; weight_t max_weight_a_intersect_b = weight_t{0}; weight_t sum_of_intersected_a = weight_t{0}; @@ -120,13 +106,6 @@ rmm::device_uvector similarity( max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; - // printf("=> v1= %d v2 = %d\n", static_cast(v1), static_cast(v2)); - // printf("=>weight_a = %f\n", static_cast(weight_a)); - // printf("=>weight_b = %f\n", static_cast(weight_b)); - // printf("=>min_weight_a_intersect_b = %f\n", - // static_cast(min_weight_a_intersect_b)); printf("=>max_weight_a_intersect_b = - // %f\n", static_cast(max_weight_a_intersect_b)); - return functor.compute_score(static_cast(weight_a), static_cast(weight_b), static_cast(min_weight_a_intersect_b), @@ -139,7 +118,6 @@ rmm::device_uvector similarity( // CUGRAPH_FAIL("weighted similarity computations are not supported in this release"); } else { - std::cout << ">>>>>>> WITHOUT WEIGHT .........." 
<< std::endl; rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); // diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 9535d4d3f02..1b59f6e970a 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -358,37 +358,16 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { vertex_t invalid_id{}; - int rank{1000}; - size_t edge_partition_idx{1000}; - __device__ edge_t operator()(size_t i) { auto pair = *(vertex_pair_first + i); - bool debug = false; -#if 1 - if (debug) - printf( - "-----rank=%d, edge_partition_idx=%d------\n", rank, static_cast(edge_partition_idx)); - - if constexpr (!std::is_same_v) { - if (debug) printf("called with FirstElementToIdxMap********\n"); - } - if constexpr (!std::is_same_v) { - if (debug) printf("called with SecondElementToIdxMap---------\n"); - } -#endif - vertex_t const* indices0{nullptr}; EdgeProperty_t const* property0{nullptr}; [[maybe_unused]] edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { -#if 1 - if (debug) printf("element0 from edge_partition\n"); -#endif - vertex_t major = thrust::get<0>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -409,10 +388,6 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } } else { -#if 1 - if (debug) printf("element0 from first_element_to_idx_map******\n"); -#endif - auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); local_degree0 = static_cast(first_element_offsets[idx + 1] - first_element_offsets[idx]); @@ -423,69 +398,19 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { if constexpr (std::is_same_v) { if constexpr (!std::is_same_v) { property0 = edge_partition_e_value_input.value_first() + local_edge_offset0; - - } else { - // nothing } } else { if constexpr 
(!std::is_same_v) { property0 = first_element_properties.begin() + local_edge_offset0; - - } else { - // nothing - } - } - -#if 1 - vertex_t element0 = thrust::get<0>(pair); - if (debug) - printf("element0 %d, local_degree0 %d local_edge_offset0 %d\n", - static_cast(element0), - static_cast(local_degree0), - static_cast(local_edge_offset0)); - - for (edge_t k = 0; k < local_degree0; k++) { - if constexpr (std::is_same_v) { - if constexpr (!std::is_same_v) { - EdgeProperty_t ep{}; - ep = edge_partition_e_value_input.get(local_edge_offset0 + k); - if (debug) - printf("( %d %d %.2f %.2f)* ", - static_cast(k), - static_cast(indices0[k]), - static_cast(ep), - static_cast(property0[k])); - } else { - if (debug) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); - } - - } else { - if constexpr (!std::is_same_v) { - EdgeProperty_t ep = *(first_element_properties.begin() + local_edge_offset0 + k); - if (debug) - printf("(%d %d %.2f %.2f)* ", - static_cast(k), - static_cast(indices0[k]), - static_cast(ep), - static_cast(property0[k])); - } else { - if (debug) printf("%d %d* ", static_cast(k), static_cast(indices0[k])); - } } } - if (debug) printf("\n"); -#endif vertex_t const* indices1{nullptr}; EdgeProperty_t const* property1{nullptr}; [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { -#if 1 - if (debug) printf("element1 from edge_partition\n"); -#endif - vertex_t major = thrust::get<1>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -506,10 +431,6 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } } else { -#if 1 - if (debug) printf("element1 from second_element_to_idx_map----\n"); -#endif - auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_degree1 = static_cast(second_element_offsets[idx + 1] - second_element_offsets[idx]); @@ -521,59 +442,12 @@ struct 
copy_intersecting_nbrs_and_update_intersection_size_t { if constexpr (std::is_same_v) { if constexpr (!std::is_same_v) { property1 = edge_partition_e_value_input.value_first() + local_edge_offset1; - - } else { - // nothing } - } else { if constexpr (!std::is_same_v) { property1 = second_element_properties.begin() + local_edge_offset1; - - } else { - // nothing - } - } - -#if 1 - vertex_t element1 = thrust::get<1>(pair); - if (debug) - printf("element1 %d, local_degree1 %d local_edge_offset1 %d\n", - static_cast(element1), - static_cast(local_degree1), - static_cast(local_edge_offset1)); - - for (edge_t k = 0; k < local_degree1; k++) { - if constexpr (std::is_same_v) { - if constexpr (!std::is_same_v) { - EdgeProperty_t ep{}; - ep = edge_partition_e_value_input.get(local_edge_offset1 + k); - if (debug) - printf("(%d %d %.2f %.2f)- ", - static_cast(k), - static_cast(indices1[k]), - static_cast(ep), - static_cast(property1[k])); - } else { - if (debug) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); - } - - } else { - if constexpr (!std::is_same_v) { - EdgeProperty_t ep = *(second_element_properties.begin() + local_edge_offset1 + k); - if (debug) - printf("(%d %d %.2f %.2f)- ", - static_cast(k), - static_cast(indices1[k]), - static_cast(ep), - static_cast(property1[k])); - } else { - if (debug) printf("%d %d- ", static_cast(k), static_cast(indices1[k])); - } } } - if (debug) printf("\n"); -#endif // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance @@ -592,72 +466,10 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], invalid_id); - // auto insection_size = static_cast(thrust::distance(inbr_start, it)); - - if (debug) - printf( - "rank = %d insection_size=%d\n", static_cast(rank), static_cast(insection_size)); - - if (debug) printf("\n"); - for (size_t k = 0; k < 
insection_size; k++) { - if (debug) printf("k = %d inbr = %d ", static_cast(k), static_cast(inbr_start[k])); - } - if (debug) printf("\n"); - if constexpr (!std::is_same_v) { -#if 1 - if (debug) printf("\n"); - for (size_t k = 0; k < local_degree0; k++) { - if (debug) printf("k = %d p0 = %d\n", static_cast(k), static_cast(property0[k])); - } - - if (debug) printf("\n"); - for (size_t k = 0; k < local_degree1; k++) { - if (debug) printf("k = %d p1 = %d\n", static_cast(k), static_cast(property1[k])); - } - if (debug) printf("\n"); - -#endif - auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; - /* - //// new block - size_t multiedge_intersection_size0 = 0; - - for (size_t k = 0; k < insection_size; k++) { - auto myrank = rank; - - auto first_occurence = thrust::lower_bound( - thrust::seq, indices0, indices0 + local_degree0, inbr_start[k], thrust::less()); - - auto last_occurence = - thrust::upper_bound(thrust::seq, indices0, indices0 + local_degree0, inbr_start[k]); - - size_t nr_repeats = (last_occurence - first_occurence); - - auto offset = thrust::distance(indices0, first_occurence); - - printf("For vertex %d repeats = %d\n", - static_cast(inbr_start[k]), - static_cast(nr_repeats)); - - for (size_t m = 0; m < nr_repeats; m++) { - // ip0_start[multiedge_intersection_size0 + m] = property0[offset + m]; - - if (debug) - printf(">>myrank=%d offset+m=%d p=%f\n", - static_cast(myrank), - static_cast(offset + m), - static_cast(property0[offset + m])); - } - multiedge_intersection_size0 += nr_repeats; - } - - //// - */ - // copy edge properties from first vertex to common neighbors thrust::lower_bound(thrust::seq, indices0, @@ -667,72 +479,15 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { ip0_start, // indices thrust::less()); - if (debug) printf("\n"); - for (size_t k = 0; k < insection_size; k++) { - if (debug) printf("k = %d idx = %d ", static_cast(k), static_cast(ip0_start[k])); - } - if (debug) printf("\n"); - - 
auto myrank = rank; - thrust::transform(thrust::seq, - ip0_start, - ip0_start + insection_size, - ip0_start, - [property0, myrank, debug] __device__(auto idx) { - if (debug) - printf("myrank=%d idx=%d p=%f\n", - static_cast(myrank), - static_cast(idx), - static_cast(property0[static_cast(idx)])); - return property0[static_cast(idx)]; - }); - - /// - - if (debug) printf("\n"); - for (size_t k = 0; k < insection_size; k++) { - if (debug) - printf("rank = %d inbrp0 = %d ", static_cast(rank), static_cast(ip0_start[k])); - } - if (debug) printf("\n"); - - /// + thrust::transform( + thrust::seq, + ip0_start, + ip0_start + insection_size, + ip0_start, + [property0] __device__(auto idx) { return property0[static_cast(idx)]; }); auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; - /* - //// new block - size_t multiedge_intersection_size_1 = 0; - - for (size_t k = 0; k < insection_size; k++) { - auto myrank = rank; - auto first_occurence = thrust::lower_bound( - thrust::seq, indices1, indices1 + local_degree1, inbr_start[k], thrust::less()); - - auto last_occurence = - thrust::upper_bound(thrust::seq, indices1, indices1 + local_degree1, inbr_start[k]); - - size_t nr_repeats = (last_occurence - first_occurence); - - auto offset = thrust::distance(indices1, first_occurence); - - printf("For vertex %d repeats = %d\n", - static_cast(inbr_start[k]), - static_cast(nr_repeats)); - - for (size_t m = 0; m < nr_repeats; m++) { - // ip1_start[multiedge_intersection_size_1 + m] = property1[offset + m]; - - if (debug) - printf(">>myrank=%d offset+m=%d p=%f\n", - static_cast(myrank), - static_cast(offset + m), - static_cast(property1[offset + m])); - } - multiedge_intersection_size_1 += nr_repeats; - } - //// - */ // copy edge properties from second vertex to common neighbors thrust::lower_bound(thrust::seq, indices1, @@ -742,36 +497,12 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { ip1_start, // indices thrust::less()); - if (debug) 
printf("\n"); - for (size_t k = 0; k < insection_size; k++) { - if (debug) - printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); - } - if (debug) printf("\n"); - - thrust::transform(thrust::seq, - ip1_start, - ip1_start + insection_size, - ip1_start, - [property1, myrank, debug] __device__(auto idx) { - if (debug) - printf("myrank=%d idx=%d p=%f\n", - static_cast(myrank), - static_cast(idx), - static_cast(property1[static_cast(idx)])); - return property1[static_cast(idx)]; - }); - - /// - - if (debug) printf("\n"); - for (size_t k = 0; k < insection_size; k++) { - if (debug) - printf("rank = %d inbrp1 = %d ", static_cast(rank), static_cast(ip1_start[k])); - } - if (debug) printf("\n"); - - /// + thrust::transform( + thrust::seq, + ip1_start, + ip1_start + insection_size, + ip1_start, + [property1] __device__(auto idx) { return property1[static_cast(idx)]; }); } return static_cast(thrust::distance(inbr_start, it)); } @@ -956,8 +687,6 @@ nbr_intersection(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; - bool DEBUG_CODE = false; - using edge_partition_e_input_device_view_t = std::conditional_t< std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, @@ -1071,21 +800,6 @@ nbr_intersection(raft::handle_t const& handle, } } - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector( - "unique_majors", unique_majors.data(), unique_majors.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - // 2.2 Send majors and group (major_comm_rank, edge_partition_idx) counts rmm::device_uvector rx_majors(0, handle.get_stream()); @@ -1125,27 +839,6 @@ nbr_intersection(raft::handle_t const& handle, size_t{0}); } - if (DEBUG_CODE) - for (int k = 0; k 
< comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector( - "d_tx_group_counts", d_tx_group_counts.data(), d_tx_group_counts.size(), std::cout); - - std::cout << "tx_counts:"; - std::copy(tx_counts.data(), - tx_counts.data() + tx_counts.size(), - std::ostream_iterator(std::cout, " ")); - std::cout << std::endl; - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - std::tie(rx_majors, rx_major_counts) = shuffle_values(major_comm, unique_majors.begin(), tx_counts, handle.get_stream()); @@ -1156,29 +849,6 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("rx_majors", rx_majors.data(), rx_majors.size(), std::cout); - - std::cout << "rx_major_counts:"; - std::copy(rx_major_counts.data(), - rx_major_counts.data() + rx_major_counts.size(), - std::ostream_iterator(std::cout, " ")); - std::cout << std::endl; - - raft::print_device_vector( - "rx_group_counts", rx_group_counts.data(), rx_group_counts.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - // 2.3. 
Enumerate degrees and neighbors for the received majors rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); @@ -1207,23 +877,6 @@ nbr_intersection(raft::handle_t const& handle, raft::device_span( rx_group_counts.data(), rx_group_counts.size())}); - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("rx_reordered_group_counts", - rx_reordered_group_counts.data(), - rx_reordered_group_counts.size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - rmm::device_uvector d_rx_reordered_group_lasts(rx_reordered_group_counts.size(), handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), @@ -1243,21 +896,6 @@ nbr_intersection(raft::handle_t const& handle, rx_group_counts.end(), rx_group_firsts.begin()); - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector( - "rx_group_firsts", rx_group_firsts.data(), rx_group_firsts.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - local_degrees_for_rx_majors.resize(rx_majors.size(), handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = @@ -1285,23 +923,6 @@ nbr_intersection(raft::handle_t const& handle, local_degrees_for_rx_majors.size())}); } - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("local_degrees_for_rx_majors", - local_degrees_for_rx_majors.data(), - local_degrees_for_rx_majors.size(), - std::cout); - - std::cout << 
"------------------" << std::endl; - } - comm.barrier(); - } - rmm::device_uvector local_nbr_offsets_for_rx_majors( local_degrees_for_rx_majors.size() + 1, handle.get_stream()); local_nbr_offsets_for_rx_majors.set_element_to_zero_async(size_t{0}, handle.get_stream()); @@ -1319,23 +940,6 @@ nbr_intersection(raft::handle_t const& handle, (*local_nbrs_properties_for_rx_majors) .resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("local_nbr_offsets_for_rx_majors", - local_nbr_offsets_for_rx_majors.data(), - local_nbr_offsets_for_rx_majors.size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( @@ -1375,23 +979,6 @@ nbr_intersection(raft::handle_t const& handle, (*local_nbrs_properties_for_rx_majors).size())}); } - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("local_nbrs_for_rx_majors", - local_nbrs_for_rx_majors.data(), - local_nbrs_for_rx_majors.size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); std::inclusive_scan( rx_major_counts.begin(), rx_major_counts.end(), h_rx_offsets.begin() + 1); @@ -1413,23 +1000,6 @@ nbr_intersection(raft::handle_t const& handle, d_local_nbr_counts.size(), handle.get_stream()); handle.sync_stream(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << 
std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("d_local_nbr_counts", - d_local_nbr_counts.data(), - d_local_nbr_counts.size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } } // 2.4 Send the degrees and neighbors back @@ -1449,23 +1019,6 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_offsets).begin() + 1); } - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("(*major_nbr_offsets)", - (*major_nbr_offsets).data(), - (*major_nbr_offsets).size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - std::tie(*major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); @@ -1480,30 +1033,6 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("(*major_nbr_indices)", - (*major_nbr_indices).data(), - (*major_nbr_indices).size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*major_nbr_properties)", - (*major_nbr_properties).data(), - (*major_nbr_properties).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - major_to_idx_map_ptr = std::make_unique>( unique_majors.begin(), unique_majors.end(), @@ -1707,30 +1236,6 @@ nbr_intersection(raft::handle_t const& handle, auto const comm_rank = comm.get_rank(); auto const comm_size = comm.get_size(); - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << 
comm_rank << " partition index:" << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("(*major_nbr_indices)", - (*major_nbr_indices).data(), - (*major_nbr_indices).size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*major_nbr_properties)", - (*major_nbr_properties).data(), - (*major_nbr_properties).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - auto second_element_to_idx_map = detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( @@ -1768,45 +1273,7 @@ nbr_intersection(raft::handle_t const& handle, (*rx_v_pair_nbr_intersection_properties0).size()), raft::device_span((*rx_v_pair_nbr_intersection_properties1).data(), (*rx_v_pair_nbr_intersection_properties1).size()), - invalid_vertex_id::value, - handle.get_comms().get_rank(), - i}); - - // { - // auto& comm = handle.get_comms(); - // auto const comm_rank = comm.get_rank(); - // auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i - << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("rx_v_pair_nbr_intersection_indices", - rx_v_pair_nbr_intersection_indices.data(), - rx_v_pair_nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", - (*rx_v_pair_nbr_intersection_properties0).data(), - (*rx_v_pair_nbr_intersection_properties0).size(), - std::cout); - - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", - (*rx_v_pair_nbr_intersection_properties1).data(), - (*rx_v_pair_nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - // } + 
invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); @@ -1849,42 +1316,6 @@ nbr_intersection(raft::handle_t const& handle, (*rx_v_pair_nbr_intersection_properties1).shrink_to_fit(handle.get_stream()); } - { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-copy-tabulate) :" << comm_rank << " partition index:" << i - << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("rx_v_pair_nbr_intersection_indices", - rx_v_pair_nbr_intersection_indices.data(), - rx_v_pair_nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties0)", - (*rx_v_pair_nbr_intersection_properties0).data(), - (*rx_v_pair_nbr_intersection_properties0).size(), - std::cout); - - raft::print_device_vector("(*rx_v_pair_nbr_intersection_properties1)", - (*rx_v_pair_nbr_intersection_properties1).data(), - (*rx_v_pair_nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - thrust::inclusive_scan(handle.get_thrust_policy(), rx_v_pair_nbr_intersection_sizes.begin(), rx_v_pair_nbr_intersection_sizes.end(), @@ -2110,42 +1541,6 @@ nbr_intersection(raft::handle_t const& handle, .resize((*gathered_nbr_intersection_properties1).size(), handle.get_stream()); } - { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-device_multicast_sendrecv) :" << comm_rank - << " partition index:" << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - 
raft::print_device_vector("gathered_nbr_intersection_indices", - gathered_nbr_intersection_indices.data(), - gathered_nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*gathered_nbr_intersection_properties0)", - (*gathered_nbr_intersection_properties0).data(), - (*gathered_nbr_intersection_properties0).size(), - std::cout); - - raft::print_device_vector("(*gathered_nbr_intersection_properties1)", - (*gathered_nbr_intersection_properties1).data(), - (*gathered_nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - if constexpr (!std::is_same_v) { thrust::for_each( handle.get_thrust_policy(), @@ -2194,42 +1589,6 @@ nbr_intersection(raft::handle_t const& handle, }); } - - { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after-gather) :" << comm_rank << " partition index:" << i - << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("combined_nbr_intersection_indices", - combined_nbr_intersection_indices.data(), - combined_nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*combined_nbr_intersection_properties0)", - (*combined_nbr_intersection_properties0).data(), - (*combined_nbr_intersection_properties0).size(), - std::cout); - - raft::print_device_vector("(*combined_nbr_intersection_properties1)", - (*combined_nbr_intersection_properties1).data(), - (*combined_nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } } edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes)); @@ -2363,9 +1722,7 @@ 
nbr_intersection(raft::handle_t const& handle, (*nbr_intersection_properties0).size()), raft::device_span((*nbr_intersection_properties1).data(), (*nbr_intersection_properties1).size()), - invalid_vertex_id::value, - 0, - 0}); + invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -2407,44 +1764,9 @@ nbr_intersection(raft::handle_t const& handle, size_t num_copied{0}; size_t num_scanned{0}; - if constexpr (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (before while loop) :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("nbr_intersection_indices", - nbr_intersection_indices.data(), - nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*nbr_intersection_properties0)", - (*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size(), - std::cout); - - raft::print_device_vector("(*nbr_intersection_properties1)", - (*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - while (num_scanned < nbr_intersection_indices.size()) { size_t this_scan_size = std::min( - size_t{1} << 30, + size_t{1} << 27, static_cast(thrust::distance(nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.end()))); if constexpr (std::is_same_v) { @@ -2466,11 +1788,6 @@ nbr_intersection(raft::handle_t const& handle, auto nbr = thrust::get<0>(nbr_p0_p1); auto p0 = thrust::get<1>(nbr_p0_p1); auto p1 = thrust::get<2>(nbr_p0_p1); - if (false) - printf("%d %d %d\n", - static_cast(nbr), - static_cast(p0), - static_cast(p1)); return thrust::get<0>(nbr_p0_p1) != invalid_vertex_id::value; 
}))); } @@ -2482,41 +1799,6 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_properties1 = std::move(tmp_properties1); } - if constexpr (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank (after while loop) :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("nbr_intersection_indices", - nbr_intersection_indices.data(), - nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*nbr_intersection_properties0)", - (*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size(), - std::cout); - - raft::print_device_vector("(*nbr_intersection_properties1)", - (*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - #else if constexpr (std::is_same_v) { @@ -2551,40 +1833,6 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.begin() + 1); } - if constexpr (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("nbr_intersection_indices", - nbr_intersection_indices.data(), - nbr_intersection_indices.size(), - std::cout); - - if constexpr (!std::is_same_v) { - raft::print_device_vector("(*nbr_intersection_properties0)", - (*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size(), - std::cout); - - 
raft::print_device_vector("(*nbr_intersection_properties1)", - (*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - // 5. Return return std::make_tuple(std::move(nbr_intersection_offsets), diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index 008276051f0..215f6dc3e90 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -166,20 +166,6 @@ struct call_intersection_op_t { dst_prop = *(vertex_property_first + dst_offset); } - /* - printf("(%d <-> %d) %d %d %d\n", - static_cast(src), - static_cast(dst), - static_cast(src_prop), - static_cast(dst_prop), - static_cast(intersection.size())); - - for (size_t k = 0; k < intersection.size(); k++) { - printf("%d ", static_cast(*(intersection.data() + k))); - } - printf("\n"); - */ - // if constexpr (std::is_same_v) { *(major_minor_pair_value_output_first + index) = intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1); @@ -246,7 +232,6 @@ void per_v_pair_transform_dst_nbr_intersection( { static_assert(!GraphViewType::is_storage_transposed); - bool DEBUG_CODE = false; using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using property_t = typename thrust::iterator_traits::value_type; @@ -343,24 +328,6 @@ void per_v_pair_transform_dst_nbr_intersection( size_t{0}); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - if (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << ", edge 
partittion idx = " << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); @@ -425,50 +392,6 @@ void per_v_pair_transform_dst_nbr_intersection( std::array{true, true}, do_expensive_check); - if constexpr (!std::is_same_v) { - if (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - if (DEBUG_CODE) - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << " partition index:" << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("intersection_offsets", - intersection_offsets.data(), - intersection_offsets.size(), - std::cout); - - raft::print_device_vector("intersection_indices", - intersection_indices.data(), - intersection_indices.size(), - std::cout); - - // if constexpr (!std::is_same_v) { - if (r_nbr_intersection_properties0) { - raft::print_device_vector("r_nbr_intersection_properties0", - r_nbr_intersection_properties0->data(), - r_nbr_intersection_properties0->size(), - std::cout); - } - if (r_nbr_intersection_properties1) { - raft::print_device_vector("r_nbr_intersection_properties1", - r_nbr_intersection_properties1->data(), - r_nbr_intersection_properties1->size(), - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - } - if (unique_vertices) { auto vertex_value_input_for_unique_vertices_first = get_dataframe_buffer_begin(*property_buffer_for_unique_vertices); diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp index a7a2be4bb03..8e1fa90bfa1 100644 --- a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp +++ 
b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ -326,7 +326,8 @@ class Tests_MGSimilarity auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2); auto h_result_score = cugraph::test::to_host(*handle_, result_score); - std::cout << "pari size: " << h_vertex_pair1.size() << " " << h_vertex_pair2.size() << std::endl; + std::cout << "pari size: " << h_vertex_pair1.size() << " " << h_vertex_pair2.size() + << std::endl; if (wgt && similarity_usecase.use_weights) { weighted_similarity_compare(mg_graph_view.number_of_vertices(), diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 2d3a054a55d..162220f7b00 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -59,12 +59,6 @@ struct intersection_op_t { raft::device_span intersection_p0, raft::device_span intersection_p1) const { - // printf("\n%d %d %d %d %d\n", - // static_cast(v0), - // static_cast(v1), - // static_cast(v0_prop), - // static_cast(v1_prop), - // static_cast(intersection.size())); return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); } }; @@ -104,12 +98,25 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.start("MG Construct graph"); } + constexpr bool store_transposed = false; + constexpr bool multi_gpu = true; + cugraph::graph_t mg_graph(*handle_); + std::optional< + cugraph::edge_property_t, + weight_t>> + mg_edge_weight{std::nullopt}; + std::optional> mg_renumber_map{std::nullopt}; - // std::tie(mg_graph, std::ignore, mg_renumber_map) = - // cugraph::test::construct_graph( - // *handle_, input_usecase, false, true); + constexpr bool test_weighted = true; + constexpr bool renumber = true; + constexpr bool drop_self_loops = false; + constexpr bool drop_multi_edges = true; + + std::tie(mg_graph, mg_edge_weight, 
mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -118,455 +125,17 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.display_and_clear(std::cout); } - std::string file_path = "/home/nfs/mnaim/csv/similarity.csv"; - - constexpr bool store_transposed = false; - constexpr bool multi_gpu = true; - - std::optional< - cugraph::edge_property_t, - weight_t>> - edge_weights{std::nullopt}; - /* - /// - // - // Create decision graph from edgelist - // - - // using DecisionGraphViewType = cugraph::graph_view_t; - - // cugraph::graph_t decision_graph(*handle_); - - // std::optional> renumber_map{std::nullopt}; - // std::optional> - coarse_edge_weights{ - // std::nullopt}; - - vertex_t N = 4; - vertex_t nr_valid_tuples = N * N - N; - - std::vector h_srcs(nr_valid_tuples); - std::vector h_dsts(nr_valid_tuples); - std::vector h_weights(nr_valid_tuples); - - // rmm::device_uvector d_srcs(nr_valid_tuples, handle_->get_stream()); - // rmm::device_uvector d_dsts(nr_valid_tuples, handle_->get_stream()); - // std::optional> d_weights = - // std::make_optional(rmm::device_uvector(nr_valid_tuples, - handle_->get_stream())); - - auto& comm = handle_->get_comms(); - auto& major_comm = handle_->get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle_->get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto gpu_id_key_func = cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ - comm_size, major_comm_size, minor_comm_size}; - std::srand(comm_rank); - - int edge_counter = 0; - for (vertex_t i = 0; i < N; i++) { - for (vertex_t j = 0; j < N; j++) { - if (i != j) { - h_srcs[edge_counter] = i; - h_dsts[edge_counter] = j; - 
h_weights[edge_counter] = std::max(i, j) * 10 + std::min(i, j); - edge_counter++; - } - } - } - - comm.barrier(); - if (comm_rank == 0) - for (int i = 0; i < edge_counter; i++) { - std::cout << "(" << h_srcs[i] << "," << h_dsts[i] << ") => " - << gpu_id_key_func(h_srcs[i], h_dsts[i]) << std::endl; - } - comm.barrier(); - - auto d_srcs = cugraph::test::to_device(*handle_, h_srcs); - auto d_dsts = cugraph::test::to_device(*handle_, h_dsts); - auto d_weights = std::make_optional(cugraph::test::to_device(*handle_, h_weights)); - - if (multi_gpu) { - auto& comm = handle_->get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "Rank :" << comm_rank << std::endl; - - std::cout << " d_srcs.size(): " << d_srcs.size() << " d_dsts.size(): " << d_dsts.size() - << " (*d_weights).size(): " << (*d_weights).size() << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("d_srcs: ", d_srcs.data(), d_srcs.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("d_dsts: ", d_dsts.data(), d_dsts.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector( - "(*d_weights): ", (*d_weights).data(), (*d_weights).size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - - std::tie(store_transposed ? d_dsts : d_srcs, - store_transposed ? d_srcs : d_dsts, - d_weights, - std::ignore, - std::ignore) = - cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< - vertex_t, - vertex_t, - weight_t, - int32_t>(*handle_, - store_transposed ? std::move(d_dsts) : std::move(d_srcs), - store_transposed ? 
std::move(d_srcs) : std::move(d_dsts), - std::move(d_weights), - std::nullopt, - std::nullopt); - - cugraph::test::sort_and_remove_multi_edges(*handle_, d_srcs, d_dsts, d_weights); - - if (multi_gpu) { - auto& comm = handle_->get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "Rank :" << comm_rank << std::endl; - - std::cout << " d_srcs.size(): " << d_srcs.size() << " d_dsts.size(): " << d_dsts.size() - << " (*d_weights).size(): " << (*d_weights).size() << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("d_srcs: ", d_srcs.data(), d_srcs.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector("d_dsts: ", d_dsts.data(), d_dsts.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - raft::print_device_vector( - "(*d_weights): ", (*d_weights).data(), (*d_weights).size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - - std::cout << "Before create_graph_from_edgelist ... 
" << std::endl; - std::tie(mg_graph, edge_weights, std::ignore, std::ignore, mg_renumber_map) = - cugraph::create_graph_from_edgelist(*handle_, - std::nullopt, - std::move(d_srcs), - std::move(d_dsts), - std::move(d_weights), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, false}, - true, - true); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "Returned from create_graph_from_edgelist" << std::endl; - // auto decision_graph_view = decision_graph.view(); - - /// - */ - - bool test_weighted = true; - bool renumber = true; - std::tie(mg_graph, edge_weights, mg_renumber_map) = + std::string file_path = "/home/nfs/mnaim/csv/similarity.csv"; + std::tie(mg_graph, mg_edge_weight, mg_renumber_map) = cugraph::test::read_graph_from_csv_file( *handle_, file_path, test_weighted, renumber); - - auto mg_graph_view = mg_graph.view(); - auto edge_weight_view = (*edge_weights).view(); - using GraphViewType = decltype(mg_graph.view()); - - if (GraphViewType::is_multi_gpu) { - auto vertex_partitions_range_lasts = - cugraph::test::to_device(*handle_, mg_graph_view.vertex_partition_range_lasts()); - - auto& comm = handle_->get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - auto& major_comm = handle_->get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto const major_comm_rank = major_comm.get_rank(); - - auto& minor_comm = handle_->get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "Rank :" << comm_rank << std::endl; - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << "(" << major_comm_size << minor_comm_size << ")" << std::endl; - - std::cout << "(major_rank, minor_rank): " << major_comm_rank << minor_comm.get_rank() - << 
std::endl; - - raft::print_device_vector("vertex_partitions_range_lasts:", - vertex_partitions_range_lasts.data(), - vertex_partitions_range_lasts.size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - - if constexpr (GraphViewType::is_multi_gpu) { - auto& comm = handle_->get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - /* - std::vector h_major_range_lasts(mg_graph_view.number_of_local_edge_partitions()); - for (size_t i = 0; i < mg_graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - cugraph::edge_partition_device_view_t( - mg_graph_view.local_edge_partition_view(i)); - h_major_range_lasts[i] = edge_partition.major_range_last(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "(rank = " << comm_rank << ", edge partittion idx = " << i - << ") : " << edge_partition.major_range_first() << " -- " - << edge_partition.major_range_last() << std::endl; - } - comm.barrier(); - } - } - - rmm::device_uvector d_major_range_lasts(h_major_range_lasts.size(), - handle_->get_stream()); - raft::update_device(d_major_range_lasts.data(), - h_major_range_lasts.data(), - h_major_range_lasts.size(), - handle_->get_stream()); - handle_->sync_stream(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("d_major_range_lasts: ", - d_major_range_lasts.data(), - d_major_range_lasts.size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - */ - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << 
"edge_counts: "; - std::copy(edge_weight_view.edge_counts().begin(), - edge_weight_view.edge_counts().end(), - std::ostream_iterator(std::cout, " ")); - std::cout << std::endl; - edge_t num_edges = std::reduce(edge_weight_view.edge_counts().begin(), - edge_weight_view.edge_counts().end()); - - std::cout << std::endl << "num_edges: " << num_edges << std::endl; - - for (size_t i = 0; i < mg_graph_view.number_of_local_edge_partitions(); ++i) { - std::cout << "partition " << i << " weights"; - raft::print_device_vector(":", - edge_weight_view.value_firsts()[i], - edge_weight_view.edge_counts()[i], - std::cout); - } - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("(*mg_renumber_map): ", - (*mg_renumber_map).data(), - (*mg_renumber_map).size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - - for (size_t i = 0; i < mg_graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - cugraph::edge_partition_device_view_t( - mg_graph_view.local_edge_partition_view(i)); - - auto edge_partition_weight_view = - cugraph::detail::edge_partition_edge_property_device_view_t( - edge_weight_view, i); - - auto edge_partition_weight_value_ptr = edge_partition_weight_view.value_first(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << ", edge partittion idx = " << i << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("edge_weight_view: ", - edge_weight_view.value_firsts()[i], - edge_weight_view.edge_counts()[i], - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - 
std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << "rank = " << comm_rank << ", edge partittion idx = " << i << " : " - << edge_partition.major_range_first() << "--" - << edge_partition.major_range_last() << std::endl; - - thrust::for_each( - handle_->get_thrust_policy(), - thrust::make_counting_iterator(edge_partition.major_range_first()), - thrust::make_counting_iterator(edge_partition.major_range_last()), - [edge_partition, edge_partition_weight_value_ptr] __device__(vertex_t major) { - printf("major -> %d\n", major); - - vertex_t major_idx{}; - auto major_hypersparse_first = edge_partition.major_hypersparse_first(); - if (major_hypersparse_first) { - printf("*major_hypersparse_first = %d\n", - static_cast(*major_hypersparse_first)); - - if (major < *major_hypersparse_first) { - major_idx = edge_partition.major_offset_from_major_nocheck(major); - } else { - auto major_hypersparse_idx = - edge_partition.major_hypersparse_idx_from_major_nocheck(major); - if (!major_hypersparse_idx) { - printf("No major_hypersparse_idx\n"); - return true; - } - major_idx = - edge_partition.major_offset_from_major_nocheck(*major_hypersparse_first) + - *major_hypersparse_idx; - } - } else { - printf("No major_hypersparse_first\n"); - - major_idx = edge_partition.major_offset_from_major_nocheck(major); - } - - printf("==> major_idx = %d\n", major_idx); - - vertex_t const* indices{nullptr}; - edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = - edge_partition.local_edges(major_idx, true); - - // std::optional> edge_weight_view; - - auto number_of_edges = edge_partition.number_of_edges(); - - printf( - "major = %d edge_offset = %d local_degree= %d nr_edges_of_edge_partition=%d\n", - major, - edge_offset, - local_degree, - number_of_edges); - for (edge_t nbr_idx = 0; nbr_idx < local_degree; nbr_idx++) { - // printf("%d ", indices[nbr_idx]); - printf("%d %d %.2f \n", - major, - 
indices[nbr_idx], - *(edge_partition_weight_value_ptr + edge_offset + nbr_idx)); - } - printf("\n"); - }); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - - } // end of loop over edge partitions - } - - /* - - if (multi_gpu) { - auto& comm = handle_->get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - - for (int k = 0; k < comm_size; k++) { - comm.barrier(); - if (comm_rank == k) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "Rank :" << comm_rank << std::endl; - - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - } - */ - // #if 0 + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = (*mg_edge_weight).view(); + // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive ASSERT_TRUE( @@ -579,12 +148,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection (static_cast(comm_rank) < prims_usecase.num_vertex_pairs % comm_size ? 1 : 0), handle_->get_stream()); - std::cout << "Rank: " << comm_rank - << " prims_usecase.num_vertex_pairs:" << prims_usecase.num_vertex_pairs << std::endl; - - std::cout << "Rank: " << comm_rank << " cugraph::size_dataframe_buffer(mg_vertex_pair_buffer): " - << cugraph::size_dataframe_buffer(mg_vertex_pair_buffer) << std::endl; - thrust::tabulate( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), @@ -592,13 +155,8 @@ class Tests_MGPerVPairTransformDstNbrIntersection [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) { cuco::detail::MurmurHash3_32 hash_func{}; // use hash_func to generate arbitrary vertex pairs - auto v0 = 2; // static_cast(hash_func(i + comm_rank) % num_vertices); - auto v1 = - 3; // static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); - printf("comm_rank=%d v0= %d, v1=%d\n", - static_cast(comm_rank), - static_cast(v0), - static_cast(v1)); + auto v0 = static_cast(hash_func(i + comm_rank) % num_vertices); 
+ auto v1 = static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); return thrust::make_tuple(v0, v1); }); @@ -620,29 +178,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::nullopt, h_vertex_partition_range_lasts); - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("std::get<0>(mg_vertex_pair_buffer)", - std::get<0>(mg_vertex_pair_buffer).data(), - std::get<0>(mg_vertex_pair_buffer).size(), - std::cout); - - raft::print_device_vector("std::get<1>(mg_vertex_pair_buffer)", - std::get<1>(mg_vertex_pair_buffer).data(), - std::get<1>(mg_vertex_pair_buffer).size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); @@ -656,13 +191,35 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, - edge_weight_view, + mg_edge_weight_view, cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), mg_out_degrees.begin(), intersection_op_t{}, cugraph::get_dataframe_buffer_begin(mg_result_buffer)); - + /* + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("mg_vertex_pair_buffer<0>", + std::get<0>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + std::cout); + + raft::print_device_vector("mg_vertex_pair_buffer<1>", + std::get<1>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + std::cout); + + raft::print_device_vector("mg_result_buffer<0>", + std::get<0>(mg_result_buffer).data(), + 
cugraph::size_dataframe_buffer(mg_result_buffer), + std::cout); + + raft::print_device_vector("mg_result_buffer<1>", + std::get<1>(mg_result_buffer).data(), + cugraph::size_dataframe_buffer(mg_result_buffer), + std::cout); + */ if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -670,29 +227,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.display_and_clear(std::cout); } - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("std::get<0>(mg_result_buffer)", - std::get<0>(mg_result_buffer).data(), - std::get<0>(mg_result_buffer).size(), - std::cout); - - raft::print_device_vector("std::get<1>(mg_result_buffer)", - std::get<1>(mg_result_buffer).data(), - std::get<1>(mg_result_buffer).size(), - std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - // 3. 
validate MG results if (prims_usecase.check_correctness) { @@ -720,7 +254,19 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::test::device_gatherv(*handle_, std::get<1>(mg_vertex_pair_buffer).data(), std::get<1>(mg_vertex_pair_buffer).size()); + /* + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("mg_aggregate_vertex_pair_buffer<0>", + std::get<0>(mg_aggregate_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), + std::cout); + raft::print_device_vector("mg_aggregate_vertex_pair_buffer<1>", + std::get<1>(mg_aggregate_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), + std::cout); + */ auto mg_aggregate_result_buffer = cugraph::allocate_dataframe_buffer>(0, handle_->get_stream()); std::get<0>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( @@ -728,11 +274,33 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::get<1>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( *handle_, std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size()); + /* + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("mg_aggregate_result_buffer<0>", + std::get<0>(mg_aggregate_result_buffer).data(), + cugraph::size_dataframe_buffer(mg_aggregate_result_buffer), + std::cout); + + raft::print_device_vector("mg_aggregate_result_buffer<1>", + std::get<1>(mg_aggregate_result_buffer).data(), + cugraph::size_dataframe_buffer(mg_aggregate_result_buffer), + std::cout); + */ + cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + + std::optional< + cugraph::edge_property_t, + weight_t>> + sg_edge_weight{std::nullopt}; + + std::tie(sg_graph, sg_edge_weight, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, - std::optional>{std::nullopt}, + mg_edge_weight + ? 
std::make_optional(mg_edge_weight_view) + : std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); @@ -744,25 +312,38 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); auto sg_out_degrees = sg_graph_view.compute_out_degrees(*handle_); - // cugraph::per_v_pair_transform_dst_nbr_intersection( - // *handle_, - // sg_graph_view, - // edge_weight_view, - // cugraph::get_dataframe_buffer_begin( - // mg_aggregate_vertex_pair_buffer /* now unrenumbered */), - // cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered - // */), sg_out_degrees.begin(), intersection_op_t{}, - // cugraph::get_dataframe_buffer_begin(sg_result_buffer)); - - // bool valid = thrust::equal(handle_->get_thrust_policy(), - // cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), - // cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), - // cugraph::get_dataframe_buffer_begin(sg_result_buffer)); - - // ASSERT_TRUE(valid); + std::cout << "Calling SG per_v_pair_transform_dst_nbr_intersection\n"; + cugraph::per_v_pair_transform_dst_nbr_intersection( + *handle_, + sg_graph_view, + (*sg_edge_weight).view(), + cugraph::get_dataframe_buffer_begin( + mg_aggregate_vertex_pair_buffer /* now unrenumbered */), + cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered + */), sg_out_degrees.begin(), intersection_op_t{}, + cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + /* + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + raft::print_device_vector("sg_result_buffer<0>", + std::get<0>(sg_result_buffer).data(), + cugraph::size_dataframe_buffer(sg_result_buffer), + std::cout); + + raft::print_device_vector("sg_result_buffer<1>", + std::get<1>(sg_result_buffer).data(), + cugraph::size_dataframe_buffer(sg_result_buffer), + std::cout); + */ + bool valid = 
thrust::equal(handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), + cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), + cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + ASSERT_TRUE(valid); } } - // #endif } private: @@ -784,7 +365,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTup run_current_test>(std::get<0>(param), std::get<1>(param)); } -/* + TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) { auto param = GetParam(); @@ -838,31 +419,28 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -*/ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1}, true}), + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{10}, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); -// INSTANTIATE_TEST_SUITE_P(rmat_small_test, -// Tests_MGPerVPairTransformDstNbrIntersection_Rmat, -// ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, -// true}), -// ::testing::Values(cugraph::test::Rmat_Usecase( -// 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); - -// INSTANTIATE_TEST_SUITE_P( -// rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with -// --gtest_filter to select only the rmat_benchmark_test with a specific -// vertex & edge type combination) by command line arguments and do not -// include more than one Rmat_Usecase that differ only in scale or edge -// factor (to avoid running same benchmarks more than once) */ -// Tests_MGPerVPairTransformDstNbrIntersection_Rmat, -// ::testing::Combine( -// ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), -// ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 
0.19, 0.19, 0, false, -// false)))); +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 261ec7adff8bb866cd6b1ed7462134e471a5523d Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 08:01:12 -0700 Subject: [PATCH 06/22] Change test functor to check per_v_pair_transform_dst_nbr_intersection for weighted graphs --- cpp/src/link_prediction/similarity_impl.cuh | 6 +- ..._v_pair_transform_dst_nbr_intersection.cuh | 6 - ...transform_dst_nbr_weighted_intersection.cu | 132 ++++++------------ cpp/tests/utilities/test_graphs.hpp | 1 - 4 files changed, 45 insertions(+), 100 deletions(-) diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 21fbd9712f2..430f56d12c3 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -80,8 +80,8 @@ rmm::device_uvector similarity( vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs, weighted_out_degrees.begin(), - [functor] __device__(auto v1, - auto v2, + [functor] __device__(auto a, + auto b, auto weight_a, auto weight_b, auto intersection, @@ 
-115,8 +115,6 @@ rmm::device_uvector similarity( do_expensive_check); return similarity_score; - - // CUGRAPH_FAIL("weighted similarity computations are not supported in this release"); } else { rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index 215f6dc3e90..b40e0f102ce 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -166,14 +166,8 @@ struct call_intersection_op_t { dst_prop = *(vertex_property_first + dst_offset); } - // if constexpr (std::is_same_v) { *(major_minor_pair_value_output_first + index) = intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1); - - // } else { - // *(major_minor_pair_value_output_first + index) = - // intersection_op(src, dst, src_prop, dst_prop, intersection.size()); - // } } }; diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 162220f7b00..d3398eb0f62 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -51,15 +51,32 @@ template struct intersection_op_t { __device__ thrust::tuple operator()( - vertex_t v0, - vertex_t v1, - edge_t v0_prop /* out degree */, - edge_t v1_prop /* out degree */, + vertex_t a, + vertex_t b, + weight_t weight_a /* weighted out degree */, + weight_t weight_b /* weighted out degree */, raft::device_span intersection, - raft::device_span intersection_p0, - raft::device_span intersection_p1) const + raft::device_span intersected_properties_a, + raft::device_span intersected_properties_b) const { - return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); + weight_t 
min_weight_a_intersect_b = weight_t{0}; + weight_t max_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_intersected_a = weight_t{0}; + weight_t sum_of_intersected_b = weight_t{0}; + + for (size_t k = 0; k < intersection.size(); k++) { + min_weight_a_intersect_b += min(intersected_properties_a[k], intersected_properties_b[k]); + max_weight_a_intersect_b += max(intersected_properties_a[k], intersected_properties_b[k]); + sum_of_intersected_a += intersected_properties_a[k]; + sum_of_intersected_b += intersected_properties_b[k]; + } + + weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; + weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b; + + max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; + + return thrust::make_tuple(min_weight_a_intersect_b, max_weight_a_intersect_b); } }; @@ -118,6 +135,9 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::test::construct_graph( *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = (*mg_edge_weight).view(); + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -125,17 +145,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.display_and_clear(std::cout); } - /* - std::string file_path = "/home/nfs/mnaim/csv/similarity.csv"; - std::tie(mg_graph, mg_edge_weight, mg_renumber_map) = - cugraph::test::read_graph_from_csv_file( - *handle_, file_path, test_weighted, renumber); - using GraphViewType = decltype(mg_graph.view()); - */ - - auto mg_graph_view = mg_graph.view(); - auto mg_edge_weight_view = (*mg_edge_weight).view(); - // 2. 
run MG per_v_pair_transform_dst_nbr_intersection primitive ASSERT_TRUE( @@ -178,7 +187,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::nullopt, h_vertex_partition_range_lasts); - auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( + auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); @@ -188,38 +197,19 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.start("MG per_v_pair_transform_dst_nbr_intersection"); } + rmm::device_uvector mg_out_weight_sums = + compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view); + cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, mg_edge_weight_view, cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), - mg_out_degrees.begin(), + mg_out_weight_sums.begin(), intersection_op_t{}, cugraph::get_dataframe_buffer_begin(mg_result_buffer)); - /* - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("mg_vertex_pair_buffer<0>", - std::get<0>(mg_vertex_pair_buffer).data(), - cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), - std::cout); - - raft::print_device_vector("mg_vertex_pair_buffer<1>", - std::get<1>(mg_vertex_pair_buffer).data(), - cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), - std::cout); - - raft::print_device_vector("mg_result_buffer<0>", - std::get<0>(mg_result_buffer).data(), - cugraph::size_dataframe_buffer(mg_result_buffer), - std::cout); - - raft::print_device_vector("mg_result_buffer<1>", - std::get<1>(mg_result_buffer).data(), - cugraph::size_dataframe_buffer(mg_result_buffer), - std::cout); - */ + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -254,40 +244,15 @@ class 
Tests_MGPerVPairTransformDstNbrIntersection cugraph::test::device_gatherv(*handle_, std::get<1>(mg_vertex_pair_buffer).data(), std::get<1>(mg_vertex_pair_buffer).size()); - /* - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("mg_aggregate_vertex_pair_buffer<0>", - std::get<0>(mg_aggregate_vertex_pair_buffer).data(), - cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), - std::cout); - - raft::print_device_vector("mg_aggregate_vertex_pair_buffer<1>", - std::get<1>(mg_aggregate_vertex_pair_buffer).data(), - cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), - std::cout); - */ + auto mg_aggregate_result_buffer = - cugraph::allocate_dataframe_buffer>(0, handle_->get_stream()); + cugraph::allocate_dataframe_buffer>( + 0, handle_->get_stream()); std::get<0>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( *handle_, std::get<0>(mg_result_buffer).data(), std::get<0>(mg_result_buffer).size()); std::get<1>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( *handle_, std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size()); - /* - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("mg_aggregate_result_buffer<0>", - std::get<0>(mg_aggregate_result_buffer).data(), - cugraph::size_dataframe_buffer(mg_aggregate_result_buffer), - std::cout); - - raft::print_device_vector("mg_aggregate_result_buffer<1>", - std::get<1>(mg_aggregate_result_buffer).data(), - cugraph::size_dataframe_buffer(mg_aggregate_result_buffer), - std::cout); - */ - cugraph::graph_t sg_graph(*handle_); std::optional< @@ -308,11 +273,13 @@ class Tests_MGPerVPairTransformDstNbrIntersection if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); - auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( - cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); - auto sg_out_degrees = sg_graph_view.compute_out_degrees(*handle_); + auto 
sg_result_buffer = + cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); + + rmm::device_uvector sg_out_weight_sums = + compute_out_weight_sums(*handle_, sg_graph_view, (*sg_edge_weight).view()); - std::cout << "Calling SG per_v_pair_transform_dst_nbr_intersection\n"; cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, sg_graph_view, @@ -320,22 +287,9 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::get_dataframe_buffer_begin( mg_aggregate_vertex_pair_buffer /* now unrenumbered */), cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered - */), sg_out_degrees.begin(), intersection_op_t{}, + */), sg_out_weight_sums.begin(), intersection_op_t{}, cugraph::get_dataframe_buffer_begin(sg_result_buffer)); - /* - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("sg_result_buffer<0>", - std::get<0>(sg_result_buffer).data(), - cugraph::size_dataframe_buffer(sg_result_buffer), - std::cout); - - raft::print_device_vector("sg_result_buffer<1>", - std::get<1>(sg_result_buffer).data(), - cugraph::size_dataframe_buffer(sg_result_buffer), - std::cout); - */ bool valid = thrust::equal(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 13afddd92bd..16c9d3ed145 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -144,7 +144,6 @@ class File_Usecase : public detail::TranslateGraph_Usecase { std::optional> weights{}; std::optional> vertices{}; bool is_symmetric{}; - std::cout << "graph_file_full_path_: " << graph_file_full_path_ << std::endl; auto extension = graph_file_full_path_.substr(graph_file_full_path_.find_last_of(".") + 1); if (extension == "mtx") { std::tie(srcs, dsts, weights, vertices, 
is_symmetric) = From c5dfb4c2fb94771a72d0530f3bbddc1822006102 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 08:20:35 -0700 Subject: [PATCH 07/22] Style fix --- cpp/src/c_api/similarity.cpp | 2 +- cpp/tests/link_prediction/weighted_similarity_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp index 3241018bfbd..730416abd7b 100644 --- a/cpp/src/c_api/similarity.cpp +++ b/cpp/src/c_api/similarity.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp index b87d4c03581..0214df5dd4d 100644 --- a/cpp/tests/link_prediction/weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-223, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 4ee3f2591e92a0957a01476d6c530c31e06a4ee8 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 08:25:22 -0700 Subject: [PATCH 08/22] Style fix --- cpp/tests/link_prediction/similarity_compare.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/link_prediction/similarity_compare.cpp b/cpp/tests/link_prediction/similarity_compare.cpp index 7afda42e1c1..dec14a347f8 100644 --- a/cpp/tests/link_prediction/similarity_compare.cpp +++ b/cpp/tests/link_prediction/similarity_compare.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 9726631d1774fc3d6979c74670f634659ed300aa Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 10:23:47 -0700 Subject: [PATCH 09/22] Style fix --- cpp/tests/utilities/test_utilities.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 3f393b5353e..321a0536e02 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -536,6 +536,7 @@ mg_graph_to_sg_graph( bool renumber); // Only the rank 0 GPU holds the valid data + template std::tuple>, rmm::device_uvector> mg_vertex_property_values_to_sg_vertex_property_values( From 278cd823b99d1c0cc076461b62c7e988f791217f Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 11:27:43 -0700 Subject: [PATCH 10/22] Style fix --- cpp/tests/utilities/test_utilities.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 321a0536e02..0eff8dedc8f 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -547,7 +547,7 @@ mg_vertex_property_values_to_sg_vertex_property_values( std::optional> sg_renumber_map, // std::nullopt if the SG graph is not renumbered std::optional> - mg_vertices, // std::nullopt if the entire local vertex partition range is assumed + mg_vertices, // std::nullopt if the entire local vertex partition range is assumed raft::device_span mg_values); template From c0e93c9b0331493b230d1f37d7cba84146a6612f Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 21 Jul 2023 11:59:25 -0700 Subject: [PATCH 11/22] Style fix --- cpp/tests/link_prediction/weighted_similarity_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp 
b/cpp/tests/link_prediction/weighted_similarity_test.cpp index 0214df5dd4d..ca644b76c5a 100644 --- a/cpp/tests/link_prediction/weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-223, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From c6d1412141b5375f1b18ce05b5b3166633edc8d9 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Mon, 24 Jul 2023 12:37:08 -0700 Subject: [PATCH 12/22] Address PR comments --- cpp/src/link_prediction/jaccard_impl.cuh | 13 +++-- cpp/src/link_prediction/overlap_impl.cuh | 13 +++-- cpp/src/link_prediction/similarity_impl.cuh | 59 +++++++++++---------- cpp/src/link_prediction/sorensen_impl.cuh | 13 +++-- 4 files changed, 48 insertions(+), 50 deletions(-) diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index 1324ff78c80..bd4e2d5e58e 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -25,15 +25,14 @@ namespace detail { struct jaccard_functor_t { template - weight_t __device__ compute_score(weight_t cardinality_a, - weight_t cardinality_b, - weight_t cardinality_a_intersect_b, - weight_t cardinality_a_union_b) const + weight_t __device__ compute_score(weight_t weight_a, + weight_t weight_b, + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return (fabs(static_cast(cardinality_a_union_b) - double{0}) < - double{2} / double{1 << 30}) + return weight_a_union_b <= std::numeric_limits::min() ? 
weight_t{0} - : cardinality_a_intersect_b / cardinality_a_union_b; + : weight_a_intersect_b / weight_a_union_b; } }; diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index e0ab3e5c757..1810df2f76b 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -25,15 +25,14 @@ namespace detail { struct overlap_functor_t { template - weight_t __device__ compute_score(weight_t cardinality_a, - weight_t cardinality_b, - weight_t cardinality_a_intersect_b, - weight_t cardinality_a_union_b) const + weight_t __device__ compute_score(weight_t weight_a, + weight_t weight_b, + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return (fabs(static_cast(cardinality_a_union_b) - double{0}) < - double{2} / double{1 << 30}) + return std::min(weight_a, weight_b) <= std::numeric_limits::min() ? weight_t{0} - : cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b); + : weight_a_intersect_b / std::min(weight_a, weight_b); } }; diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 430f56d12c3..af61f552078 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -52,27 +53,31 @@ rmm::device_uvector similarity( auto vertex_pairs_begin = thrust::make_zip_iterator(std::get<0>(vertex_pairs).data(), std::get<1>(vertex_pairs).data()); - if (edge_weight_view) { - // FIXME: need implementation, similar to unweighted - // Use compute_out_weight_sums instead of compute_out_degrees - // Sum up for each common edge compute (u,a,v): min weight ((u,a), (a,v)) and - // max weight((u,a), (a,v)). 
- // Use these to compute weighted score - // + if (do_expensive_check) { + auto num_invalids = detail::count_invalid_vertex_pairs( + handle, graph_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input arguments: there are invalid input vertex pairs."); + + if (edge_weight_view) { + auto num_negative_edge_weights = + count_if_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; }); + CUGRAPH_EXPECTS( + num_negative_edge_weights == 0, + "Invalid input argument: input edge weights should have non-negative values."); + } + } + if (edge_weight_view) { rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); - - // - // Compute vertex_degree for all vertices, then distribute to each GPU. - // Need to use this instead of the dummy properties below - // - rmm::device_uvector weighted_out_degrees = compute_out_weight_sums(handle, graph_view, *edge_weight_view); - rmm::device_uvector vertex_weights = - compute_out_weight_sums(handle, graph_view, *edge_weight_view); - per_v_pair_transform_dst_nbr_intersection( handle, graph_view, @@ -87,15 +92,15 @@ rmm::device_uvector similarity( auto intersection, auto intersected_properties_a, auto intersected_properties_b) { - weight_t min_weight_a_intersect_b = weight_t{0}; - weight_t max_weight_a_intersect_b = weight_t{0}; - weight_t sum_of_intersected_a = weight_t{0}; - weight_t sum_of_intersected_b = weight_t{0}; + weight_t sum_of_min_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_max_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_intersected_a = weight_t{0}; + weight_t sum_of_intersected_b = weight_t{0}; for (size_t k = 0; k < intersection.size(); k++) { - min_weight_a_intersect_b += + sum_of_min_weight_a_intersect_b += std::min(intersected_properties_a[k], intersected_properties_b[k]); - 
max_weight_a_intersect_b += + sum_of_max_weight_a_intersect_b += std::max(intersected_properties_a[k], intersected_properties_b[k]); sum_of_intersected_a += intersected_properties_a[k]; sum_of_intersected_b += intersected_properties_b[k]; @@ -104,12 +109,12 @@ rmm::device_uvector similarity( weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b; - max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; + sum_of_max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; return functor.compute_score(static_cast(weight_a), static_cast(weight_b), - static_cast(min_weight_a_intersect_b), - static_cast(max_weight_a_intersect_b)); + static_cast(sum_of_min_weight_a_intersect_b), + static_cast(sum_of_max_weight_a_intersect_b)); }, similarity_score.begin(), do_expensive_check); @@ -118,10 +123,6 @@ rmm::device_uvector similarity( } else { rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); - // - // Compute vertex_degree for all vertices, then distribute to each GPU. 
- // Need to use this instead of the dummy properties below - // auto out_degrees = graph_view.compute_out_degrees(handle); per_v_pair_transform_dst_nbr_intersection( diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index d13620c0448..00c9a8107f3 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ b/cpp/src/link_prediction/sorensen_impl.cuh @@ -25,15 +25,14 @@ namespace detail { struct sorensen_functor_t { template - weight_t __device__ compute_score(weight_t cardinality_a, - weight_t cardinality_b, - weight_t cardinality_a_intersect_b, - weight_t cardinality_a_union_b) const + weight_t __device__ compute_score(weight_t weight_a, + weight_t weight_b, + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return (fabs(static_cast(cardinality_a_union_b) - double{0}) < - double{2} / double{1 << 30}) + return (weight_a + weight_b) <= std::numeric_limits::min() ? weight_t{0} - : (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b); + : (2 * weight_a_intersect_b) / (weight_a + weight_b); } }; From d2cb64f0efaf0ff5c631ee350ac1c1d9fe530075 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Tue, 25 Jul 2023 07:24:17 -0700 Subject: [PATCH 13/22] Address PR comments part-2 --- cpp/src/prims/detail/nbr_intersection.cuh | 413 +++++++++--------- ..._v_pair_transform_dst_nbr_intersection.cuh | 12 +- .../mg_weighted_similarity_test.cpp | 236 ++-------- .../link_prediction/similarity_compare.cpp | 24 - ...r_v_pair_transform_dst_nbr_intersection.cu | 13 +- 5 files changed, 250 insertions(+), 448 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 1b59f6e970a..8eb7c08edaf 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -171,7 +171,7 @@ struct update_rx_major_local_degree_t { template struct update_rx_major_local_nbrs_t { @@ -191,7 +191,7 @@ struct update_rx_major_local_nbrs_t { 
raft::device_span local_nbrs_for_rx_majors{}; - raft::device_span local_nbrs_properties_for_rx_majors{}; + raft::device_span local_nbrs_properties_for_rx_majors{}; __device__ void operator()(size_t idx) { @@ -223,20 +223,20 @@ struct update_rx_major_local_nbrs_t { // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - size_t pos = local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + - local_edge_partition_idx] + - offset_in_local_edge_partition]; - thrust::copy( - thrust::seq, indices, indices + local_degree, local_nbrs_for_rx_majors.begin() + pos); - - if constexpr (!std::is_same_v) { - auto nbrs_properties_start = local_nbrs_properties_for_rx_majors.begin() + pos; - - auto eddge_property_start = edge_partition_e_value_input.value_first() + edge_offset; - - for (size_t k = 0; k < local_degree; k++) { - nbrs_properties_start[k] = *(eddge_property_start + k); - } + size_t start_offset = + local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + + local_edge_partition_idx] + + offset_in_local_edge_partition]; + thrust::copy(thrust::seq, + indices, + indices + local_degree, + local_nbrs_for_rx_majors.begin() + start_offset); + + if constexpr (!std::is_same_v) { + thrust::copy(thrust::seq, + edge_partition_e_value_input.value_first() + edge_offset, + edge_partition_e_value_input.value_first() + (edge_offset + local_degree), + local_nbrs_properties_for_rx_majors.begin() + start_offset); } } }; @@ -333,19 +333,19 @@ template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; raft::device_span first_element_offsets{}; raft::device_span first_element_indices{nullptr}; - raft::device_span first_element_properties{nullptr}; + raft::device_span first_element_properties{nullptr}; SecondElementToIdxMap second_element_to_idx_map{}; raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; 
- raft::device_span second_element_properties{nullptr}; + raft::device_span second_element_properties{nullptr}; edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; @@ -353,9 +353,9 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; - raft::device_span nbr_intersection_properties0{nullptr}; - raft::device_span nbr_intersection_properties1{nullptr}; - + raft::device_span nbr_intersection_properties0{nullptr}; + raft::device_span nbr_intersection_properties1{nullptr}; + raft::device_span nbr_intersection_idx_buffer{nullptr}; vertex_t invalid_id{}; __device__ edge_t operator()(size_t i) @@ -363,7 +363,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto pair = *(vertex_pair_first + i); vertex_t const* indices0{nullptr}; - EdgeProperty_t const* property0{nullptr}; + edge_property_value_t const* property0{nullptr}; [[maybe_unused]] edge_t local_edge_offset0{0}; edge_t local_degree0{0}; @@ -387,27 +387,25 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { thrust::tie(indices0, local_edge_offset0, local_degree0) = edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } + + if constexpr (!std::is_same_v) { + property0 = edge_partition_e_value_input.value_first() + local_edge_offset0; + } + } else { auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); local_degree0 = static_cast(first_element_offsets[idx + 1] - first_element_offsets[idx]); indices0 = first_element_indices.begin() + first_element_offsets[idx]; local_edge_offset0 = first_element_offsets[idx]; - } - - if constexpr (std::is_same_v) { - if constexpr (!std::is_same_v) { - property0 = edge_partition_e_value_input.value_first() + local_edge_offset0; - } - - } else { - if constexpr (!std::is_same_v) { + if constexpr 
(!std::is_same_v) { property0 = first_element_properties.begin() + local_edge_offset0; } } vertex_t const* indices1{nullptr}; - EdgeProperty_t const* property1{nullptr}; + edge_property_value_t const* property1{nullptr}; + [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { @@ -430,6 +428,11 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { thrust::tie(indices1, local_edge_offset1, local_degree1) = edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } + + if constexpr (!std::is_same_v) { + property1 = edge_partition_e_value_input.value_first() + local_edge_offset1; + } + } else { auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_degree1 = @@ -437,14 +440,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { indices1 = second_element_indices.begin() + second_element_offsets[idx]; local_edge_offset1 = second_element_offsets[idx]; - } - - if constexpr (std::is_same_v) { - if constexpr (!std::is_same_v) { - property1 = edge_partition_e_value_input.value_first() + local_edge_offset1; - } - } else { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { property1 = second_element_properties.begin() + local_edge_offset1; } } @@ -453,36 +449,38 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - auto inbr_start = nbr_intersection_indices.begin() + nbr_intersection_offsets[i]; + auto nbr_intersection_first = nbr_intersection_indices.begin() + nbr_intersection_offsets[i]; - auto it = thrust::set_intersection(thrust::seq, - indices0, - indices0 + local_degree0, - indices1, - indices1 + local_degree1, - inbr_start); + auto nbr_intersection_last = thrust::set_intersection(thrust::seq, + indices0, + indices0 + local_degree0, + indices1, + indices1 + local_degree1, + nbr_intersection_first); thrust::fill(thrust::seq, - it, + 
nbr_intersection_last, nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], invalid_id); - auto insection_size = static_cast(thrust::distance(inbr_start, it)); - if constexpr (!std::is_same_v) { + auto insection_size = + static_cast(thrust::distance(nbr_intersection_first, nbr_intersection_last)); + if constexpr (!std::is_same_v) { auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; // copy edge properties from first vertex to common neighbors thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, - inbr_start, - it, - ip0_start, // indices + nbr_intersection_first, + nbr_intersection_last, + nbr_intersection_idx_buffer.begin() + + nbr_intersection_offsets[i], // ip0_start, // indices thrust::less()); thrust::transform( thrust::seq, - ip0_start, - ip0_start + insection_size, + nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i], + nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i] + insection_size, ip0_start, [property0] __device__(auto idx) { return property0[static_cast(idx)]; }); @@ -492,19 +490,20 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, - inbr_start, - it, - ip1_start, // indices + nbr_intersection_first, + nbr_intersection_last, + nbr_intersection_idx_buffer.begin() + + nbr_intersection_offsets[i], // ip1_start, // indices thrust::less()); thrust::transform( thrust::seq, - ip1_start, - ip1_start + insection_size, + nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i], + nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i] + insection_size, ip1_start, [property1] __device__(auto idx) { return property1[static_cast(idx)]; }); } - return static_cast(thrust::distance(inbr_start, it)); + return insection_size; } }; @@ -524,7 +523,7 @@ struct strided_accumulate_t { } }; -template +template struct gatherv_indices_t { size_t output_size{}; int 
minor_comm_size{}; @@ -534,15 +533,15 @@ struct gatherv_indices_t { raft::device_span combined_nbr_intersection_offsets{}; raft::device_span combined_nbr_intersection_indices{}; - std::optional> gathered_nbr_intersection_properties0{ - std::nullopt}; - std::optional> gathered_nbr_intersection_properties1{ - std::nullopt}; + thrust::optional> + gathered_nbr_intersection_properties0{thrust::nullopt}; + thrust::optional> + gathered_nbr_intersection_properties1{thrust::nullopt}; - std::optional> combined_nbr_intersection_properties0{ - std::nullopt}; - std::optional> combined_nbr_intersection_properties1{ - std::nullopt}; + thrust::optional> combined_nbr_intersection_properties0{ + thrust::nullopt}; + thrust::optional> combined_nbr_intersection_properties1{ + thrust::nullopt}; __device__ void operator()(size_t i) const { @@ -562,7 +561,7 @@ struct gatherv_indices_t { combined_nbr_intersection_properties1->begin())); for (int j = 0; j < minor_comm_size; ++j) { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::copy(thrust::seq, zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i + 1], @@ -695,8 +694,7 @@ nbr_intersection(raft::handle_t const& handle, typename EdgeValueInputWrapper::value_iterator, typename EdgeValueInputWrapper::value_type>>; - // using EdgeProperty_t = typename edge_partition_e_input_device_view_t::value_type; - using EdgeProperty_t = typename EdgeValueInputWrapper::value_type; + using edge_property_value_t = typename EdgeValueInputWrapper::value_type; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -734,7 +732,7 @@ nbr_intersection(raft::handle_t const& handle, std::optional> major_nbr_offsets{std::nullopt}; std::optional> major_nbr_indices{std::nullopt}; - std::optional> major_nbr_properties{std::nullopt}; + std::optional> major_nbr_properties{std::nullopt}; if constexpr (GraphViewType::is_multi_gpu) { if 
(intersect_minor_nbr[1]) { @@ -854,13 +852,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); - // rmm::device_uvector local_nbrs_properties_for_rx_majors(size_t{0}, - // handle.get_stream()); - std::optional> local_nbrs_properties_for_rx_majors{ + std::optional> local_nbrs_properties_for_rx_majors{ std::nullopt}; - if constexpr (!std::is_same_v) { - local_nbrs_properties_for_rx_majors = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + if constexpr (!std::is_same_v) { + local_nbrs_properties_for_rx_majors = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); } std::vector local_nbr_counts{}; @@ -958,7 +954,7 @@ nbr_intersection(raft::handle_t const& handle, thrust::make_counting_iterator(reordered_idx_last), update_rx_major_local_nbrs_t{ major_comm_size, @@ -975,8 +971,9 @@ nbr_intersection(raft::handle_t const& handle, local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), local_nbrs_for_rx_majors.size()), - raft::device_span((*local_nbrs_properties_for_rx_majors).data(), - (*local_nbrs_properties_for_rx_majors).size())}); + raft::device_span( + (*local_nbrs_properties_for_rx_majors).data(), + (*local_nbrs_properties_for_rx_majors).size())}); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -1022,9 +1019,9 @@ nbr_intersection(raft::handle_t const& handle, std::tie(*major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); - if constexpr (!std::is_same_v) { - major_nbr_properties = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + if constexpr (!std::is_same_v) { + major_nbr_properties = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); 
std::tie(*major_nbr_properties, std::ignore) = shuffle_values(major_comm, @@ -1061,18 +1058,19 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); - // rmm::device_uvector nbr_intersection_properties0(size_t{0}, - // handle.get_stream()); rmm::device_uvector - // nbr_intersection_properties1(size_t{0}, handle.get_stream()); - - std::optional> nbr_intersection_properties0{std::nullopt}; - std::optional> nbr_intersection_properties1{std::nullopt}; - - if constexpr (!std::is_same_v) { - nbr_intersection_properties0 = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); - nbr_intersection_properties1 = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + std::optional> nbr_intersection_properties0{ + std::nullopt}; + std::optional> nbr_intersection_properties1{ + std::nullopt}; + std::optional> nbr_intersection_idx_buffer{std::nullopt}; + + if constexpr (!std::is_same_v) { + nbr_intersection_properties0 = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); + nbr_intersection_properties1 = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); + nbr_intersection_idx_buffer = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); } if constexpr (GraphViewType::is_multi_gpu) { @@ -1118,10 +1116,12 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - std::vector> edge_partition_nbr_intersection_property0{}; - std::vector> edge_partition_nbr_intersection_property1{}; + std::vector> + edge_partition_nbr_intersection_property0{}; + std::vector> + edge_partition_nbr_intersection_property1{}; - if constexpr (!std::is_same_v) { + if 
constexpr (!std::is_same_v) { edge_partition_nbr_intersection_property0.reserve( graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_property1.reserve( @@ -1144,22 +1144,22 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); - // rmm::device_uvector (*rx_v_pair_nbr_intersection_properties0)( - // size_t{0}, handle.get_stream()); + std::optional> + rx_v_pair_nbr_intersection_properties0{std::nullopt}; + std::optional> + rx_v_pair_nbr_intersection_properties1{std::nullopt}; - // rmm::device_uvector (*rx_v_pair_nbr_intersection_properties1)( - // size_t{0}, handle.get_stream()); - - std::optional> rx_v_pair_nbr_intersection_properties0{ - std::nullopt}; - std::optional> rx_v_pair_nbr_intersection_properties1{ + std::optional> rx_v_pair_nbr_intersection_idx_buffer{ std::nullopt}; - if constexpr (!std::is_same_v) { - rx_v_pair_nbr_intersection_properties0 = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); - rx_v_pair_nbr_intersection_properties1 = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + if constexpr (!std::is_same_v) { + rx_v_pair_nbr_intersection_properties0 = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); + rx_v_pair_nbr_intersection_properties1 = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); + + rx_v_pair_nbr_intersection_idx_buffer = + std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); } std::vector rx_v_pair_nbr_intersection_index_tx_counts(size_t{0}); @@ -1225,11 +1225,13 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { (*rx_v_pair_nbr_intersection_properties0) .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); 
(*rx_v_pair_nbr_intersection_properties1) .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + (*rx_v_pair_nbr_intersection_idx_buffer) + .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto& comm = handle.get_comms(); @@ -1248,38 +1250,41 @@ nbr_intersection(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer)), vertex_t, edge_t, - EdgeProperty_t, + edge_property_value_t, edge_partition_e_input_device_view_t, - true>{ - nullptr, - raft::device_span(), - raft::device_span(), - raft::device_span(), - second_element_to_idx_map, - raft::device_span((*major_nbr_offsets).data(), - (*major_nbr_offsets).size()), - raft::device_span((*major_nbr_indices).data(), - (*major_nbr_indices).size()), - raft::device_span((*major_nbr_properties).data(), - (*major_nbr_properties).size()), - edge_partition, - edge_partition_e_value_input, - get_dataframe_buffer_begin(vertex_pair_buffer), - raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), - rx_v_pair_nbr_intersection_offsets.size()), - raft::device_span(rx_v_pair_nbr_intersection_indices.data(), - rx_v_pair_nbr_intersection_indices.size()), - raft::device_span((*rx_v_pair_nbr_intersection_properties0).data(), - (*rx_v_pair_nbr_intersection_properties0).size()), - raft::device_span((*rx_v_pair_nbr_intersection_properties1).data(), - (*rx_v_pair_nbr_intersection_properties1).size()), - invalid_vertex_id::value}); + true>{nullptr, + raft::device_span(), + raft::device_span(), + raft::device_span(), + second_element_to_idx_map, + raft::device_span((*major_nbr_offsets).data(), + (*major_nbr_offsets).size()), + raft::device_span((*major_nbr_indices).data(), + (*major_nbr_indices).size()), + raft::device_span((*major_nbr_properties).data(), + (*major_nbr_properties).size()), + edge_partition, + edge_partition_e_value_input, + get_dataframe_buffer_begin(vertex_pair_buffer), + 
raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), + rx_v_pair_nbr_intersection_offsets.size()), + raft::device_span(rx_v_pair_nbr_intersection_indices.data(), + rx_v_pair_nbr_intersection_indices.size()), + raft::device_span( + (*rx_v_pair_nbr_intersection_properties0).data(), + (*rx_v_pair_nbr_intersection_properties0).size()), + raft::device_span( + (*rx_v_pair_nbr_intersection_properties1).data(), + (*rx_v_pair_nbr_intersection_properties1).size()), + raft::device_span((*rx_v_pair_nbr_intersection_idx_buffer).data(), + (*rx_v_pair_nbr_intersection_idx_buffer).size()), + invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { rx_v_pair_nbr_intersection_indices.resize( thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), thrust::remove(handle.get_thrust_policy(), @@ -1432,21 +1437,16 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector combined_nbr_intersection_indices(size_t{0}, handle.get_stream()); - // rmm::device_uvector combined_nbr_intersection_properties0( - // size_t{0}, handle.get_stream()); - // rmm::device_uvector combined_nbr_intersection_properties1( - // size_t{0}, handle.get_stream()); + std::optional> + combined_nbr_intersection_properties0{std::nullopt}; + std::optional> + combined_nbr_intersection_properties1{std::nullopt}; - std::optional> combined_nbr_intersection_properties0{ - std::nullopt}; - std::optional> combined_nbr_intersection_properties1{ - std::nullopt}; - - if constexpr (!std::is_same_v) { - combined_nbr_intersection_properties0 = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); - combined_nbr_intersection_properties1 = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); + if constexpr (!std::is_same_v) { + combined_nbr_intersection_properties0 = std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); + combined_nbr_intersection_properties1 = 
std::make_optional( + rmm::device_uvector(size_t{0}, handle.get_stream())); } { @@ -1484,31 +1484,23 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(), handle.get_stream()); - // rmm::device_uvector gathered_nbr_intersection_properties0( - // rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), - // handle.get_stream()); - - // rmm::device_uvector gathered_nbr_intersection_properties1( - // rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), - // handle.get_stream()); + std::optional> + gathered_nbr_intersection_properties0{std::nullopt}; + std::optional> + gathered_nbr_intersection_properties1{std::nullopt}; - std::optional> gathered_nbr_intersection_properties0{ - std::nullopt}; - std::optional> gathered_nbr_intersection_properties1{ - std::nullopt}; - - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { gathered_nbr_intersection_properties0 = - std::make_optional(rmm::device_uvector( + std::make_optional(rmm::device_uvector( rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream())); gathered_nbr_intersection_properties1 = - std::make_optional(rmm::device_uvector( + std::make_optional(rmm::device_uvector( rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream())); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { device_multicast_sendrecv(minor_comm, (*rx_v_pair_nbr_intersection_properties0).begin(), rx_v_pair_nbr_intersection_index_tx_counts, @@ -1541,12 +1533,12 @@ nbr_intersection(raft::handle_t const& handle, .resize((*gathered_nbr_intersection_properties1).size(), handle.get_stream()); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - 
gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1558,15 +1550,19 @@ nbr_intersection(raft::handle_t const& handle, raft::device_span(combined_nbr_intersection_indices.data(), combined_nbr_intersection_indices.size()), - raft::device_span((*gathered_nbr_intersection_properties0).data(), - (*gathered_nbr_intersection_properties0).size()), - raft::device_span((*gathered_nbr_intersection_properties1).data(), - (*gathered_nbr_intersection_properties1).size()), + raft::device_span( + (*gathered_nbr_intersection_properties0).data(), + (*gathered_nbr_intersection_properties0).size()), + raft::device_span( + (*gathered_nbr_intersection_properties1).data(), + (*gathered_nbr_intersection_properties1).size()), - raft::device_span((*combined_nbr_intersection_properties0).data(), - (*combined_nbr_intersection_properties0).size()), - raft::device_span((*combined_nbr_intersection_properties1).data(), - (*combined_nbr_intersection_properties1).size()) + raft::device_span( + (*combined_nbr_intersection_properties0).data(), + (*combined_nbr_intersection_properties0).size()), + raft::device_span( + (*combined_nbr_intersection_properties1).data(), + (*combined_nbr_intersection_properties1).size()) }); @@ -1575,7 +1571,7 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1594,7 +1590,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes)); edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { 
edge_partition_nbr_intersection_property0.push_back( std::move((*combined_nbr_intersection_properties0))); edge_partition_nbr_intersection_property1.push_back( @@ -1608,7 +1604,7 @@ nbr_intersection(raft::handle_t const& handle, num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); } @@ -1625,7 +1621,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_indices[i].end(), nbr_intersection_indices.begin() + index_offset); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::copy(handle.get_thrust_policy(), edge_partition_nbr_intersection_property0[i].begin(), edge_partition_nbr_intersection_property0[i].end(), @@ -1637,7 +1633,6 @@ nbr_intersection(raft::handle_t const& handle, (*nbr_intersection_properties1).begin() + index_offset); } - // Need to copy to (*nbr_intersection_properties0) and (*nbr_intersection_properties1) index_offset += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_offsets.resize(nbr_intersection_sizes.size() + size_t{1}, handle.get_stream()); @@ -1648,7 +1643,7 @@ nbr_intersection(raft::handle_t const& handle, size_first, size_first + nbr_intersection_sizes.size(), nbr_intersection_offsets.begin() + 1); - ///<=========== to here + } else { auto edge_partition = edge_partition_device_view_t( @@ -1685,9 +1680,10 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { 
(*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); + (*nbr_intersection_idx_buffer).resize(nbr_intersection_indices.size(), handle.get_stream()); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { @@ -1700,17 +1696,17 @@ nbr_intersection(raft::handle_t const& handle, decltype(vertex_pair_first), vertex_t, edge_t, - EdgeProperty_t, + edge_property_value_t, edge_partition_e_input_device_view_t, false>{ nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + raft::device_span(), nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + raft::device_span(), edge_partition, edge_partition_e_value_input, vertex_pair_first, @@ -1718,10 +1714,12 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.size()), raft::device_span(nbr_intersection_indices.data(), nbr_intersection_indices.size()), - raft::device_span((*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size()), - raft::device_span((*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size()), + raft::device_span((*nbr_intersection_properties0).data(), + (*nbr_intersection_properties0).size()), + raft::device_span((*nbr_intersection_properties1).data(), + (*nbr_intersection_properties1).size()), + raft::device_span((*nbr_intersection_idx_buffer).data(), + (*nbr_intersection_idx_buffer).size()), invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); @@ -1736,21 +1734,14 @@ nbr_intersection(raft::handle_t const& handle, detail::not_equal_t{invalid_vertex_id::value}), handle.get_stream()); - // rmm::device_uvector tmp_properties0(size_t{0}, handle.get_stream()); - // rmm::device_uvector tmp_properties1(size_t{0}, handle.get_stream()); - // if constexpr (!std::is_same_v) { - // tmp_properties0.resize(tmp_indices.size(), handle.get_stream()); - // 
tmp_properties1.resize(tmp_indices.size(), handle.get_stream()); - // } - - std::optional> tmp_properties0{std::nullopt}; - std::optional> tmp_properties1{std::nullopt}; + std::optional> tmp_properties0{std::nullopt}; + std::optional> tmp_properties1{std::nullopt}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { tmp_properties0 = std::make_optional( - rmm::device_uvector(tmp_indices.size(), handle.get_stream())); + rmm::device_uvector(tmp_indices.size(), handle.get_stream())); tmp_properties1 = std::make_optional( - rmm::device_uvector(tmp_indices.size(), handle.get_stream())); + rmm::device_uvector(tmp_indices.size(), handle.get_stream())); } auto zipped_itr_to_indices_and_properties_begin = @@ -1769,7 +1760,7 @@ nbr_intersection(raft::handle_t const& handle, size_t{1} << 27, static_cast(thrust::distance(nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.end()))); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { num_copied += static_cast(thrust::distance( tmp_indices.begin() + num_copied, thrust::copy_if(handle.get_thrust_policy(), @@ -1794,14 +1785,14 @@ nbr_intersection(raft::handle_t const& handle, num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_properties0 = std::move(tmp_properties0); nbr_intersection_properties1 = std::move(tmp_properties1); } #else - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { nbr_intersection_indices.resize( thrust::distance(nbr_intersection_indices.begin(), thrust::remove(handle.get_thrust_policy(), diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index b40e0f102ce..cb511ff5003 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -133,8 +133,15 @@ struct 
call_intersection_op_t { auto intersection = raft::device_span( nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); - auto properties0 = raft::device_span(); - auto properties1 = raft::device_span(); + std::conditional_t, + raft::device_span, + std::byte> + properties0{}; + + std::conditional_t, + raft::device_span, + std::byte> + properties1{}; if constexpr (!std::is_same_v) { properties0 = raft::device_span( @@ -144,6 +151,7 @@ struct call_intersection_op_t { nbr_intersection_properties1 + nbr_offsets[i], nbr_intersection_properties1 + +nbr_offsets[i + 1]); } + property_t src_prop{}; property_t dst_prop{}; if (unique_vertices) { diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp index 8e1fa90bfa1..cf3179d51a3 100644 --- a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ -27,15 +27,15 @@ #include -struct Similarity_Usecase { +struct Weighted_Similarity_Usecase { bool use_weights{true}; - bool check_correctness{true}; size_t max_seeds{std::numeric_limits::max()}; + bool check_correctness{true}; }; template class Tests_MGSimilarity - : public ::testing::TestWithParam> { + : public ::testing::TestWithParam> { public: Tests_MGSimilarity() {} @@ -47,8 +47,9 @@ class Tests_MGSimilarity virtual void TearDown() {} template - void run_current_test(std::tuple param, - test_functor_t const& test_functor) + void run_current_test( + std::tuple param, + test_functor_t const& test_functor) { auto [similarity_usecase, input_usecase] = param; HighResTimer hr_timer{}; @@ -88,46 +89,9 @@ class Tests_MGSimilarity (static_cast(comm_rank) < similarity_usecase.max_seeds % comm_size ? 
1 : 0)), handle_->get_stream()); - /* - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector( - "d_start_vertices", d_start_vertices.data(), d_start_vertices.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - - */ - cugraph::test::populate_vertex_ids( *handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first()); - /* - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector( - "d_start_vertices", d_start_vertices.data(), d_start_vertices.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - */ - auto [d_offsets, two_hop_nbrs] = cugraph::k_hop_nbrs( *handle_, mg_graph_view, @@ -142,81 +106,9 @@ class Tests_MGSimilarity std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]); } - // h_v1.resize(1); - // two_hop_nbrs.resize(1, handle_->get_stream()); - // two_hop_nbrs.shrink_to_fit(handle_->get_stream()); - auto d_v1 = cugraph::test::to_device(*handle_, h_v1); auto d_v2 = std::move(two_hop_nbrs); - /* - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("d_v1", d_v1.data(), d_v1.size(), std::cout); - - raft::print_device_vector("d_v2", d_v2.data(), d_v2.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - */ - - /* - ////// - - bool test_weighted = true; - bool renumber = true; - std::string file_path = 
"/home/nfs/mnaim/csv/similarity.csv"; - std::tie(mg_graph, mg_edge_weights, d_mg_renumber_map_labels) = - cugraph::test::read_graph_from_csv_file( - *handle_, file_path, test_weighted, renumber); - - std::tie(mg_graph, mg_edge_weights, d_mg_renumber_map_labels) = cugraph::symmetrize_graph( - *handle_, - std::move(mg_graph), - std::move(mg_edge_weights), - d_mg_renumber_map_labels - ? std::optional>(std::move(*d_mg_renumber_map_labels)) - : std::nullopt, - false); - - mg_graph_view = mg_graph.view(); - mg_edge_weight_view = (*mg_edge_weights).view(); - - //// - - std::vector h_v1 = {}; - if (comm_rank==0){h_v1.push_back(2);} - auto d_v1 = cugraph::test::to_device(*handle_, h_v1); - - std::vector h_v2 = {}; - if (comm_rank==0){h_v2.push_back(3);} - auto d_v2 = cugraph::test::to_device(*handle_, h_v2); - - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("d_v1", d_v1.data(), d_v1.size(), std::cout); - - raft::print_device_vector("d_v2", d_v2.data(), d_v2.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - */ std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, @@ -230,24 +122,6 @@ class Tests_MGSimilarity std::nullopt, mg_graph_view.vertex_partition_range_lasts()); - /* - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("d_v1", d_v1.data(), d_v1.size(), std::cout); - - raft::print_device_vector("d_v2", d_v2.data(), d_v2.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - }*/ - std::tuple, 
raft::device_span> vertex_pairs{ {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}}; @@ -273,62 +147,16 @@ class Tests_MGSimilarity auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(*handle_, mg_graph_view, mg_edge_weight_view); - /* - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - - std::cout << "mg_graph_view: " << mg_graph_view.number_of_vertices() << ", "<< - mg_graph_view.number_of_edges() << std::endl; raft::print_host_vector("src", src.data(), - src.size(), std::cout); raft::print_host_vector("dst", dst.data(), dst.size(), std::cout); - raft::print_host_vector("wgt", wgt->data(), wgt->size(), std::cout); - - - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - - */ - d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size()); d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size()); result_score = cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size()); - /* - for (int k = 0; k < comm_size; k++) { - auto& comm = handle_->get_comms(); - - comm.barrier(); - if (comm_rank == k) { - std::cout << "Rank :" << comm_rank << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - raft::print_device_vector("gathered d_v1", d_v1.data(), d_v1.size(), std::cout); - - raft::print_device_vector("gathered d_v2", d_v2.data(), d_v2.size(), std::cout); - - std::cout << "------------------" << std::endl; - } - comm.barrier(); - } - */ - if (d_v1.size() > 0) { auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1); auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2); auto h_result_score = cugraph::test::to_host(*handle_, result_score); - std::cout << "pari size: " << h_vertex_pair1.size() << " " << h_vertex_pair2.size() - << std::endl; - if (wgt && similarity_usecase.use_weights) { 
weighted_similarity_compare(mg_graph_view.number_of_vertices(), std::tie(src, dst, wgt), @@ -353,80 +181,80 @@ class Tests_MGSimilarity template std::unique_ptr Tests_MGSimilarity::handle_ = nullptr; -using Tests_MGSimilarity_File = Tests_MGSimilarity; -using Tests_MGSimilarity_Rmat = Tests_MGSimilarity; +using Tests_MGWeightedSimilarity_File = Tests_MGSimilarity; +using Tests_MGWeightedSimilarity_Rmat = Tests_MGSimilarity; -TEST_P(Tests_MGSimilarity_File, CheckInt32Int32FloatFloatJaccard) +TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatFloatJaccard) { auto param = GetParam(); run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int32FloatFloatJaccard) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatFloatJaccard) { auto param = GetParam(); run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int64FloatFloatJaccard) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatFloatJaccard) { auto param = GetParam(); run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatFloatJaccard) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatFloatJaccard) { auto param = GetParam(); run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); } -TEST_P(Tests_MGSimilarity_File, CheckInt32Int32FloatSorensen) +TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatSorensen) { run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int32FloatSorensen) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatSorensen) { run_current_test( 
override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int64FloatSorensen) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatSorensen) { run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatSorensen) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatSorensen) { run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); } -TEST_P(Tests_MGSimilarity_File, CheckInt32Int32FloatOverlap) +TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatOverlap) { run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int32FloatOverlap) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatOverlap) { run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt32Int64FloatOverlap) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatOverlap) { run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); } -TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatOverlap) +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatOverlap) { run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); @@ -434,25 +262,25 @@ TEST_P(Tests_MGSimilarity_Rmat, CheckInt64Int64FloatOverlap) INSTANTIATE_TEST_SUITE_P( file_test, - Tests_MGSimilarity_File, + Tests_MGWeightedSimilarity_File, ::testing::Combine( // enable correctness checks // Disable weighted computation testing in 22.10 - //::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}), - 
::testing::Values(Similarity_Usecase{true, true, 20}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx") - // , cugraph::test::File_Usecase("test/datasets/netscience.mtx") - ))); + //::testing::Values(Weighted_Similarity_Usecase{true, 20, true}, + //: Weighted_Similarity_Usecase{false, 20, true}), + ::testing::Values(Weighted_Similarity_Usecase{true, 20, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); INSTANTIATE_TEST_SUITE_P( rmat_small_test, - Tests_MGSimilarity_Rmat, + Tests_MGWeightedSimilarity_Rmat, ::testing::Combine( // enable correctness checks // Disable weighted computation testing in 22.10 - //::testing::Values(Similarity_Usecase{true, true, 20}, - // Similarity_Usecase{false, true, 20}), - ::testing::Values(Similarity_Usecase{true, true, 20}), + //::testing::Values(Weighted_Similarity_Usecase{true, 20, true}, + // Weighted_Similarity_Usecase{false, 20, true}), + ::testing::Values(Weighted_Similarity_Usecase{true, 20, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -461,10 +289,10 @@ INSTANTIATE_TEST_SUITE_P( vertex & edge type combination) by command line arguments and do not include more than one Rmat_Usecase that differ only in scale or edge factor (to avoid running same benchmarks more than once) */ - Tests_MGSimilarity_Rmat, + Tests_MGWeightedSimilarity_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(Similarity_Usecase{true, false, 20}), + ::testing::Values(Weighted_Similarity_Usecase{true, 20, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_prediction/similarity_compare.cpp b/cpp/tests/link_prediction/similarity_compare.cpp index dec14a347f8..b39ee983fa7 100644 --- 
a/cpp/tests/link_prediction/similarity_compare.cpp +++ b/cpp/tests/link_prediction/similarity_compare.cpp @@ -60,7 +60,6 @@ void weighted_similarity_compare( thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()), compare_pairs); - // FIXME: This only tests unweighted, weighted implementation needs to be different std::vector vertex_degrees(static_cast(num_vertices), size_t{0}); std::vector weighted_vertex_degrees(static_cast(num_vertices), weight_t{0}); @@ -89,11 +88,6 @@ void weighted_similarity_compare( } auto graph_wgt_first = (*graph_wgt).begin(); - - // std::for_each(graph_wgt_first, graph_wgt_first + (*graph_wgt).size(), [](auto val) { - // std::cout << "w: " << val << std::endl; - // }); - std::for_each( thrust::make_zip_iterator(v1.begin(), v2.begin(), similarity_score.begin()), thrust::make_zip_iterator(v1.end(), v2.end(), similarity_score.end()), @@ -163,12 +157,6 @@ void weighted_similarity_compare( intersected_weights_v2[intersected_weight_idx] = static_cast(graph_wgt_first[v2_begin + offset]); - // std::cout << "intersected_weights_v1: " << - // intersected_weights_v1[intersected_weight_idx] - // << std::endl; - // std::cout << "intersected_weights_v2: " << - // intersected_weights_v2[intersected_weight_idx] - // << std::endl; ++intersected_weight_idx; }); @@ -193,20 +181,10 @@ void weighted_similarity_compare( }); max_weight_v1_intersect_v2 += (sum_of_uniq_weights_v1 + sum_of_uniq_weights_v2); - - // std::cout << "wdegs: " << weighted_vertex_degrees[v1] << " " << weighted_vertex_degrees[v2] - // << std::endl; - // std::cout << "min_i, max:" << min_weight_v1_intersect_v2 << " " << - // max_weight_v1_intersect_v2 - // << std::endl; - auto expected_score = test_functor.compute_score(weighted_vertex_degrees[v1], weighted_vertex_degrees[v2], min_weight_v1_intersect_v2, max_weight_v1_intersect_v2); - - // std::cout << "score: " << score << " expected_score: " << expected_score << std::endl; - EXPECT_TRUE(compare_functor(score, 
expected_score)) << "score mismatch, got " << score << ", expected " << expected_score; }); @@ -277,8 +255,6 @@ void similarity_compare( static_cast(vertex_degrees[v1] + vertex_degrees[v2] - std::distance(intersection.begin(), intersection_end))); - // std::cout << "score: " << score << " expected_score: " << expected_score << std::endl; - EXPECT_TRUE(compare_functor(score, expected_score)) << "score mismatch, got " << score << ", expected " << expected_score; }); diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index aaf64000261..d2d6653627b 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -43,7 +43,7 @@ #include -template +template struct intersection_op_t { __device__ thrust::tuple operator()( vertex_t v0, @@ -51,8 +51,9 @@ struct intersection_op_t { edge_t v0_prop /* out degree */, edge_t v1_prop /* out degree */, raft::device_span intersection, - raft::device_span intersection_p0, - raft::device_span intersection_p1) const + std::byte, /* dummy */ + std::byte /* dummy */ + ) const { return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); } @@ -166,7 +167,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), mg_out_degrees.begin(), - intersection_op_t{}, + intersection_op_t{}, cugraph::get_dataframe_buffer_begin(mg_result_buffer)); if (cugraph::test::g_perf) { @@ -235,9 +236,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection mg_aggregate_vertex_pair_buffer /* now unrenumbered */), cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered */), sg_out_degrees.begin(), - intersection_op_t{}, + intersection_op_t{}, cugraph::get_dataframe_buffer_begin(sg_result_buffer)); bool valid = 
thrust::equal(handle_->get_thrust_policy(), From d35fe1b8de7349958c5e02110306f744214c11fc Mon Sep 17 00:00:00 2001 From: Md Naim Date: Tue, 25 Jul 2023 15:41:15 -0700 Subject: [PATCH 14/22] Address PR comments part-3 --- cpp/src/prims/detail/nbr_intersection.cuh | 37 +++-- ..._v_pair_transform_dst_nbr_intersection.cuh | 130 ++++++++++-------- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 52 +++---- 3 files changed, 118 insertions(+), 101 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 8eb7c08edaf..15eb46c0ea3 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -670,14 +670,17 @@ size_t count_invalid_vertex_pairs(raft::handle_t const& handle, // thrust::distance(vertex_pair_first, vertex_pair_last) should be comparable across the global // communicator. If we need to build the neighbor lists, grouping based on applying "vertex ID % // number of groups" is recommended for load-balancing. 
-template -std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>> +template +std::conditional_t< + !std::is_same_v, + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector>, + std::tuple, rmm::device_uvector>> nbr_intersection(raft::handle_t const& handle, GraphViewType const& graph_view, - EdgeValueInputWrapper edge_value_input, + EdgeValueInputIterator edge_value_input, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, std::array intersect_dst_nbr, @@ -687,14 +690,14 @@ nbr_intersection(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, - typename EdgeValueInputWrapper::value_iterator, - typename EdgeValueInputWrapper::value_type>>; + typename EdgeValueInputIterator::value_iterator, + typename EdgeValueInputIterator::value_type>>; - using edge_property_value_t = typename EdgeValueInputWrapper::value_type; + using edge_property_value_t = typename EdgeValueInputIterator::value_type; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -1826,10 +1829,16 @@ nbr_intersection(raft::handle_t const& handle, // 5. 
Return - return std::make_tuple(std::move(nbr_intersection_offsets), - std::move(nbr_intersection_indices), - std::move(nbr_intersection_properties0), - std::move(nbr_intersection_properties1)); + if constexpr (std::is_same_v) { + return std::make_tuple(std::move(nbr_intersection_offsets), + std::move(nbr_intersection_indices)); + + } else { + return std::make_tuple(std::move(nbr_intersection_offsets), + std::move(nbr_intersection_indices), + std::move((*nbr_intersection_properties0)), + std::move((*nbr_intersection_properties1))); + } } } // namespace detail diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index cb511ff5003..caf3771e02c 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -97,7 +97,7 @@ struct indirection_compare_less_t { template ::value_type; using edge_property_value_t = - typename thrust::iterator_traits::value_type; + typename thrust::iterator_traits::value_type; auto index = *(major_minor_pair_index_first + i); auto pair = *(major_minor_pair_first + index); @@ -190,8 +190,8 @@ struct call_intersection_op_t { * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexPairIterator Type of the iterator for input vertex pairs. - * @tparam VertexValueInputWrapper Type of the wrapper for vertex property values. - * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam VertexValueInputIterator Type of the iterator for vertex property values. + * @tparam EdgeValueInputIterator Type of the iterator for edge property values. * @tparam IntersectionOp Type of the quinary per intersection operator. * @tparam VertexPairValueOutputIterator Type of the iterator for vertex pair output property * variables. 
@@ -218,13 +218,13 @@ struct call_intersection_op_t { template void per_v_pair_transform_dst_nbr_intersection( raft::handle_t const& handle, GraphViewType const& graph_view, - EdgeValueInputWrapper edge_value_input, + EdgeValueInputIterator edge_value_input, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, VertexValueInputIterator vertex_value_input_first, @@ -237,7 +237,7 @@ void per_v_pair_transform_dst_nbr_intersection( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using property_t = typename thrust::iterator_traits::value_type; - using edge_property_value_t = typename EdgeValueInputWrapper::value_type; + using edge_property_value_t = typename EdgeValueInputIterator::value_type; using result_t = typename thrust::iterator_traits::value_type; CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -382,17 +382,35 @@ void per_v_pair_transform_dst_nbr_intersection( chunk_vertex_pair_index_first, detail::indirection_t{vertex_pair_first}); - auto [intersection_offsets, - intersection_indices, - r_nbr_intersection_properties0, - r_nbr_intersection_properties1] = - detail::nbr_intersection(handle, - graph_view, - edge_value_input, - chunk_vertex_pair_first, - chunk_vertex_pair_first + this_chunk_size, - std::array{true, true}, - do_expensive_check); + rmm::device_uvector intersection_offsets(size_t{0}, handle.get_stream()); + rmm::device_uvector intersection_indices(size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties0( + size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties1( + size_t{0}, handle.get_stream()); + + if constexpr (!std::is_same_v) { + std::tie(intersection_offsets, + intersection_indices, + r_nbr_intersection_properties0, + r_nbr_intersection_properties1) = + detail::nbr_intersection(handle, + graph_view, + edge_value_input, + chunk_vertex_pair_first, + 
chunk_vertex_pair_first + this_chunk_size, + std::array{true, true}, + do_expensive_check); + } else { + std::tie(intersection_offsets, intersection_indices) = + detail::nbr_intersection(handle, + graph_view, + edge_value_input, + chunk_vertex_pair_first, + chunk_vertex_pair_first + this_chunk_size, + std::array{true, true}, + do_expensive_check); + } if (unique_vertices) { auto vertex_value_input_for_unique_vertices_first = @@ -404,47 +422,45 @@ void per_v_pair_transform_dst_nbr_intersection( detail::call_intersection_op_t< GraphViewType, decltype(vertex_value_input_for_unique_vertices_first), - typename decltype(r_nbr_intersection_properties0)::value_type::const_pointer, + typename decltype(r_nbr_intersection_properties0)::const_pointer, IntersectionOp, decltype(chunk_vertex_pair_index_first), VertexPairIterator, - VertexPairValueOutputIterator>{ - edge_partition, - thrust::make_optional>((*unique_vertices).data(), - (*unique_vertices).size()), - vertex_value_input_for_unique_vertices_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - r_nbr_intersection_properties0 ? r_nbr_intersection_properties0->data() : nullptr, - r_nbr_intersection_properties1 ? 
r_nbr_intersection_properties1->data() : nullptr, - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + VertexPairValueOutputIterator>{edge_partition, + thrust::make_optional>( + (*unique_vertices).data(), (*unique_vertices).size()), + vertex_value_input_for_unique_vertices_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + r_nbr_intersection_properties0.data(), + r_nbr_intersection_properties1.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } else { - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t< - GraphViewType, - VertexValueInputIterator, - typename decltype(r_nbr_intersection_properties0)::value_type::const_pointer, - IntersectionOp, - decltype(chunk_vertex_pair_index_first), - VertexPairIterator, - VertexPairValueOutputIterator>{ - edge_partition, - thrust::optional>{thrust::nullopt}, - vertex_value_input_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - r_nbr_intersection_properties0 ? r_nbr_intersection_properties0->data() : nullptr, - r_nbr_intersection_properties1 ? 
r_nbr_intersection_properties1->data() : nullptr, - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t< + GraphViewType, + VertexValueInputIterator, + typename decltype(r_nbr_intersection_properties0)::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ + edge_partition, + thrust::optional>{thrust::nullopt}, + vertex_value_input_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + r_nbr_intersection_properties0.data(), + r_nbr_intersection_properties1.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } chunk_vertex_pair_index_first += this_chunk_size; diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index 47e43601f09..f773a102959 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -65,7 +65,7 @@ struct compute_chunk_id_t { template struct call_intersection_op_t { @@ -78,8 +78,8 @@ struct call_intersection_op_t { IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; typename GraphViewType::vertex_type const* nbr_indices{nullptr}; - EdgeValueInputWrapper nbr_intersection_properties0{nullptr}; - EdgeValueInputWrapper nbr_intersection_properties1{nullptr}; + EdgeValueInputIterator nbr_intersection_properties0{nullptr}; + EdgeValueInputIterator nbr_intersection_properties1{nullptr}; VertexPairIterator major_minor_pair_first{}; __device__ auto operator()(size_t i) const @@ -342,10 +342,7 @@ void 
transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( chunk_vertex_pair_first + this_chunk_size); // detail::nbr_intersection() requires the // input vertex pairs to be sorted. - auto [intersection_offsets, - intersection_indices, - nbr_intersection_properties0, - nbr_intersection_properties1] = + auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, cugraph::edge_dummy_property_t{}.view(), @@ -363,29 +360,24 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( thrust::make_tuple(get_dataframe_buffer_begin(src_value_buffer), get_dataframe_buffer_begin(dst_value_buffer), get_dataframe_buffer_begin(intersection_value_buffer))); - thrust::tabulate( - handle.get_thrust_policy(), - triplet_first, - triplet_first + this_chunk_size, - detail::call_intersection_op_t< - GraphViewType, - edge_partition_src_input_device_view_t, - edge_partition_dst_input_device_view_t, - // typename decltype(nbr_intersection_properties0)::value_type::const_pointer, - std::nullptr_t, - IntersectionOp, - decltype(chunk_vertex_pair_first)>{ - edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - // nbr_intersection_properties0? nbr_intersection_properties0->data(): nullptr, - // nbr_intersection_properties1? 
nbr_intersection_properties1->data(): nullptr, - nullptr, - nullptr, - chunk_vertex_pair_first}); + thrust::tabulate(handle.get_thrust_policy(), + triplet_first, + triplet_first + this_chunk_size, + detail::call_intersection_op_t{ + edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + nullptr, + nullptr, + chunk_vertex_pair_first}); rmm::device_uvector endpoint_vertices(size_t{0}, handle.get_stream()); auto endpoint_value_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); From cc2197db6c8b3d293ead5b96fc73d73447fb77f1 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Tue, 25 Jul 2023 17:47:43 -0700 Subject: [PATCH 15/22] Address PR comments part-3 --- cpp/src/prims/detail/nbr_intersection.cuh | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 15eb46c0ea3..6c856cc0d09 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -550,15 +550,19 @@ struct gatherv_indices_t { // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree vertices // in a single warp (better optimize if this becomes a performance bottleneck) - auto zipped_gathered_begin = - thrust::make_zip_iterator(thrust::make_tuple(gathered_intersection_indices.begin(), - gathered_nbr_intersection_properties0->begin(), - gathered_nbr_intersection_properties1->begin())); - - auto zipped_combined_begin = - thrust::make_zip_iterator(thrust::make_tuple(combined_nbr_intersection_indices.begin(), - combined_nbr_intersection_properties0->begin(), - combined_nbr_intersection_properties1->begin())); + auto zipped_gathered_begin = thrust::make_zip_iterator(thrust::make_tuple( + gathered_intersection_indices.begin(), + gathered_nbr_intersection_properties0 ? 
(*gathered_nbr_intersection_properties0).begin() + : nullptr, + gathered_nbr_intersection_properties1 ? (*gathered_nbr_intersection_properties1).begin() + : nullptr)); + + auto zipped_combined_begin = thrust::make_zip_iterator(thrust::make_tuple( + combined_nbr_intersection_indices.begin(), + combined_nbr_intersection_properties0 ? (*combined_nbr_intersection_properties0).begin() + : nullptr, + combined_nbr_intersection_properties1 ? (*combined_nbr_intersection_properties1).begin() + : nullptr)); for (int j = 0; j < minor_comm_size; ++j) { if constexpr (!std::is_same_v) { From be7afcd34936eea6d2ce31f32623d82066072d3e Mon Sep 17 00:00:00 2001 From: Md Naim Date: Wed, 26 Jul 2023 15:27:39 -0700 Subject: [PATCH 16/22] Replace several optionals with conditionals --- cpp/src/prims/detail/nbr_intersection.cuh | 126 +++++++++++++--------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 6c856cc0d09..613cba83f17 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -17,6 +17,8 @@ #include +#include + #include #include #include @@ -702,6 +704,10 @@ nbr_intersection(raft::handle_t const& handle, typename EdgeValueInputIterator::value_type>>; using edge_property_value_t = typename EdgeValueInputIterator::value_type; + using optional_property_buffer_value_type = + std::conditional_t, + edge_property_value_t, + void>; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -859,12 +865,9 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); - std::optional> local_nbrs_properties_for_rx_majors{ - std::nullopt}; - if constexpr (!std::is_same_v) { - local_nbrs_properties_for_rx_majors = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - } + 
[[maybe_unused]] auto local_nbrs_properties_for_rx_majors = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); std::vector local_nbr_counts{}; { @@ -939,9 +942,14 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); - if (local_nbrs_properties_for_rx_majors) - (*local_nbrs_properties_for_rx_majors) - .resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); + raft::device_span local_nbrs_properties_span{}; + + if constexpr (!std::is_same_v) { + local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), + handle.get_stream()); + local_nbrs_properties_span = raft::device_span( + local_nbrs_properties_for_rx_majors.data(), local_nbrs_properties_for_rx_majors.size()); + } for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = @@ -978,9 +986,7 @@ nbr_intersection(raft::handle_t const& handle, local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), local_nbrs_for_rx_majors.size()), - raft::device_span( - (*local_nbrs_properties_for_rx_majors).data(), - (*local_nbrs_properties_for_rx_majors).size())}); + local_nbrs_properties_span}); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -1032,7 +1038,7 @@ nbr_intersection(raft::handle_t const& handle, std::tie(*major_nbr_properties, std::ignore) = shuffle_values(major_comm, - (*local_nbrs_properties_for_rx_majors).begin(), + local_nbrs_properties_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); } @@ -1065,20 +1071,16 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); - std::optional> nbr_intersection_properties0{ - std::nullopt}; - std::optional> nbr_intersection_properties1{ - 
std::nullopt}; - std::optional> nbr_intersection_idx_buffer{std::nullopt}; - - if constexpr (!std::is_same_v) { - nbr_intersection_properties0 = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - nbr_intersection_properties1 = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - nbr_intersection_idx_buffer = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); - } + [[maybe_unused]] auto nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + + [[maybe_unused]] auto nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + + [[maybe_unused]] auto nbr_intersection_idx_buffer = + cugraph::detail::allocate_optional_dataframe_buffer(0, handle.get_stream()); if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); @@ -1612,8 +1614,8 @@ nbr_intersection(raft::handle_t const& handle, } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); if constexpr (!std::is_same_v) { - (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); - (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); } size_t size_offset{0}; size_t index_offset{0}; @@ -1632,12 +1634,12 @@ nbr_intersection(raft::handle_t const& handle, thrust::copy(handle.get_thrust_policy(), edge_partition_nbr_intersection_property0[i].begin(), edge_partition_nbr_intersection_property0[i].end(), - (*nbr_intersection_properties0).begin() + index_offset); + nbr_intersection_properties0.begin() + index_offset); thrust::copy(handle.get_thrust_policy(), 
edge_partition_nbr_intersection_property1[i].begin(), edge_partition_nbr_intersection_property1[i].end(), - (*nbr_intersection_properties1).begin() + index_offset); + nbr_intersection_properties1.begin() + index_offset); } index_offset += edge_partition_nbr_intersection_indices[i].size(); @@ -1687,10 +1689,23 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); + raft::device_span nbr_intersection_properties0_span{}; + raft::device_span nbr_intersection_properties1_span{}; + raft::device_span nbr_intersection_idx_buffer_span{}; + if constexpr (!std::is_same_v) { - (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); - (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); - (*nbr_intersection_idx_buffer).resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_idx_buffer.resize(nbr_intersection_indices.size(), handle.get_stream()); + + nbr_intersection_properties0_span = raft::device_span( + nbr_intersection_properties0.data(), nbr_intersection_properties0.size()); + + nbr_intersection_properties1_span = raft::device_span( + nbr_intersection_properties1.data(), nbr_intersection_properties1.size()); + + nbr_intersection_idx_buffer_span = raft::device_span( + nbr_intersection_idx_buffer.data(), nbr_intersection_idx_buffer.size()); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { @@ -1721,12 +1736,9 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.size()), raft::device_span(nbr_intersection_indices.data(), nbr_intersection_indices.size()), - raft::device_span((*nbr_intersection_properties0).data(), - (*nbr_intersection_properties0).size()), 
- raft::device_span((*nbr_intersection_properties1).data(), - (*nbr_intersection_properties1).size()), - raft::device_span((*nbr_intersection_idx_buffer).data(), - (*nbr_intersection_idx_buffer).size()), + nbr_intersection_properties0_span, + nbr_intersection_properties1_span, + nbr_intersection_idx_buffer_span, invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); @@ -1741,23 +1753,31 @@ nbr_intersection(raft::handle_t const& handle, detail::not_equal_t{invalid_vertex_id::value}), handle.get_stream()); - std::optional> tmp_properties0{std::nullopt}; - std::optional> tmp_properties1{std::nullopt}; + [[maybe_unused]] auto tmp_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + tmp_indices.size(), handle.get_stream()); + + [[maybe_unused]] auto tmp_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + tmp_indices.size(), handle.get_stream()); + + raft::device_span tmp_properties0_span{}; + raft::device_span tmp_properties1_span{}; if constexpr (!std::is_same_v) { - tmp_properties0 = std::make_optional( - rmm::device_uvector(tmp_indices.size(), handle.get_stream())); - tmp_properties1 = std::make_optional( - rmm::device_uvector(tmp_indices.size(), handle.get_stream())); + tmp_properties0_span = + raft::device_span(tmp_properties0.data(), tmp_properties0.size()); + tmp_properties1_span = + raft::device_span(tmp_properties1.data(), tmp_properties1.size()); } auto zipped_itr_to_indices_and_properties_begin = thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), - (*nbr_intersection_properties0).begin(), - (*nbr_intersection_properties1).begin())); + nbr_intersection_properties0_span.begin(), + nbr_intersection_properties1_span.begin())); auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( - tmp_indices.begin(), (*tmp_properties0).begin(), (*tmp_properties1).begin())); + tmp_indices.begin(), tmp_properties0_span.begin(), tmp_properties1_span.begin())); size_t 
num_copied{0}; size_t num_scanned{0}; @@ -1820,8 +1840,8 @@ nbr_intersection(raft::handle_t const& handle, })), handle.get_stream()); - (*nbr_intersection_properties0).resize(nbr_intersection_indices.size(), handle.get_stream()); - (*nbr_intersection_properties1).resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); } #endif @@ -1840,8 +1860,8 @@ nbr_intersection(raft::handle_t const& handle, } else { return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices), - std::move((*nbr_intersection_properties0)), - std::move((*nbr_intersection_properties1))); + std::move(nbr_intersection_properties0), + std::move(nbr_intersection_properties1)); } } From 4707d7954fc796139923f023454bede7e98865e1 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Thu, 27 Jul 2023 11:49:04 -0700 Subject: [PATCH 17/22] Replace several optional variables with conditional variables --- cpp/src/prims/detail/nbr_intersection.cuh | 294 +++++++++++----------- 1 file changed, 144 insertions(+), 150 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 613cba83f17..ddc1aa4daf8 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -175,6 +175,7 @@ template struct update_rx_major_local_nbrs_t { int major_comm_size{}; @@ -190,10 +191,8 @@ struct update_rx_major_local_nbrs_t { raft::device_span rx_group_firsts{nullptr}; raft::device_span rx_majors{}; raft::device_span local_nbr_offsets_for_rx_majors{}; - raft::device_span local_nbrs_for_rx_majors{}; - - raft::device_span local_nbrs_properties_for_rx_majors{}; + optional_property_buffer_t local_nbrs_properties_for_rx_majors{}; __device__ void operator()(size_t idx) { @@ -337,6 +336,8 @@ template struct 
copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; @@ -355,9 +356,11 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; - raft::device_span nbr_intersection_properties0{nullptr}; - raft::device_span nbr_intersection_properties1{nullptr}; - raft::device_span nbr_intersection_idx_buffer{nullptr}; + + optional_property_buffer_t nbr_intersection_properties0{}; + optional_property_buffer_t nbr_intersection_properties1{}; + optional_idx_buffer_t nbr_intersection_idx_buffer{}; + vertex_t invalid_id{}; __device__ edge_t operator()(size_t i) @@ -525,7 +528,7 @@ struct strided_accumulate_t { } }; -template +template struct gatherv_indices_t { size_t output_size{}; int minor_comm_size{}; @@ -535,15 +538,10 @@ struct gatherv_indices_t { raft::device_span combined_nbr_intersection_offsets{}; raft::device_span combined_nbr_intersection_indices{}; - thrust::optional> - gathered_nbr_intersection_properties0{thrust::nullopt}; - thrust::optional> - gathered_nbr_intersection_properties1{thrust::nullopt}; - - thrust::optional> combined_nbr_intersection_properties0{ - thrust::nullopt}; - thrust::optional> combined_nbr_intersection_properties1{ - thrust::nullopt}; + optional_property_buffer_t gathered_nbr_intersection_properties0{}; + optional_property_buffer_t gathered_nbr_intersection_properties1{}; + optional_property_buffer_t combined_nbr_intersection_properties0{}; + optional_property_buffer_t combined_nbr_intersection_properties1{}; __device__ void operator()(size_t i) const { @@ -552,22 +550,18 @@ struct gatherv_indices_t { // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree vertices // in a single warp (better optimize if this becomes a performance bottleneck) - auto zipped_gathered_begin = thrust::make_zip_iterator(thrust::make_tuple( 
- gathered_intersection_indices.begin(), - gathered_nbr_intersection_properties0 ? (*gathered_nbr_intersection_properties0).begin() - : nullptr, - gathered_nbr_intersection_properties1 ? (*gathered_nbr_intersection_properties1).begin() - : nullptr)); - - auto zipped_combined_begin = thrust::make_zip_iterator(thrust::make_tuple( - combined_nbr_intersection_indices.begin(), - combined_nbr_intersection_properties0 ? (*combined_nbr_intersection_properties0).begin() - : nullptr, - combined_nbr_intersection_properties1 ? (*combined_nbr_intersection_properties1).begin() - : nullptr)); - for (int j = 0; j < minor_comm_size; ++j) { if constexpr (!std::is_same_v) { + auto zipped_gathered_begin = thrust::make_zip_iterator( + thrust::make_tuple(gathered_intersection_indices.begin(), + gathered_nbr_intersection_properties0.begin(), + gathered_nbr_intersection_properties1.begin())); + + auto zipped_combined_begin = thrust::make_zip_iterator( + thrust::make_tuple(combined_nbr_intersection_indices.begin(), + combined_nbr_intersection_properties0.begin(), + combined_nbr_intersection_properties1.begin())); + thrust::copy(thrust::seq, zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i + 1], @@ -708,6 +702,18 @@ nbr_intersection(raft::handle_t const& handle, std::conditional_t, edge_property_value_t, void>; + using optional_property_buffer_idx_type = + std::conditional_t, vertex_t, void>; + + using optional_property_buffer_t = + std::conditional_t, + raft::device_span, + std::byte>; + + using optional_idx_buffer_t = + std::conditional_t, + raft::device_span, + std::byte>; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -942,12 +948,12 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); - raft::device_span local_nbrs_properties_span{}; + 
optional_property_buffer_t optional_local_nbrs_properties{}; if constexpr (!std::is_same_v) { local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); - local_nbrs_properties_span = raft::device_span( + optional_local_nbrs_properties = raft::device_span( local_nbrs_properties_for_rx_majors.data(), local_nbrs_properties_for_rx_majors.size()); } @@ -971,6 +977,7 @@ nbr_intersection(raft::handle_t const& handle, edge_t, edge_property_value_t, edge_partition_e_input_device_view_t, + optional_property_buffer_t, GraphViewType::is_multi_gpu>{ major_comm_size, minor_comm_size, @@ -986,7 +993,7 @@ nbr_intersection(raft::handle_t const& handle, local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), local_nbrs_for_rx_majors.size()), - local_nbrs_properties_span}); + optional_local_nbrs_properties}); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -1080,7 +1087,8 @@ nbr_intersection(raft::handle_t const& handle, 0, handle.get_stream()); [[maybe_unused]] auto nbr_intersection_idx_buffer = - cugraph::detail::allocate_optional_dataframe_buffer(0, handle.get_stream()); + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); @@ -1153,23 +1161,17 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); - std::optional> - rx_v_pair_nbr_intersection_properties0{std::nullopt}; - std::optional> - rx_v_pair_nbr_intersection_properties1{std::nullopt}; - - std::optional> rx_v_pair_nbr_intersection_idx_buffer{ - std::nullopt}; + [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); - if constexpr (!std::is_same_v) { - rx_v_pair_nbr_intersection_properties0 = 
std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - rx_v_pair_nbr_intersection_properties1 = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); + [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); - rx_v_pair_nbr_intersection_idx_buffer = - std::make_optional(rmm::device_uvector(size_t{0}, handle.get_stream())); - } + [[maybe_unused]] auto rx_v_pair_nbr_intersection_idx_buffer = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); std::vector rx_v_pair_nbr_intersection_index_tx_counts(size_t{0}); { @@ -1234,14 +1236,31 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); + optional_property_buffer_t rx_v_pair_optional_nbr_intersection_properties0{}; + optional_property_buffer_t rx_v_pair_optional_nbr_intersection_properties1{}; + optional_idx_buffer_t rx_v_pair_optional_nbr_intersection_idx_buffer{}; + if constexpr (!std::is_same_v) { - (*rx_v_pair_nbr_intersection_properties0) - .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties1) - .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - (*rx_v_pair_nbr_intersection_idx_buffer) - .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + rx_v_pair_nbr_intersection_idx_buffer.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + + rx_v_pair_optional_nbr_intersection_properties0 = + raft::device_span(rx_v_pair_nbr_intersection_properties0.data(), + rx_v_pair_nbr_intersection_properties0.size()); + + 
rx_v_pair_optional_nbr_intersection_properties1 = + raft::device_span(rx_v_pair_nbr_intersection_properties1.data(), + rx_v_pair_nbr_intersection_properties1.size()); + + rx_v_pair_optional_nbr_intersection_idx_buffer = + raft::device_span(rx_v_pair_nbr_intersection_idx_buffer.data(), + rx_v_pair_nbr_intersection_idx_buffer.size()); } + if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); @@ -1261,6 +1280,8 @@ nbr_intersection(raft::handle_t const& handle, edge_t, edge_property_value_t, edge_partition_e_input_device_view_t, + optional_property_buffer_t, + optional_idx_buffer_t, true>{nullptr, raft::device_span(), raft::device_span(), @@ -1279,14 +1300,9 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.size()), raft::device_span(rx_v_pair_nbr_intersection_indices.data(), rx_v_pair_nbr_intersection_indices.size()), - raft::device_span( - (*rx_v_pair_nbr_intersection_properties0).data(), - (*rx_v_pair_nbr_intersection_properties0).size()), - raft::device_span( - (*rx_v_pair_nbr_intersection_properties1).data(), - (*rx_v_pair_nbr_intersection_properties1).size()), - raft::device_span((*rx_v_pair_nbr_intersection_idx_buffer).data(), - (*rx_v_pair_nbr_intersection_idx_buffer).size()), + rx_v_pair_optional_nbr_intersection_properties0, + rx_v_pair_optional_nbr_intersection_properties1, + rx_v_pair_optional_nbr_intersection_idx_buffer, invalid_vertex_id::value}); } else { @@ -1305,8 +1321,8 @@ nbr_intersection(raft::handle_t const& handle, } else { auto common_nbr_and_properties_begin = thrust::make_zip_iterator( thrust::make_tuple(rx_v_pair_nbr_intersection_indices.begin(), - (*rx_v_pair_nbr_intersection_properties0).begin(), - (*rx_v_pair_nbr_intersection_properties1).begin())); + rx_v_pair_nbr_intersection_properties0.begin(), + rx_v_pair_nbr_intersection_properties1.begin())); auto last = thrust::remove_if( handle.get_thrust_policy(), @@ -1321,13 +1337,13 @@ 
nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties0) - .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties0).shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties1) - .resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties1).shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); } thrust::inclusive_scan(handle.get_thrust_policy(), @@ -1446,17 +1462,13 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector combined_nbr_intersection_indices(size_t{0}, handle.get_stream()); - std::optional> - combined_nbr_intersection_properties0{std::nullopt}; - std::optional> - combined_nbr_intersection_properties1{std::nullopt}; + [[maybe_unused]] auto combined_nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + size_t{0}, handle.get_stream()); - if constexpr (!std::is_same_v) { - combined_nbr_intersection_properties0 = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - combined_nbr_intersection_properties1 = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - } + [[maybe_unused]] auto combined_nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + size_t{0}, handle.get_stream()); { std::vector ranks(minor_comm_size); @@ -1493,53 +1505,47 @@ nbr_intersection(raft::handle_t const& handle, 
combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(), handle.get_stream()); - std::optional> - gathered_nbr_intersection_properties0{std::nullopt}; - std::optional> - gathered_nbr_intersection_properties1{std::nullopt}; + [[maybe_unused]] auto gathered_nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + handle.get_stream()); - if constexpr (!std::is_same_v) { - gathered_nbr_intersection_properties0 = - std::make_optional(rmm::device_uvector( - rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), - handle.get_stream())); - gathered_nbr_intersection_properties1 = - std::make_optional(rmm::device_uvector( - rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), - handle.get_stream())); - } + [[maybe_unused]] auto gathered_nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + handle.get_stream()); if constexpr (!std::is_same_v) { device_multicast_sendrecv(minor_comm, - (*rx_v_pair_nbr_intersection_properties0).begin(), + rx_v_pair_nbr_intersection_properties0.begin(), rx_v_pair_nbr_intersection_index_tx_counts, tx_displacements, ranks, - (*gathered_nbr_intersection_properties0).begin(), + gathered_nbr_intersection_properties0.begin(), gathered_nbr_intersection_index_rx_counts, rx_displacements, ranks, handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties0).resize(size_t{0}, handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties0).shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); - (*combined_nbr_intersection_properties0) - .resize((*gathered_nbr_intersection_properties0).size(), handle.get_stream()); + 
combined_nbr_intersection_properties0.resize(gathered_nbr_intersection_properties0.size(), + handle.get_stream()); device_multicast_sendrecv(minor_comm, - (*rx_v_pair_nbr_intersection_properties1).begin(), + rx_v_pair_nbr_intersection_properties1.begin(), rx_v_pair_nbr_intersection_index_tx_counts, tx_displacements, ranks, - (*gathered_nbr_intersection_properties1).begin(), + gathered_nbr_intersection_properties1.begin(), gathered_nbr_intersection_index_rx_counts, rx_displacements, ranks, handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties1).resize(size_t{0}, handle.get_stream()); - (*rx_v_pair_nbr_intersection_properties1).shrink_to_fit(handle.get_stream()); - (*combined_nbr_intersection_properties1) - .resize((*gathered_nbr_intersection_properties1).size(), handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); + combined_nbr_intersection_properties1.resize(gathered_nbr_intersection_properties1.size(), + handle.get_stream()); } if constexpr (!std::is_same_v) { @@ -1547,7 +1553,7 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1558,29 +1564,25 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_offsets.size()), raft::device_span(combined_nbr_intersection_indices.data(), combined_nbr_intersection_indices.size()), - raft::device_span( - (*gathered_nbr_intersection_properties0).data(), - (*gathered_nbr_intersection_properties0).size()), + gathered_nbr_intersection_properties0.data(), + gathered_nbr_intersection_properties0.size()), raft::device_span( - (*gathered_nbr_intersection_properties1).data(), - 
(*gathered_nbr_intersection_properties1).size()), - + gathered_nbr_intersection_properties1.data(), + gathered_nbr_intersection_properties1.size()), raft::device_span( - (*combined_nbr_intersection_properties0).data(), - (*combined_nbr_intersection_properties0).size()), + combined_nbr_intersection_properties0.data(), + combined_nbr_intersection_properties0.size()), raft::device_span( - (*combined_nbr_intersection_properties1).data(), - (*combined_nbr_intersection_properties1).size()) - - }); + combined_nbr_intersection_properties1.data(), + combined_nbr_intersection_properties1.size())}); } else { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1601,9 +1603,9 @@ nbr_intersection(raft::handle_t const& handle, std::move(combined_nbr_intersection_indices)); if constexpr (!std::is_same_v) { edge_partition_nbr_intersection_property0.push_back( - std::move((*combined_nbr_intersection_properties0))); + std::move(combined_nbr_intersection_properties0)); edge_partition_nbr_intersection_property1.push_back( - std::move((*combined_nbr_intersection_properties1))); + std::move(combined_nbr_intersection_properties1)); } } @@ -1689,22 +1691,22 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - raft::device_span nbr_intersection_properties0_span{}; - raft::device_span nbr_intersection_properties1_span{}; - raft::device_span nbr_intersection_idx_buffer_span{}; + optional_property_buffer_t optional_nbr_intersection_properties0{}; + optional_property_buffer_t optional_nbr_intersection_properties1{}; + optional_idx_buffer_t optional_nbr_intersection_idx_buffer{}; if constexpr (!std::is_same_v) { 
nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_idx_buffer.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties0_span = raft::device_span( + optional_nbr_intersection_properties0 = raft::device_span( nbr_intersection_properties0.data(), nbr_intersection_properties0.size()); - nbr_intersection_properties1_span = raft::device_span( + optional_nbr_intersection_properties1 = raft::device_span( nbr_intersection_properties1.data(), nbr_intersection_properties1.size()); - nbr_intersection_idx_buffer_span = raft::device_span( + optional_nbr_intersection_idx_buffer = raft::device_span( nbr_intersection_idx_buffer.data(), nbr_intersection_idx_buffer.size()); } @@ -1720,6 +1722,8 @@ nbr_intersection(raft::handle_t const& handle, edge_t, edge_property_value_t, edge_partition_e_input_device_view_t, + optional_property_buffer_t, + optional_idx_buffer_t, false>{ nullptr, raft::device_span(), @@ -1736,9 +1740,9 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_offsets.size()), raft::device_span(nbr_intersection_indices.data(), nbr_intersection_indices.size()), - nbr_intersection_properties0_span, - nbr_intersection_properties1_span, - nbr_intersection_idx_buffer_span, + optional_nbr_intersection_properties0, + optional_nbr_intersection_properties1, + optional_nbr_intersection_idx_buffer, invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); @@ -1761,24 +1765,6 @@ nbr_intersection(raft::handle_t const& handle, cugraph::detail::allocate_optional_dataframe_buffer( tmp_indices.size(), handle.get_stream()); - raft::device_span tmp_properties0_span{}; - raft::device_span tmp_properties1_span{}; - - if constexpr (!std::is_same_v) { - tmp_properties0_span = - raft::device_span(tmp_properties0.data(), tmp_properties0.size()); - tmp_properties1_span = - 
raft::device_span(tmp_properties1.data(), tmp_properties1.size()); - } - - auto zipped_itr_to_indices_and_properties_begin = - thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), - nbr_intersection_properties0_span.begin(), - nbr_intersection_properties1_span.begin())); - - auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( - tmp_indices.begin(), tmp_properties0_span.begin(), tmp_properties1_span.begin())); - size_t num_copied{0}; size_t num_scanned{0}; @@ -1796,6 +1782,14 @@ nbr_intersection(raft::handle_t const& handle, tmp_indices.begin() + num_copied, detail::not_equal_t{invalid_vertex_id::value}))); } else { + auto zipped_itr_to_indices_and_properties_begin = + thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), + nbr_intersection_properties0.begin(), + nbr_intersection_properties1.begin())); + + auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( + tmp_indices.begin(), tmp_properties0.begin(), tmp_properties1.begin())); + num_copied += static_cast(thrust::distance( zipped_itr_to_tmps_begin + num_copied, thrust::copy_if(handle.get_thrust_policy(), From 00d95243b40a4eb7974e28f7bf4c619f20ad76f5 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Thu, 27 Jul 2023 12:59:04 -0700 Subject: [PATCH 18/22] Update test code to use multiple common input mtx files --- cpp/tests/link_prediction/mg_similarity_test.cpp | 5 ++--- .../prims/mg_per_v_pair_transform_dst_nbr_intersection.cu | 4 +--- ...mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu | 6 ++++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cpp/tests/link_prediction/mg_similarity_test.cpp b/cpp/tests/link_prediction/mg_similarity_test.cpp index 45dcf8a70e4..c2a0b23c6d7 100644 --- a/cpp/tests/link_prediction/mg_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_similarity_test.cpp @@ -258,9 +258,8 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 
//::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}), ::testing::Values(Similarity_Usecase{false, true, 20}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx") - // , cugraph::test::File_Usecase("test/datasets/netscience.mtx") - ))); + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); INSTANTIATE_TEST_SUITE_P( rmat_small_test, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index d2d6653627b..a7cd8a989b0 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -329,9 +329,7 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( ::testing::Values(Prims_Usecase{size_t{1024}, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), - cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), - cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_MGPerVPairTransformDstNbrIntersection_Rmat, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index d3398eb0f62..3b6a6b9c4c5 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -377,8 +377,10 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{10}, true}), - 
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{10}, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_MGPerVPairTransformDstNbrIntersection_Rmat, From 40f33ba2628d5eedb2ffbb1b509ea91a780e6d92 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Thu, 27 Jul 2023 15:29:00 -0700 Subject: [PATCH 19/22] Move optional dataframe buffer related code to a new file --- .../detail/extract_transform_v_frontier_e.cuh | 78 +------------- cpp/src/prims/detail/nbr_intersection.cuh | 3 +- .../detail/optional_dataframe_buffer.hpp | 102 ++++++++++++++++++ 3 files changed, 104 insertions(+), 79 deletions(-) create mode 100644 cpp/src/prims/detail/optional_dataframe_buffer.hpp diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index ac57c8f180a..7c3fbad9153 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -60,83 +61,6 @@ namespace detail { int32_t constexpr extract_transform_v_frontier_e_kernel_block_size = 512; -// we cannot use thrust::iterator_traits::value_type if Iterator is void* (reference to -// void is not allowed) -template -struct optional_dataframe_buffer_value_type_t; - -template -struct optional_dataframe_buffer_value_type_t>> { - using value = typename thrust::iterator_traits::value_type; -}; - -template -struct optional_dataframe_buffer_value_type_t>> { - using value = void; -}; - -template >* = nullptr> -std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) -{ - return std::byte{0}; // dummy -} - -template >* = nullptr> -auto allocate_optional_dataframe_buffer(size_t size, 
rmm::cuda_stream_view stream) -{ - return allocate_dataframe_buffer(size, stream); -} - -template >* = nullptr> -void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer) -{ - return static_cast(nullptr); -} - -template >* = nullptr> -auto get_optional_dataframe_buffer_begin( - std::add_lvalue_reference_t( - size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer) -{ - return get_dataframe_buffer_begin(optional_dataframe_buffer); -} - -template >* = nullptr> -void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - size_t new_buffer_size, - rmm::cuda_stream_view stream_view) -{ - return; -} - -template >* = nullptr> -void resize_optional_dataframe_buffer( - std::add_lvalue_reference_t( - size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, - size_t new_buffer_size, - rmm::cuda_stream_view stream_view) -{ - return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view); -} - -template >* = nullptr> -void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - rmm::cuda_stream_view stream_view) -{ - return; -} - -template >* = nullptr> -void shrink_to_fit_optional_dataframe_buffer( - std::add_lvalue_reference_t( - size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, - rmm::cuda_stream_view stream_view) -{ - return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); -} - template diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index ddc1aa4daf8..9de3d45d394 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -15,10 +15,9 @@ */ #pragma once +#include #include -#include - #include #include #include diff --git a/cpp/src/prims/detail/optional_dataframe_buffer.hpp b/cpp/src/prims/detail/optional_dataframe_buffer.hpp new file mode 100644 index 00000000000..dd40e6932e4 --- /dev/null +++ 
b/cpp/src/prims/detail/optional_dataframe_buffer.hpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace cugraph { + +namespace detail { + +// we cannot use thrust::iterator_traits::value_type if Iterator is void* (reference to +// void is not allowed) +template +struct optional_dataframe_buffer_value_type_t; + +template +struct optional_dataframe_buffer_value_type_t>> { + using value = typename thrust::iterator_traits::value_type; +}; + +template +struct optional_dataframe_buffer_value_type_t>> { + using value = void; +}; + +template >* = nullptr> +std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) +{ + return std::byte{0}; // dummy +} + +template >* = nullptr> +auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) +{ + return allocate_dataframe_buffer(size, stream); +} + +template >* = nullptr> +void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer) +{ + return static_cast(nullptr); +} + +template >* = nullptr> +auto get_optional_dataframe_buffer_begin( + std::add_lvalue_reference_t( + size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer) +{ + return get_dataframe_buffer_begin(optional_dataframe_buffer); +} + +template >* = nullptr> +void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, + size_t new_buffer_size, + 
rmm::cuda_stream_view stream_view) +{ + return; +} + +template >* = nullptr> +void resize_optional_dataframe_buffer( + std::add_lvalue_reference_t( + size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, + size_t new_buffer_size, + rmm::cuda_stream_view stream_view) +{ + return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view); +} + +template >* = nullptr> +void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, + rmm::cuda_stream_view stream_view) +{ + return; +} + +template >* = nullptr> +void shrink_to_fit_optional_dataframe_buffer( + std::add_lvalue_reference_t( + size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, + rmm::cuda_stream_view stream_view) +{ + return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); +} +} // namespace detail + +} // namespace cugraph From 232d231ea51be6d10c6e5c9c6f0e51b514a72f8b Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 28 Jul 2023 08:57:44 -0700 Subject: [PATCH 20/22] Address PR comments --- cpp/src/prims/detail/nbr_intersection.cuh | 187 +++++++----------- ..._v_pair_transform_dst_nbr_intersection.cuh | 4 +- 2 files changed, 73 insertions(+), 118 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 9de3d45d394..508fd44ad41 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -172,9 +172,8 @@ struct update_rx_major_local_degree_t { template struct update_rx_major_local_nbrs_t { int major_comm_size{}; @@ -191,11 +190,12 @@ struct update_rx_major_local_nbrs_t { raft::device_span rx_majors{}; raft::device_span local_nbr_offsets_for_rx_majors{}; raft::device_span local_nbrs_for_rx_majors{}; - optional_property_buffer_t local_nbrs_properties_for_rx_majors{}; + optional_property_buffer_view_t local_nbrs_properties_for_rx_majors{}; __device__ void operator()(size_t idx) { - auto it = thrust::upper_bound( + using 
edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; + auto it = thrust::upper_bound( thrust::seq, rx_reordered_group_lasts.begin(), rx_reordered_group_lasts.end(), idx); auto major_comm_rank = static_cast(thrust::distance(rx_reordered_group_lasts.begin(), it)); auto offset_in_local_edge_partition = @@ -333,21 +333,19 @@ template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; raft::device_span first_element_offsets{}; raft::device_span first_element_indices{nullptr}; - raft::device_span first_element_properties{nullptr}; + optional_property_buffer_view_t first_element_properties{}; SecondElementToIdxMap second_element_to_idx_map{}; raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; - raft::device_span second_element_properties{nullptr}; + optional_property_buffer_view_t second_element_properties{}; edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; @@ -356,18 +354,16 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; - optional_property_buffer_t nbr_intersection_properties0{}; - optional_property_buffer_t nbr_intersection_properties1{}; - optional_idx_buffer_t nbr_intersection_idx_buffer{}; - + optional_property_buffer_view_t nbr_intersection_properties0{}; + optional_property_buffer_view_t nbr_intersection_properties1{}; vertex_t invalid_id{}; - __device__ edge_t operator()(size_t i) { - auto pair = *(vertex_pair_first + i); + using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; + auto pair = *(vertex_pair_first + i); vertex_t const* indices0{nullptr}; - edge_property_value_t const* property0{nullptr}; + edge_property_value_t const* properties0{nullptr}; [[maybe_unused]] edge_t local_edge_offset0{0}; edge_t 
local_degree0{0}; @@ -393,7 +389,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } if constexpr (!std::is_same_v) { - property0 = edge_partition_e_value_input.value_first() + local_edge_offset0; + properties0 = edge_partition_e_value_input.value_first() + local_edge_offset0; } } else { @@ -403,12 +399,12 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { indices0 = first_element_indices.begin() + first_element_offsets[idx]; local_edge_offset0 = first_element_offsets[idx]; if constexpr (!std::is_same_v) { - property0 = first_element_properties.begin() + local_edge_offset0; + properties0 = first_element_properties.begin() + local_edge_offset0; } } vertex_t const* indices1{nullptr}; - edge_property_value_t const* property1{nullptr}; + edge_property_value_t const* properties1{nullptr}; [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; @@ -434,7 +430,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } if constexpr (!std::is_same_v) { - property1 = edge_partition_e_value_input.value_first() + local_edge_offset1; + properties1 = edge_partition_e_value_input.value_first() + local_edge_offset1; } } else { @@ -445,7 +441,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { local_edge_offset1 = second_element_offsets[idx]; if constexpr (!std::is_same_v) { - property1 = second_element_properties.begin() + local_edge_offset1; + properties1 = second_element_properties.begin() + local_edge_offset1; } } @@ -472,40 +468,31 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; // copy edge properties from first vertex to common neighbors - thrust::lower_bound(thrust::seq, - indices0, - indices0 + local_degree0, - nbr_intersection_first, - nbr_intersection_last, - nbr_intersection_idx_buffer.begin() + - nbr_intersection_offsets[i], // ip0_start, // indices - thrust::less()); - thrust::transform( - 
thrust::seq, - nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i], - nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i] + insection_size, - ip0_start, - [property0] __device__(auto idx) { return property0[static_cast(idx)]; }); + thrust::transform(thrust::seq, + nbr_intersection_first, + nbr_intersection_last, + ip0_start, + [indices0, local_degree0, properties0] __device__(auto v) { + auto position = + thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, v); + + return properties0[thrust::distance(indices0, position)]; + }); auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; // copy edge properties from second vertex to common neighbors - thrust::lower_bound(thrust::seq, - indices1, - indices1 + local_degree1, - nbr_intersection_first, - nbr_intersection_last, - nbr_intersection_idx_buffer.begin() + - nbr_intersection_offsets[i], // ip1_start, // indices - thrust::less()); - - thrust::transform( - thrust::seq, - nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i], - nbr_intersection_idx_buffer.begin() + nbr_intersection_offsets[i] + insection_size, - ip1_start, - [property1] __device__(auto idx) { return property1[static_cast(idx)]; }); + thrust::transform(thrust::seq, + nbr_intersection_first, + nbr_intersection_last, + ip1_start, + [indices1, local_degree1, properties1] __device__(auto v) { + auto position = + thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, v); + + return properties1[thrust::distance(indices1, position)]; + }); } return insection_size; } @@ -527,7 +514,9 @@ struct strided_accumulate_t { } }; -template +template struct gatherv_indices_t { size_t output_size{}; int minor_comm_size{}; @@ -537,10 +526,10 @@ struct gatherv_indices_t { raft::device_span combined_nbr_intersection_offsets{}; raft::device_span combined_nbr_intersection_indices{}; - optional_property_buffer_t gathered_nbr_intersection_properties0{}; - 
optional_property_buffer_t gathered_nbr_intersection_properties1{}; - optional_property_buffer_t combined_nbr_intersection_properties0{}; - optional_property_buffer_t combined_nbr_intersection_properties1{}; + optional_property_buffer_view_t gathered_nbr_intersection_properties0{}; + optional_property_buffer_view_t gathered_nbr_intersection_properties1{}; + optional_property_buffer_view_t combined_nbr_intersection_properties0{}; + optional_property_buffer_view_t combined_nbr_intersection_properties1{}; __device__ void operator()(size_t i) const { @@ -704,16 +693,11 @@ nbr_intersection(raft::handle_t const& handle, using optional_property_buffer_idx_type = std::conditional_t, vertex_t, void>; - using optional_property_buffer_t = + using optional_property_buffer_view_t = std::conditional_t, raft::device_span, std::byte>; - using optional_idx_buffer_t = - std::conditional_t, - raft::device_span, - std::byte>; - static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -750,7 +734,10 @@ nbr_intersection(raft::handle_t const& handle, std::optional> major_nbr_offsets{std::nullopt}; std::optional> major_nbr_indices{std::nullopt}; - std::optional> major_nbr_properties{std::nullopt}; + [[maybe_unused]] auto major_nbr_properties = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + optional_property_buffer_view_t optional_major_nbr_properties{}; if constexpr (GraphViewType::is_multi_gpu) { if (intersect_minor_nbr[1]) { @@ -764,8 +751,6 @@ nbr_intersection(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto const comm_rank = handle.get_comms().get_rank(); - // 2.1 Find unique second pair element majors rmm::device_uvector unique_majors(input_size, handle.get_stream()); @@ -947,7 +932,7 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( 
local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_t optional_local_nbrs_properties{}; + optional_property_buffer_view_t optional_local_nbrs_properties{}; if constexpr (!std::is_same_v) { local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), @@ -974,9 +959,8 @@ nbr_intersection(raft::handle_t const& handle, thrust::make_counting_iterator(reordered_idx_last), update_rx_major_local_nbrs_t{ major_comm_size, minor_comm_size, @@ -1039,14 +1023,14 @@ nbr_intersection(raft::handle_t const& handle, major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); if constexpr (!std::is_same_v) { - major_nbr_properties = std::make_optional( - rmm::device_uvector(size_t{0}, handle.get_stream())); - - std::tie(*major_nbr_properties, std::ignore) = + std::tie(major_nbr_properties, std::ignore) = shuffle_values(major_comm, local_nbrs_properties_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); + + optional_major_nbr_properties = raft::device_span( + major_nbr_properties.data(), major_nbr_properties.size()); } major_to_idx_map_ptr = std::make_unique>( @@ -1085,10 +1069,6 @@ nbr_intersection(raft::handle_t const& handle, cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - [[maybe_unused]] auto nbr_intersection_idx_buffer = - cugraph::detail::allocate_optional_dataframe_buffer( - 0, handle.get_stream()); - if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_rank = minor_comm.get_rank(); @@ -1132,9 +1112,10 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - std::vector> + [[maybe_unused]] std::vector> edge_partition_nbr_intersection_property0{}; - 
std::vector> + + [[maybe_unused]] std::vector> edge_partition_nbr_intersection_property1{}; if constexpr (!std::is_same_v) { @@ -1168,10 +1149,6 @@ nbr_intersection(raft::handle_t const& handle, cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - [[maybe_unused]] auto rx_v_pair_nbr_intersection_idx_buffer = - cugraph::detail::allocate_optional_dataframe_buffer( - 0, handle.get_stream()); - std::vector rx_v_pair_nbr_intersection_index_tx_counts(size_t{0}); { auto vertex_pair_buffer = allocate_dataframe_buffer>( @@ -1235,17 +1212,14 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_t rx_v_pair_optional_nbr_intersection_properties0{}; - optional_property_buffer_t rx_v_pair_optional_nbr_intersection_properties1{}; - optional_idx_buffer_t rx_v_pair_optional_nbr_intersection_idx_buffer{}; + optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties0{}; + optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties1{}; if constexpr (!std::is_same_v) { rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - rx_v_pair_nbr_intersection_idx_buffer.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); rx_v_pair_optional_nbr_intersection_properties0 = raft::device_span(rx_v_pair_nbr_intersection_properties0.data(), @@ -1254,17 +1228,9 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_optional_nbr_intersection_properties1 = raft::device_span(rx_v_pair_nbr_intersection_properties1.data(), rx_v_pair_nbr_intersection_properties1.size()); - - rx_v_pair_optional_nbr_intersection_idx_buffer = - raft::device_span(rx_v_pair_nbr_intersection_idx_buffer.data(), - rx_v_pair_nbr_intersection_idx_buffer.size()); } 
if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto const comm_size = comm.get_size(); - auto second_element_to_idx_map = detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( @@ -1277,21 +1243,18 @@ nbr_intersection(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer)), vertex_t, edge_t, - edge_property_value_t, edge_partition_e_input_device_view_t, - optional_property_buffer_t, - optional_idx_buffer_t, + optional_property_buffer_view_t, true>{nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + optional_property_buffer_view_t{}, second_element_to_idx_map, raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), raft::device_span((*major_nbr_indices).data(), (*major_nbr_indices).size()), - raft::device_span((*major_nbr_properties).data(), - (*major_nbr_properties).size()), + optional_major_nbr_properties, edge_partition, edge_partition_e_value_input, get_dataframe_buffer_begin(vertex_pair_buffer), @@ -1301,7 +1264,7 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_indices.size()), rx_v_pair_optional_nbr_intersection_properties0, rx_v_pair_optional_nbr_intersection_properties1, - rx_v_pair_optional_nbr_intersection_idx_buffer, + invalid_vertex_id::value}); } else { @@ -1552,7 +1515,7 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1581,7 +1544,7 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - 
gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1690,23 +1653,18 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_t optional_nbr_intersection_properties0{}; - optional_property_buffer_t optional_nbr_intersection_properties1{}; - optional_idx_buffer_t optional_nbr_intersection_idx_buffer{}; + optional_property_buffer_view_t optional_nbr_intersection_properties0{}; + optional_property_buffer_view_t optional_nbr_intersection_properties1{}; if constexpr (!std::is_same_v) { nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_idx_buffer.resize(nbr_intersection_indices.size(), handle.get_stream()); optional_nbr_intersection_properties0 = raft::device_span( nbr_intersection_properties0.data(), nbr_intersection_properties0.size()); optional_nbr_intersection_properties1 = raft::device_span( nbr_intersection_properties1.data(), nbr_intersection_properties1.size()); - - optional_nbr_intersection_idx_buffer = raft::device_span( - nbr_intersection_idx_buffer.data(), nbr_intersection_idx_buffer.size()); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { @@ -1719,19 +1677,17 @@ nbr_intersection(raft::handle_t const& handle, decltype(vertex_pair_first), vertex_t, edge_t, - edge_property_value_t, edge_partition_e_input_device_view_t, - optional_property_buffer_t, - optional_idx_buffer_t, + optional_property_buffer_view_t, false>{ nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + optional_property_buffer_view_t{}, nullptr, raft::device_span(), raft::device_span(), - raft::device_span(), + optional_property_buffer_view_t{}, edge_partition, 
edge_partition_e_value_input, vertex_pair_first, @@ -1741,7 +1697,6 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.size()), optional_nbr_intersection_properties0, optional_nbr_intersection_properties1, - optional_nbr_intersection_idx_buffer, invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index caf3771e02c..640c3c04bfd 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -135,12 +135,12 @@ struct call_intersection_op_t { std::conditional_t, raft::device_span, - std::byte> + std::byte /* dummy */> properties0{}; std::conditional_t, raft::device_span, - std::byte> + std::byte /* dummy */> properties1{}; if constexpr (!std::is_same_v) { From 73ccaff880cbd81640792c7eb8c32986164bca25 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 28 Jul 2023 17:36:04 -0700 Subject: [PATCH 21/22] Address additional PR comments --- cpp/src/link_prediction/similarity_impl.cuh | 30 ++++++--- cpp/src/prims/detail/nbr_intersection.cuh | 75 +++++++++++---------- 2 files changed, 63 insertions(+), 42 deletions(-) diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index af61f552078..55e8f5c88d7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -97,14 +97,28 @@ rmm::device_uvector similarity( weight_t sum_of_intersected_a = weight_t{0}; weight_t sum_of_intersected_b = weight_t{0}; - for (size_t k = 0; k < intersection.size(); k++) { - sum_of_min_weight_a_intersect_b += - std::min(intersected_properties_a[k], intersected_properties_b[k]); - sum_of_max_weight_a_intersect_b += - std::max(intersected_properties_a[k], intersected_properties_b[k]); - sum_of_intersected_a += intersected_properties_a[k]; - 
sum_of_intersected_b += intersected_properties_b[k]; - } + auto pair_first = thrust::make_zip_iterator(intersected_properties_a.data(), + intersected_properties_b.data()); + thrust::tie(sum_of_min_weight_a_intersect_b, + sum_of_max_weight_a_intersect_b, + sum_of_intersected_a, + sum_of_intersected_b) = + thrust::transform_reduce( + thrust::seq, + pair_first, + pair_first + intersected_properties_a.size(), + [] __device__(auto property_pair) { + auto prop_a = thrust::get<0>(property_pair); + auto prop_b = thrust::get<1>(property_pair); + return thrust::make_tuple(min(prop_a, prop_b), max(prop_a, prop_b), prop_a, prop_b); + }, + thrust::make_tuple(weight_t{0}, weight_t{0}, weight_t{0}, weight_t{0}), + [] __device__(auto lhs, auto rhs) { + return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), + thrust::get<1>(lhs) + thrust::get<1>(rhs), + thrust::get<2>(lhs) + thrust::get<2>(rhs), + thrust::get<3>(lhs) + thrust::get<3>(rhs)); + }); weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b; diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 508fd44ad41..7685872c4f6 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -360,12 +360,16 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { __device__ edge_t operator()(size_t i) { using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; - auto pair = *(vertex_pair_first + i); + using optional_const_property_buffer_view_t = + std::conditional_t, + raft::device_span, + std::byte /* dummy */>; + auto pair = *(vertex_pair_first + i); vertex_t const* indices0{nullptr}; - edge_property_value_t const* properties0{nullptr}; + optional_const_property_buffer_view_t properties0{}; - [[maybe_unused]] edge_t local_edge_offset0{0}; + edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { 
vertex_t major = thrust::get<0>(pair); @@ -389,22 +393,24 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } if constexpr (!std::is_same_v) { - properties0 = edge_partition_e_value_input.value_first() + local_edge_offset0; + properties0 = raft::device_span( + edge_partition_e_value_input.value_first() + local_edge_offset0, local_degree0); } } else { - auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); - local_degree0 = - static_cast(first_element_offsets[idx + 1] - first_element_offsets[idx]); - indices0 = first_element_indices.begin() + first_element_offsets[idx]; + auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); local_edge_offset0 = first_element_offsets[idx]; + local_degree0 = static_cast(first_element_offsets[idx + 1] - local_edge_offset0); + indices0 = first_element_indices.begin() + local_edge_offset0; + if constexpr (!std::is_same_v) { - properties0 = first_element_properties.begin() + local_edge_offset0; + properties0 = raft::device_span( + first_element_properties.begin() + local_edge_offset0, local_degree0); } } vertex_t const* indices1{nullptr}; - edge_property_value_t const* properties1{nullptr}; + optional_const_property_buffer_view_t properties1{}; [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; @@ -430,18 +436,19 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } if constexpr (!std::is_same_v) { - properties1 = edge_partition_e_value_input.value_first() + local_edge_offset1; + properties1 = raft::device_span( + edge_partition_e_value_input.value_first() + local_edge_offset1, local_degree1); } } else { - auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); - local_degree1 = - static_cast(second_element_offsets[idx + 1] - second_element_offsets[idx]); - indices1 = second_element_indices.begin() + second_element_offsets[idx]; - + auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_edge_offset1 = second_element_offsets[idx]; + 
local_degree1 = static_cast(second_element_offsets[idx + 1] - local_edge_offset1); + indices1 = second_element_indices.begin() + local_edge_offset1; + if constexpr (!std::is_same_v) { - properties1 = second_element_properties.begin() + local_edge_offset1; + properties1 = raft::device_span( + second_element_properties.begin() + local_edge_offset1, local_degree1); } } @@ -468,7 +475,6 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; // copy edge properties from first vertex to common neighbors - thrust::transform(thrust::seq, nbr_intersection_first, nbr_intersection_last, @@ -476,7 +482,6 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { [indices0, local_degree0, properties0] __device__(auto v) { auto position = thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, v); - return properties0[thrust::distance(indices0, position)]; }); @@ -490,7 +495,6 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { [indices1, local_degree1, properties1] __device__(auto v) { auto position = thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, v); - return properties1[thrust::distance(indices1, position)]; }); } @@ -677,26 +681,30 @@ nbr_intersection(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; - using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, - detail::edge_partition_edge_dummy_property_device_view_t, - detail::edge_partition_edge_property_device_view_t< - edge_t, - typename EdgeValueInputIterator::value_iterator, - typename EdgeValueInputIterator::value_type>>; - using edge_property_value_t = typename EdgeValueInputIterator::value_type; + + using edge_partition_e_input_device_view_t = + std::conditional_t, + detail::edge_partition_edge_dummy_property_device_view_t, + 
detail::edge_partition_edge_property_device_view_t< + edge_t, + typename EdgeValueInputIterator::value_iterator, + edge_property_value_t>>; + using optional_property_buffer_value_type = std::conditional_t, edge_property_value_t, void>; - using optional_property_buffer_idx_type = - std::conditional_t, vertex_t, void>; using optional_property_buffer_view_t = std::conditional_t, raft::device_span, - std::byte>; + std::byte /* dummy */>; + + using optional_nbr_intersected_edge_partitions_t = + std::conditional_t, + std::vector>, + std::byte /* dummy */>; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -1112,10 +1120,9 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - [[maybe_unused]] std::vector> + [[maybe_unused]] optional_nbr_intersected_edge_partitions_t edge_partition_nbr_intersection_property0{}; - - [[maybe_unused]] std::vector> + [[maybe_unused]] optional_nbr_intersected_edge_partitions_t edge_partition_nbr_intersection_property1{}; if constexpr (!std::is_same_v) { From 7295838811c145f039253117c46bf55f6f529510 Mon Sep 17 00:00:00 2001 From: Md Naim Date: Fri, 28 Jul 2023 18:07:16 -0700 Subject: [PATCH 22/22] Remove duplicate code --- cpp/src/prims/detail/nbr_intersection.cuh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 7685872c4f6..f4c4745b14c 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -558,10 +558,6 @@ struct gatherv_indices_t { zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i + 1], zipped_combined_begin + output_offset); - - output_offset += 
gathered_intersection_offsets[output_size * j + i + 1] - - gathered_intersection_offsets[output_size * j + i]; - } else { thrust::copy(thrust::seq, gathered_intersection_indices.begin() + @@ -569,9 +565,9 @@ struct gatherv_indices_t { gathered_intersection_indices.begin() + gathered_intersection_offsets[output_size * j + i + 1], combined_nbr_intersection_indices.begin() + output_offset); - output_offset += gathered_intersection_offsets[output_size * j + i + 1] - - gathered_intersection_offsets[output_size * j + i]; } + output_offset += gathered_intersection_offsets[output_size * j + i + 1] - + gathered_intersection_offsets[output_size * j + i]; } } };