From 34f3643fff275cd1c53c432d45f76449a03b0099 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 14 Jul 2022 14:46:43 -0700 Subject: [PATCH 1/5] enable concurrent broadcasts in update_edge_partition_src_dst_prpoerty --- ...update_edge_partition_src_dst_property.cuh | 40 ++----------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/cpp/src/prims/update_edge_partition_src_dst_property.cuh b/cpp/src/prims/update_edge_partition_src_dst_property.cuh index 8cee40d3438..23ea94c69b5 100644 --- a/cpp/src/prims/update_edge_partition_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_partition_src_dst_property.cuh @@ -294,8 +294,6 @@ void update_edge_partition_minor_property( key_offsets = *(graph_view.local_sorted_unique_edge_dst_vertex_partition_offsets()); } -#if 0 // FIXME: should not directly call ncclGroupStart(), ncclGroupEnd(), enable after adding a - // raft::comms mechanism to group broadcast operations // memory footprint vs parallelism trade-off // memory requirement per loop is // (V/comm_size) * sizeof(value_t) @@ -327,10 +325,7 @@ void update_edge_partition_minor_property( } for (size_t round = 0; round < num_rounds; ++round) { - // FIXME: better use a higher-level interface than directly invoking NCCL functions. - if (ncclGroupStart() != ncclSuccess) { - CUGRAPH_FAIL("ncclGroupStart failure."); - } + row_comm.group_start(); for (size_t i = 0; i < num_concurrent_bcasts; ++i) { auto j = num_rounds * i + round; if (j < static_cast(row_comm_size)) { @@ -343,10 +338,8 @@ void update_edge_partition_minor_property( handle.get_stream()); } } - // FIXME: better use a higher-level interface than directly invoking NCCL functions. - if (ncclGroupEnd() != ncclSuccess) { - CUGRAPH_FAIL("ncclGroupEnd failure."); - } + row_comm.group_end(); + for (size_t i = 0; i < num_concurrent_bcasts; ++i) { auto j = num_rounds * i + round; if (j < static_cast(row_comm_size)) { @@ -363,33 +356,6 @@ void update_edge_partition_minor_property( } } } -#else - vertex_t max_rx_size{0}; - for (int i = 0; i < row_comm_size; ++i) { - max_rx_size = std::max( - max_rx_size, graph_view.vertex_partition_range_size(col_comm_rank * row_comm_size + i)); - } - auto rx_value_buffer = allocate_dataframe_buffer(max_rx_size, handle.get_stream()); - auto rx_value_first = get_dataframe_buffer_begin(rx_value_buffer); - for (int i = 0; i < row_comm_size; ++i) { - device_bcast(row_comm, - vertex_property_input_first, - rx_value_first, - graph_view.vertex_partition_range_size(col_comm_rank * row_comm_size + i), - i, - handle.get_stream()); - - auto v_offset_first = thrust::make_transform_iterator( - (*(edge_partition_minor_property_output.keys())).begin() + key_offsets[i], - [v_first = graph_view.vertex_partition_range_first( - col_comm_rank * row_comm_size + i)] __device__(auto v) { return v - v_first; }); - thrust::gather(handle.get_thrust_policy(), - v_offset_first, - v_offset_first + (key_offsets[i + 1] - key_offsets[i]), - rx_value_first, - edge_partition_minor_property_output.value_first() + key_offsets[i]); - } -#endif } else { std::vector rx_counts(row_comm_size, size_t{0}); std::vector displacements(row_comm_size, size_t{0}); From 659f049e05dd53c4507da8f6aa64b9a3ec67b7b0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 15 Jul 2022 00:49:35 -0700 Subject: [PATCH 2/5] added device_group_start|end --- cpp/include/cugraph/utilities/device_comm.cuh | 8 ++++++++ cpp/src/prims/update_edge_partition_src_dst_property.cuh | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/include/cugraph/utilities/device_comm.cuh b/cpp/include/cugraph/utilities/device_comm.cuh index 6d342aad06e..c77e2c4516b 100644 --- a/cpp/include/cugraph/utilities/device_comm.cuh +++ b/cpp/include/cugraph/utilities/device_comm.cuh @@ -1091,4 +1091,12 @@ device_gatherv(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream_view); } +void device_group_start(raft::comms::comms_t const& comm) { + comm.group_start(); +} + +void device_group_end(raft::comms::comms_t const& comm) { + comm.group_end(); +} + } // namespace cugraph diff --git a/cpp/src/prims/update_edge_partition_src_dst_property.cuh b/cpp/src/prims/update_edge_partition_src_dst_property.cuh index 23ea94c69b5..ffa1baf37fa 100644 --- a/cpp/src/prims/update_edge_partition_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_partition_src_dst_property.cuh @@ -325,7 +325,7 @@ void update_edge_partition_minor_property( } for (size_t round = 0; round < num_rounds; ++round) { - row_comm.group_start(); + device_group_start(row_comm); for (size_t i = 0; i < num_concurrent_bcasts; ++i) { auto j = num_rounds * i + round; if (j < static_cast(row_comm_size)) { @@ -338,7 +338,7 @@ void update_edge_partition_minor_property( handle.get_stream()); } } - row_comm.group_end(); + device_group_end(row_comm); for (size_t i = 0; i < num_concurrent_bcasts; ++i) { auto j = num_rounds * i + round; From fb3824dcb3e4c21c3e39b20d1762fec2fbfd50eb Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 19 Jul 2022 09:38:17 -0700 Subject: [PATCH 3/5] clang-format --- cpp/include/cugraph/utilities/device_comm.cuh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cpp/include/cugraph/utilities/device_comm.cuh b/cpp/include/cugraph/utilities/device_comm.cuh index c77e2c4516b..3612eb1fba2 100644 --- a/cpp/include/cugraph/utilities/device_comm.cuh +++ b/cpp/include/cugraph/utilities/device_comm.cuh @@ -1091,12 +1091,8 @@ device_gatherv(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream_view); } -void device_group_start(raft::comms::comms_t const& comm) { - comm.group_start(); -} +void device_group_start(raft::comms::comms_t const& comm) { comm.group_start(); } -void device_group_end(raft::comms::comms_t const& comm) { - comm.group_end(); -} +void device_group_end(raft::comms::comms_t const& comm) { comm.group_end(); } } // namespace cugraph From 93896fd90d7b313654001ac12051a0fe4753a340 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 19 Jul 2022 12:47:21 -0700 Subject: [PATCH 4/5] inline device_group_start/end --- cpp/include/cugraph/utilities/device_comm.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cugraph/utilities/device_comm.cuh b/cpp/include/cugraph/utilities/device_comm.cuh index 3612eb1fba2..27e3512ccf5 100644 --- a/cpp/include/cugraph/utilities/device_comm.cuh +++ b/cpp/include/cugraph/utilities/device_comm.cuh @@ -1091,8 +1091,8 @@ device_gatherv(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream_view); } -void device_group_start(raft::comms::comms_t const& comm) { comm.group_start(); } +inline void device_group_start(raft::comms::comms_t const& comm) { comm.group_start(); } -void device_group_end(raft::comms::comms_t const& comm) { comm.group_end(); } +inline void device_group_end(raft::comms::comms_t const& comm) { comm.group_end(); } } // namespace cugraph From 80fff0846e8db171923710f369e656faf8d055d1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 19 Jul 2022 13:32:09 -0700 Subject: [PATCH 5/5] fix compiler warning --- cpp/src/structure/graph_impl.cuh | 36 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index 12e5a5dc234..1811ae196b7 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -999,20 +999,18 @@ graph_t( + edgelists[i], + major_range_first, use_dcs ? std::optional{major_range_first + (*edge_partition_segment_offsets_) [(*(meta.segment_offsets)).size() * i + detail::num_sparse_segments_per_vertex_partition]} - : std::nullopt; - auto [offsets, indices, weights, dcs_nzd_vertices] = - compress_edgelist(edgelists[i], - major_range_first, - major_hypersparse_first, - major_range_last, - minor_range_first, - minor_range_last, - handle.get_stream()); + : std::nullopt, + major_range_last, + minor_range_first, + minor_range_last, + handle.get_stream()); edge_partition_offsets_.push_back(std::move(offsets)); edge_partition_indices_.push_back(std::move(indices)); @@ -1149,20 +1147,18 @@ graph_t( + edgelists[i], + major_range_first, use_dcs ? std::optional{major_range_first + (*edge_partition_segment_offsets_) [(*(meta.segment_offsets)).size() * i + detail::num_sparse_segments_per_vertex_partition]} - : std::nullopt; - auto [offsets, indices, weights, dcs_nzd_vertices] = - compress_edgelist(edgelists[i], - major_range_first, - major_hypersparse_first, - major_range_last, - minor_range_first, - minor_range_last, - handle.get_stream()); + : std::nullopt, + major_range_last, + minor_range_first, + minor_range_last, + handle.get_stream()); edgelist_src_partitions[i].resize(0, handle.get_stream()); edgelist_src_partitions[i].shrink_to_fit(handle.get_stream()); edgelist_dst_partitions[i].resize(0, handle.get_stream());