diff --git a/cpp/include/cugraph/prims/detail/nbr_intersection.cuh b/cpp/include/cugraph/prims/detail/nbr_intersection.cuh index 0d058efe5f6..a2797c32fae 100644 --- a/cpp/include/cugraph/prims/detail/nbr_intersection.cuh +++ b/cpp/include/cugraph/prims/detail/nbr_intersection.cuh @@ -852,7 +852,7 @@ nbr_intersection(raft::handle_t const& handle, std::max(static_cast(static_cast(unique_majors.size()) / load_factor), static_cast(unique_majors.size()) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()); auto pair_first = thrust::make_zip_iterator(unique_majors.begin(), diff --git a/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 03dd160509a..19966783719 100644 --- a/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -234,6 +234,11 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( VertexIterator map_unique_key_first, VertexIterator map_unique_key_last, ValueIterator map_value_first, +#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map in + // collect_values_for_unique_keys, need to compare the two approaches + typename thrust::iterator_traits::value_type invalid_key, + typename thrust::iterator_traits::value_type invalid_value, +#endif KeyAggregatedEdgeOp key_aggregated_e_op, T init, ReduceOp reduce_op, @@ -313,8 +318,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( static_cast(thrust::distance(map_unique_key_first, map_unique_key_last)) / load_factor), static_cast(thrust::distance(map_unique_key_first, map_unique_key_last)) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, handle.get_stream()); @@ -586,8 +591,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto multi_gpu_kv_map_ptr = std::make_unique< cuco::static_map>( size_t{0}, - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, handle.get_stream()); // relevant only when GraphViewType::is_multi_gpu is true if constexpr (GraphViewType::is_multi_gpu) { @@ -624,8 +629,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_minor_keys.size()) / load_factor), static_cast(unique_minor_keys.size()) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, handle.get_stream()); diff --git a/cpp/include/cugraph/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh index fc448aa36b0..a72d44b6aee 100644 --- a/cpp/include/cugraph/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -61,6 +61,11 @@ collect_values_for_keys(raft::comms::comms_t const& comm, VertexIterator1 collect_key_first, VertexIterator1 collect_key_last, KeyToGPUIdOp key_to_gpu_id_op, +#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map, need to + // compare the two approaches + typename thrust::iterator_traits::value_type invalid_key, + typename thrust::iterator_traits::value_type invalid_value, +#endif rmm::cuda_stream_view stream_view) { using vertex_t = typename thrust::iterator_traits::value_type; @@ -84,8 +89,8 @@ collect_values_for_keys(raft::comms::comms_t const& comm, std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, stream_view); { @@ -147,8 +152,8 @@ collect_values_for_keys(raft::comms::comms_t const& comm, // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, stream_view); { @@ -224,6 +229,11 @@ collect_values_for_unique_keys( rmm::device_uvector::value_type>&& collect_unique_keys, KeyToGPUIdOp key_to_gpu_id_op, +#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map, need to + // compare the two approaches + typename thrust::iterator_traits::value_type invalid_key, + typename thrust::iterator_traits::value_type invalid_value, +#endif rmm::cuda_stream_view stream_view) { using vertex_t = typename thrust::iterator_traits::value_type; @@ -244,8 +254,8 @@ collect_values_for_unique_keys( std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, stream_view); { diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index e0c9a42888e..e52a3703f18 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -446,14 +446,17 @@ class Louvain { cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ handle_.get_comms().get_size()}; - vertex_cluster_weights_v = cugraph::collect_values_for_keys(handle_.get_comms(), - cluster_keys_v_.begin(), - cluster_keys_v_.end(), - cluster_weights_v_.data(), - next_clusters_v_.begin(), - next_clusters_v_.end(), - vertex_to_gpu_id_op, - handle_.get_stream()); + vertex_cluster_weights_v = + cugraph::collect_values_for_keys(handle_.get_comms(), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.data(), + next_clusters_v_.begin(), + next_clusters_v_.end(), + vertex_to_gpu_id_op, + invalid_vertex_id::value, + std::numeric_limits::max(), + handle_.get_stream()); src_cluster_weights = edge_partition_src_property_t(handle_, current_graph_view_); @@ -533,6 +536,8 @@ class Louvain { cluster_keys_v_.begin(), cluster_keys_v_.end(), cluster_weights_v_.begin(), + invalid_vertex_id::value, + std::numeric_limits::max(), detail::key_aggregated_edge_op_t{total_edge_weight, resolution}, thrust::make_tuple(vertex_t{-1}, weight_t{0}), detail::reduce_op_t{}, diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh index e7206a42b7b..cb5e7066dba 100644 --- a/cpp/src/structure/relabel_impl.cuh +++ b/cpp/src/structure/relabel_impl.cuh @@ -120,7 +120,7 @@ void relabel(raft::handle_t const& handle, static_cast(rx_label_pair_old_labels.size()) / load_factor), rx_label_pair_old_labels.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; @@ -185,7 +185,7 @@ void relabel(raft::handle_t const& handle, std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), unique_old_labels.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; @@ -212,7 +212,7 @@ void relabel(raft::handle_t const& handle, std::max(static_cast(static_cast(num_label_pairs) / load_factor), static_cast(num_label_pairs) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()); diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index 69043813069..26b18f584f0 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -760,7 +760,7 @@ renumber_edgelist( load_factor), static_cast(partition.local_edge_partition_major_range_size(i)) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( @@ -800,9 +800,6 @@ renumber_edgelist( i, handle.get_stream()); - RAFT_CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, handle.get_stream()); @@ -811,7 +808,7 @@ renumber_edgelist( std::max(static_cast(static_cast(segment_size) / load_factor), static_cast(segment_size) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( @@ -857,7 +854,7 @@ renumber_edgelist( static_cast(renumber_map_minor_labels.size()) / load_factor), renumber_map_minor_labels.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( @@ -930,7 +927,7 @@ renumber_edgelist(raft::handle_t const& handle, std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), renumber_map_labels.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator( diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh index 2e8c162be89..8204584b2a0 100644 --- a/cpp/src/structure/renumber_utils_impl.cuh +++ b/cpp/src/structure/renumber_utils_impl.cuh @@ -203,7 +203,7 @@ void unrenumber_local_int_edges( static_cast(edge_partition_major_range_size) / load_factor), static_cast(edge_partition_major_range_size) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator( @@ -266,7 +266,7 @@ void unrenumber_local_int_edges( std::max(static_cast(static_cast(segment_size) / load_factor), static_cast(segment_size) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator( @@ -319,7 +319,7 @@ void unrenumber_local_int_edges( static_cast(renumber_map_minor_labels.size()) / load_factor), renumber_map_minor_labels.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()}; auto pair_first = thrust::make_zip_iterator( @@ -373,7 +373,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, cuco::static_map>( size_t{0}, cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()); if (multi_gpu) { @@ -410,6 +410,8 @@ void renumber_ext_vertices(raft::handle_t const& handle, thrust::make_counting_iterator(local_int_vertex_first), std::move(sorted_unique_ext_vertices), detail::compute_gpu_id_from_ext_vertex_t{comm_size}, + invalid_vertex_id::value, + invalid_vertex_id::value, handle.get_stream()); renumber_map_ptr.reset(); @@ -421,7 +423,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, static_cast(static_cast(sorted_unique_ext_vertices.size()) / load_factor), sorted_unique_ext_vertices.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()); @@ -442,7 +444,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), static_cast(local_int_vertex_last - local_int_vertex_first) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()); @@ -508,7 +510,7 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), static_cast(local_int_vertex_last - local_int_vertex_first) + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()); @@ -677,7 +679,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, static_cast(sorted_unique_int_vertices.size()) / load_factor), sorted_unique_int_vertices.size() + 1), cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_value{invalid_vertex_id::value}, stream_adapter, handle.get_stream()};