Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix (providing invalid sentinel value for cuCollection). #2382

Merged
merged 2 commits into from
Jul 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/include/cugraph/prims/detail/nbr_intersection.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ nbr_intersection(raft::handle_t const& handle,
std::max(static_cast<size_t>(static_cast<double>(unique_majors.size()) / load_factor),
static_cast<size_t>(unique_majors.size()) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream());
auto pair_first = thrust::make_zip_iterator(unique_majors.begin(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
VertexIterator map_unique_key_first,
VertexIterator map_unique_key_last,
ValueIterator map_value_first,
#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map in
// collect_values_for_unique_keys, need to compare the two approaches
typename thrust::iterator_traits<VertexIterator>::value_type invalid_key,
typename thrust::iterator_traits<ValueIterator>::value_type invalid_value,
#endif
KeyAggregatedEdgeOp key_aggregated_e_op,
T init,
ReduceOp reduce_op,
Expand Down Expand Up @@ -313,8 +318,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
static_cast<double>(thrust::distance(map_unique_key_first, map_unique_key_last)) /
load_factor),
static_cast<size_t>(thrust::distance(map_unique_key_first, map_unique_key_last)) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<value_t>{0},
cuco::sentinel::empty_key<vertex_t>{invalid_key},
cuco::sentinel::empty_value<value_t>{invalid_value},
stream_adapter,
handle.get_stream());

Expand Down Expand Up @@ -586,8 +591,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
auto multi_gpu_kv_map_ptr = std::make_unique<
cuco::static_map<vertex_t, value_t, cuda::thread_scope_device, decltype(stream_adapter)>>(
size_t{0},
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<value_t>{0},
cuco::sentinel::empty_key<vertex_t>{invalid_key},
cuco::sentinel::empty_value<value_t>{invalid_value},
stream_adapter,
handle.get_stream()); // relevant only when GraphViewType::is_multi_gpu is true
if constexpr (GraphViewType::is_multi_gpu) {
Expand Down Expand Up @@ -624,8 +629,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
// cuco::static_map requires at least one empty slot
std::max(static_cast<size_t>(static_cast<double>(unique_minor_keys.size()) / load_factor),
static_cast<size_t>(unique_minor_keys.size()) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<value_t>{0},
cuco::sentinel::empty_key<vertex_t>{invalid_key},
cuco::sentinel::empty_value<value_t>{invalid_value},
stream_adapter,
handle.get_stream());

Expand Down
22 changes: 16 additions & 6 deletions cpp/include/cugraph/utilities/collect_comm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ collect_values_for_keys(raft::comms::comms_t const& comm,
VertexIterator1 collect_key_first,
VertexIterator1 collect_key_last,
KeyToGPUIdOp key_to_gpu_id_op,
#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map, need to
// compare the two approaches
typename thrust::iterator_traits<VertexIterator0>::value_type invalid_key,
typename thrust::iterator_traits<ValueIterator>::value_type invalid_value,
#endif
rmm::cuda_stream_view stream_view)
{
using vertex_t = typename thrust::iterator_traits<VertexIterator0>::value_type;
Expand All @@ -84,8 +89,8 @@ collect_values_for_keys(raft::comms::comms_t const& comm,
std::max(static_cast<size_t>(
static_cast<double>(thrust::distance(map_key_first, map_key_last)) / load_factor),
static_cast<size_t>(thrust::distance(map_key_first, map_key_last)) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<value_t>{0},
cuco::sentinel::empty_key<vertex_t>{invalid_key},
cuco::sentinel::empty_value<value_t>{invalid_value},
stream_adapter,
stream_view);
{
Expand Down Expand Up @@ -147,8 +152,8 @@ collect_values_for_keys(raft::comms::comms_t const& comm,
// cuco::static_map requires at least one empty slot
std::max(static_cast<size_t>(static_cast<double>(unique_keys.size()) / load_factor),
unique_keys.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<value_t>{0},
cuco::sentinel::empty_key<vertex_t>{invalid_key},
cuco::sentinel::empty_value<value_t>{invalid_value},
stream_adapter,
stream_view);
{
Expand Down Expand Up @@ -224,6 +229,11 @@ collect_values_for_unique_keys(
rmm::device_uvector<typename thrust::iterator_traits<VertexIterator>::value_type>&&
collect_unique_keys,
KeyToGPUIdOp key_to_gpu_id_op,
#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map, need to
// compare the two approaches
typename thrust::iterator_traits<VertexIterator>::value_type invalid_key,
typename thrust::iterator_traits<ValueIterator>::value_type invalid_value,
#endif
rmm::cuda_stream_view stream_view)
{
using vertex_t = typename thrust::iterator_traits<VertexIterator>::value_type;
Expand All @@ -244,8 +254,8 @@ collect_values_for_unique_keys(
std::max(static_cast<size_t>(
static_cast<double>(thrust::distance(map_key_first, map_key_last)) / load_factor),
static_cast<size_t>(thrust::distance(map_key_first, map_key_last)) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<value_t>{0},
cuco::sentinel::empty_key<vertex_t>{invalid_key},
cuco::sentinel::empty_value<value_t>{invalid_value},
stream_adapter,
stream_view);
{
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/community/louvain.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -446,14 +446,17 @@ class Louvain {
cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
handle_.get_comms().get_size()};

vertex_cluster_weights_v = cugraph::collect_values_for_keys(handle_.get_comms(),
cluster_keys_v_.begin(),
cluster_keys_v_.end(),
cluster_weights_v_.data(),
next_clusters_v_.begin(),
next_clusters_v_.end(),
vertex_to_gpu_id_op,
handle_.get_stream());
vertex_cluster_weights_v =
cugraph::collect_values_for_keys(handle_.get_comms(),
cluster_keys_v_.begin(),
cluster_keys_v_.end(),
cluster_weights_v_.data(),
next_clusters_v_.begin(),
next_clusters_v_.end(),
vertex_to_gpu_id_op,
invalid_vertex_id<vertex_t>::value,
std::numeric_limits<weight_t>::max(),
handle_.get_stream());

src_cluster_weights =
edge_partition_src_property_t<graph_view_t, weight_t>(handle_, current_graph_view_);
Expand Down Expand Up @@ -533,6 +536,8 @@ class Louvain {
cluster_keys_v_.begin(),
cluster_keys_v_.end(),
cluster_weights_v_.begin(),
invalid_vertex_id<vertex_t>::value,
std::numeric_limits<weight_t>::max(),
detail::key_aggregated_edge_op_t<vertex_t, weight_t>{total_edge_weight, resolution},
thrust::make_tuple(vertex_t{-1}, weight_t{0}),
detail::reduce_op_t<vertex_t, weight_t>{},
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/structure/relabel_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ void relabel(raft::handle_t const& handle,
static_cast<double>(rx_label_pair_old_labels.size()) / load_factor),
rx_label_pair_old_labels.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};

Expand Down Expand Up @@ -185,7 +185,7 @@ void relabel(raft::handle_t const& handle,
std::max(static_cast<size_t>(static_cast<double>(unique_old_labels.size()) / load_factor),
unique_old_labels.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};

Expand All @@ -212,7 +212,7 @@ void relabel(raft::handle_t const& handle,
std::max(static_cast<size_t>(static_cast<double>(num_label_pairs) / load_factor),
static_cast<size_t>(num_label_pairs) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream());

Expand Down
11 changes: 4 additions & 7 deletions cpp/src/structure/renumber_edgelist_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ renumber_edgelist(
load_factor),
static_cast<size_t>(partition.local_edge_partition_major_range_size(i)) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(
Expand Down Expand Up @@ -800,9 +800,6 @@ renumber_edgelist(
i,
handle.get_stream());

RAFT_CUDA_TRY(cudaStreamSynchronize(
handle.get_stream())); // cuco::static_map currently does not take stream

auto poly_alloc =
rmm::mr::polymorphic_allocator<char>(rmm::mr::get_current_device_resource());
auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, handle.get_stream());
Expand All @@ -811,7 +808,7 @@ renumber_edgelist(
std::max(static_cast<size_t>(static_cast<double>(segment_size) / load_factor),
static_cast<size_t>(segment_size) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(
Expand Down Expand Up @@ -857,7 +854,7 @@ renumber_edgelist(
static_cast<double>(renumber_map_minor_labels.size()) / load_factor),
renumber_map_minor_labels.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(
Expand Down Expand Up @@ -930,7 +927,7 @@ renumber_edgelist(raft::handle_t const& handle,
std::max(static_cast<size_t>(static_cast<double>(renumber_map_labels.size()) / load_factor),
renumber_map_labels.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(
Expand Down
18 changes: 10 additions & 8 deletions cpp/src/structure/renumber_utils_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ void unrenumber_local_int_edges(
static_cast<double>(edge_partition_major_range_size) / load_factor),
static_cast<size_t>(edge_partition_major_range_size) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(
Expand Down Expand Up @@ -266,7 +266,7 @@ void unrenumber_local_int_edges(
std::max(static_cast<size_t>(static_cast<double>(segment_size) / load_factor),
static_cast<size_t>(segment_size) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(
Expand Down Expand Up @@ -319,7 +319,7 @@ void unrenumber_local_int_edges(
static_cast<double>(renumber_map_minor_labels.size()) / load_factor),
renumber_map_minor_labels.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};
auto pair_first = thrust::make_zip_iterator(
Expand Down Expand Up @@ -373,7 +373,7 @@ void renumber_ext_vertices(raft::handle_t const& handle,
cuco::static_map<vertex_t, vertex_t, cuda::thread_scope_device, decltype(stream_adapter)>>(
size_t{0},
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream());
if (multi_gpu) {
Expand Down Expand Up @@ -410,6 +410,8 @@ void renumber_ext_vertices(raft::handle_t const& handle,
thrust::make_counting_iterator(local_int_vertex_first),
std::move(sorted_unique_ext_vertices),
detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{comm_size},
invalid_vertex_id<vertex_t>::value,
invalid_vertex_id<vertex_t>::value,
handle.get_stream());

renumber_map_ptr.reset();
Expand All @@ -421,7 +423,7 @@ void renumber_ext_vertices(raft::handle_t const& handle,
static_cast<size_t>(static_cast<double>(sorted_unique_ext_vertices.size()) / load_factor),
sorted_unique_ext_vertices.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream());

Expand All @@ -442,7 +444,7 @@ void renumber_ext_vertices(raft::handle_t const& handle,
static_cast<double>(local_int_vertex_last - local_int_vertex_first) / load_factor),
static_cast<size_t>(local_int_vertex_last - local_int_vertex_first) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream());

Expand Down Expand Up @@ -508,7 +510,7 @@ void renumber_local_ext_vertices(raft::handle_t const& handle,
static_cast<double>(local_int_vertex_last - local_int_vertex_first) / load_factor),
static_cast<size_t>(local_int_vertex_last - local_int_vertex_first) + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream());

Expand Down Expand Up @@ -677,7 +679,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle,
static_cast<double>(sorted_unique_int_vertices.size()) / load_factor),
sorted_unique_int_vertices.size() + 1),
cuco::sentinel::empty_key<vertex_t>{invalid_vertex_id<vertex_t>::value},
cuco::sentinel::empty_value<vertex_t>{0},
cuco::sentinel::empty_value<vertex_t>{invalid_vertex_id<vertex_t>::value},
stream_adapter,
handle.get_stream()};

Expand Down