From e94f801fc71d8083c33035be9d7b6622e6786f67 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 18 Sep 2023 19:28:44 -0700 Subject: [PATCH 1/7] fix compile errors with cccl 2.x --- cpp/include/cugraph/utilities/misc_utils.cuh | 5 +- .../cugraph/utilities/shuffle_comm.cuh | 50 ++++++----- cpp/src/community/detail/mis_impl.cuh | 16 ++-- cpp/src/community/detail/refine_impl.cuh | 37 +++++---- cpp/src/detail/collect_local_vertex_values.cu | 5 +- cpp/src/generators/erdos_renyi_generator.cu | 22 +++-- cpp/src/generators/simple_generators.cu | 37 +++++---- ...r_v_random_select_transform_outgoing_e.cuh | 16 ++-- ...v_transform_reduce_incoming_outgoing_e.cuh | 25 +++--- .../prims/update_edge_src_dst_property.cuh | 82 +++++++++++-------- cpp/src/sampling/random_walks.cuh | 41 ++++++---- cpp/src/structure/graph_view_impl.cuh | 6 +- .../renumber_sampled_edgelist_test.cu | 23 +++--- 13 files changed, 214 insertions(+), 151 deletions(-) diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index a62e8ce85ec..0dad22cdc50 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -25,6 +25,8 @@ #include #include +#include + #include #include #include @@ -43,7 +45,8 @@ std::tuple, std::vector> compute_offset_aligned_ed { auto search_offset_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{1}), - [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }); + cuda::proclaim_return_type( + [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; })); auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size; if (num_chunks > 1) { diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 6a260144324..22bc5b6b6f1 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -40,6 +40,8 @@ #include #include +#include + #include #include #include @@ -198,12 +200,13 @@ void multi_partition(ValueIterator value_first, value_last, thrust::make_zip_iterator( thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), - [value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) { - auto group_id = value_to_group_id_op(value); - cuda::std::atomic_ref counter(counts[group_id - group_first]); - return thrust::make_tuple(group_id, - counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); - }); + cuda::proclaim_return_type>( + [value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) { + auto group_id = value_to_group_id_op(value); + cuda::std::atomic_ref counter(counts[group_id - group_first]); + return thrust::make_tuple(group_id, + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); + })); rmm::device_uvector displacements(num_groups, stream_view); thrust::exclusive_scan( @@ -246,17 +249,19 @@ void multi_partition(KeyIterator key_first, rmm::device_uvector group_ids(num_keys, stream_view); rmm::device_uvector intra_partition_offsets(num_keys, stream_view); thrust::fill(rmm::exec_policy(stream_view), counts.begin(), counts.end(), size_t{0}); - thrust::transform(rmm::exec_policy(stream_view), - key_first, - key_last, - thrust::make_zip_iterator( - thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), - [key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) { - auto group_id = key_to_group_id_op(key); - cuda::std::atomic_ref counter(counts[group_id - group_first]); - return thrust::make_tuple( - group_id, counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); - }); + thrust::transform( + rmm::exec_policy(stream_view), + key_first, + key_last, + thrust::make_zip_iterator( + thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), + cuda::proclaim_return_type>( + [key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) { + auto group_id = key_to_group_id_op(key); + cuda::std::atomic_ref counter(counts[group_id - group_first]); + return thrust::make_tuple(group_id, + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); + })); rmm::device_uvector displacements(num_groups, stream_view); thrust::exclusive_scan( @@ -492,7 +497,7 @@ std::tuple mem_frugal_partition( key_first, key_last, key_group_id_less_t::value_type, KeyToGroupIdOp>{ - key_to_group_id_op, pivot})); + key_to_group_id_op, pivot})); auto second_size = num_elements - first_size; auto tmp_key_buffer = @@ -762,8 +767,9 @@ rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [I stream_view); auto group_id_first = thrust::make_transform_iterator( - tx_value_first, - [value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); }); + tx_value_first, cuda::proclaim_return_type([value_to_group_id_op] __device__(auto value) { + return value_to_group_id_op(value); + })); rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto rank_count_pair_first = thrust::make_zip_iterator( @@ -796,7 +802,9 @@ rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [IN stream_view); auto group_id_first = thrust::make_transform_iterator( - tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); }); + tx_key_first, cuda::proclaim_return_type([key_to_group_id_op] __device__(auto key) { + return key_to_group_id_op(key); + })); rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto rank_count_pair_first = thrust::make_zip_iterator( diff --git a/cpp/src/community/detail/mis_impl.cuh b/cpp/src/community/detail/mis_impl.cuh index bcd71af5a08..2659a982183 100644 --- a/cpp/src/community/detail/mis_impl.cuh +++ b/cpp/src/community/detail/mis_impl.cuh @@ -37,6 +37,8 @@ #include #include +#include + #include namespace cugraph { @@ -78,13 +80,13 @@ rmm::device_uvector maximal_independent_set( thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin()); // Set ranks of zero out-degree vetices to std::numeric_limits::lowest() - thrust::transform_if( - handle.get_thrust_policy(), - out_degrees.begin(), - out_degrees.end(), - ranks.begin(), - [] __device__(auto) { return std::numeric_limits::lowest(); }, - [] __device__(auto deg) { return deg == 0; }); + thrust::transform_if(handle.get_thrust_policy(), + out_degrees.begin(), + out_degrees.end(), + ranks.begin(), + cuda::proclaim_return_type( + [] __device__(auto) { return std::numeric_limits::lowest(); }), + [] __device__(auto deg) { return deg == 0; }); out_degrees.resize(0, handle.get_stream()); out_degrees.shrink_to_fit(handle.get_stream()); diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index e811aafc776..2e615064286 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -46,12 +46,15 @@ #include #include +#include + CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. namespace cuco { template <> -struct is_bitwise_comparable> : std::true_type {}; +struct is_bitwise_comparable> : std::true_type { +}; } // namespace cuco namespace cugraph { @@ -236,16 +239,17 @@ refine_clustering( weighted_degree_of_vertices.end(), vertex_louvain_cluster_weights.end())); - thrust::transform(handle.get_thrust_policy(), - wcut_deg_and_cluster_vol_triple_begin, - wcut_deg_and_cluster_vol_triple_end, - singleton_and_connected_flags.begin(), - [resolution] __device__(auto wcut_wdeg_and_louvain_volume) { - auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); - auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); - auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); - return wcut > (resolution * wdeg * (louvain_volume - wdeg)); - }); + thrust::transform( + handle.get_thrust_policy(), + wcut_deg_and_cluster_vol_triple_begin, + wcut_deg_and_cluster_vol_triple_end, + singleton_and_connected_flags.begin(), + cuda::proclaim_return_type([resolution] __device__(auto wcut_wdeg_and_louvain_volume) { + auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); + auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); + auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); + return (wcut > (resolution * wdeg * (louvain_volume - wdeg))) ? uint8_t{1} : uint8_t{0}; + })); edge_src_property_t src_louvain_cluster_weight_cache(handle); edge_src_property_t src_cut_to_louvain_cache(handle); @@ -714,11 +718,12 @@ refine_clustering( vertices_in_mis.begin(), vertices_in_mis.end(), dst_vertices.begin(), - [dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()), - v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { - auto dst = *(dst_first + v - v_first); - return dst; - }); + cuda::proclaim_return_type( + [dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { + auto dst = *(dst_first + v - v_first); + return dst; + })); cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream()); cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream()); diff --git a/cpp/src/detail/collect_local_vertex_values.cu b/cpp/src/detail/collect_local_vertex_values.cu index 9d5d2cb553b..795902dfd87 100644 --- a/cpp/src/detail/collect_local_vertex_values.cu +++ b/cpp/src/detail/collect_local_vertex_values.cu @@ -19,6 +19,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -64,7 +66,8 @@ rmm::device_uvector collect_local_vertex_values_from_ext_vertex_value_p auto vertex_iterator = thrust::make_transform_iterator( d_vertices.begin(), - [local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; }); + cuda::proclaim_return_type( + [local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; })); d_local_values.resize(local_vertex_last - local_vertex_first, handle.get_stream()); thrust::fill( diff --git a/cpp/src/generators/erdos_renyi_generator.cu b/cpp/src/generators/erdos_renyi_generator.cu index 6d847ae0bde..e4a367b46f0 100644 --- a/cpp/src/generators/erdos_renyi_generator.cu +++ b/cpp/src/generators/erdos_renyi_generator.cu @@ -28,6 +28,8 @@ #include #include +#include + namespace cugraph { template @@ -42,12 +44,13 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, "Implementation cannot support specified value"); auto random_iterator = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [seed] __device__(size_t index) { + thrust::make_counting_iterator(0), + cuda::proclaim_return_type([seed] __device__(size_t index) { thrust::default_random_engine rng(seed); thrust::uniform_real_distribution dist(0.0, 1.0); rng.discard(index); return dist(rng); - }); + })); size_t count = thrust::count_if(handle.get_thrust_policy(), random_iterator, @@ -69,13 +72,14 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, indices_v.begin(), indices_v.end(), thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), src_v.end())), - [num_vertices] __device__(size_t index) { - size_t src = index / num_vertices; - size_t dst = index % num_vertices; - - return thrust::make_tuple(static_cast(src), - static_cast(dst)); - }); + cuda::proclaim_return_type>( + [num_vertices] __device__(size_t index) { + size_t src = index / num_vertices; + size_t dst = index % num_vertices; + + return thrust::make_tuple(static_cast(src), + static_cast(dst)); + })); handle.sync_stream(); diff --git a/cpp/src/generators/simple_generators.cu b/cpp/src/generators/simple_generators.cu index 6dba63909c3..f2102302008 100644 --- a/cpp/src/generators/simple_generators.cu +++ b/cpp/src/generators/simple_generators.cu @@ -27,6 +27,8 @@ #include #include +#include + #include namespace cugraph { @@ -264,23 +266,24 @@ generate_complete_graph_edgelist( auto transform_iter = thrust::make_transform_iterator( thrust::make_counting_iterator(0), - [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { - size_t graph_index = index / (num_vertices * num_vertices); - size_t local_index = index % (num_vertices * num_vertices); - - vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); - vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); - - if (src == dst) { - src = invalid_vertex; - dst = invalid_vertex; - } else { - src += (graph_index * num_vertices); - dst += (graph_index * num_vertices); - } - - return thrust::make_tuple(src, dst); - }); + cuda::proclaim_return_type>( + [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { + size_t graph_index = index / (num_vertices * num_vertices); + size_t local_index = index % (num_vertices * num_vertices); + + vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); + vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); + + if (src == dst) { + src = invalid_vertex; + dst = invalid_vertex; + } else { + src += (graph_index * num_vertices); + dst += (graph_index * num_vertices); + } + + return thrust::make_tuple(src, dst); + })); output_iterator = thrust::copy_if(handle.get_thrust_policy(), transform_iter, diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index b238b964ede..219ce924b29 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -44,6 +44,8 @@ #include #include +#include + #include #include @@ -287,7 +289,7 @@ rmm::device_uvector get_sampling_index_without_replacement( #ifndef NO_CUGRAPH_OPS edge_t mid_partition_degree_range_last = static_cast(K * 10); // tuning parameter assert(mid_partition_degree_range_last > K); - size_t high_partition_over_sampling_K = K * 2; // tuning parameter + size_t high_partition_over_sampling_K = K * 2; // tuning parameter assert(high_partition_over_sampling_K > K); rmm::device_uvector sample_nbr_indices(frontier_degrees.size() * K, handle.get_stream()); @@ -596,8 +598,9 @@ rmm::device_uvector get_sampling_index_without_replacement( multiplier_t{high_partition_over_sampling_K}), thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), - [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( - size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }), + cuda::proclaim_return_type( + [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( + size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })), handle.get_stream()); if (tmp_storage_bytes > d_tmp_storage.size()) { d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); @@ -615,8 +618,9 @@ rmm::device_uvector get_sampling_index_without_replacement( multiplier_t{high_partition_over_sampling_K}), thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), - [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( - size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }), + cuda::proclaim_return_type( + [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( + size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })), handle.get_stream()); // copy the neighbor indices back to sample_nbr_indices @@ -883,7 +887,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_nbr_indices); // neighbor index within an edge partition (note that each vertex's // neighbors are distributed in minor_comm_size partitions) std::optional> sample_key_indices{ - std::nullopt}; // relevant only when (minor_comm_size > 1) + std::nullopt}; // relevant only when (minor_comm_size > 1) auto local_frontier_sample_counts = std::vector{}; auto local_frontier_sample_displacements = std::vector{}; if (minor_comm_size > 1) { diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 1349454f5b6..a14a276b98e 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -944,16 +946,19 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, minor_init); auto value_first = thrust::make_transform_iterator( view.value_first(), - [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); }); - thrust::scatter( - handle.get_thrust_policy(), - value_first + (*minor_key_offsets)[i], - value_first + (*minor_key_offsets)[i + 1], - thrust::make_transform_iterator( - (*(view.keys())).begin() + (*minor_key_offsets)[i], - [key_first = graph_view.vertex_partition_range_first( - this_segment_vertex_partition_id)] __device__(auto key) { return key - key_first; }), - tx_buffer_first); + cuda::proclaim_return_type( + [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); })); + thrust::scatter(handle.get_thrust_policy(), + value_first + (*minor_key_offsets)[i], + value_first + (*minor_key_offsets)[i + 1], + thrust::make_transform_iterator( + (*(view.keys())).begin() + (*minor_key_offsets)[i], + cuda::proclaim_return_type( + [key_first = graph_view.vertex_partition_range_first( + this_segment_vertex_partition_id)] __device__(auto key) { + return key - key_first; + })), + tx_buffer_first); device_reduce(major_comm, tx_buffer_first, vertex_value_output_first, diff --git a/cpp/src/prims/update_edge_src_dst_property.cuh b/cpp/src/prims/update_edge_src_dst_property.cuh index 2d72a075ca5..c1c2c15ae15 100644 --- a/cpp/src/prims/update_edge_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_src_dst_property.cuh @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -181,13 +183,14 @@ void update_edge_major_property(raft::handle_t const& handle, handle.get_stream()); auto bool_first = thrust::make_transform_iterator( (*edge_partition_keys)[i].begin(), - [rx_value_first, - v_first = graph_view.vertex_partition_range_first( - major_range_vertex_partition_id)] __device__(auto v) { - auto v_offset = v - v_first; - return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & - packed_bool_mask(v_offset)); - }); + cuda::proclaim_return_type( + [rx_value_first, + v_first = graph_view.vertex_partition_range_first( + major_range_vertex_partition_id)] __device__(auto v) { + auto v_offset = v - v_first; + return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & + packed_bool_mask(v_offset)); + })); pack_bools(handle, bool_first, bool_first + (*edge_partition_keys)[i].size(), @@ -202,8 +205,9 @@ void update_edge_major_property(raft::handle_t const& handle, auto v_offset_first = thrust::make_transform_iterator( (*edge_partition_keys)[i].begin(), - [v_first = graph_view.vertex_partition_range_first( - major_range_vertex_partition_id)] __device__(auto v) { return v - v_first; }); + cuda::proclaim_return_type( + [v_first = graph_view.vertex_partition_range_first( + major_range_vertex_partition_id)] __device__(auto v) { return v - v_first; })); thrust::gather(handle.get_thrust_policy(), v_offset_first, v_offset_first + (*edge_partition_keys)[i].size(), @@ -312,21 +316,24 @@ void update_edge_major_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (packed_bool) { auto bool_first = thrust::make_transform_iterator( - vertex_first, [vertex_property_input_first, vertex_partition] __device__(auto v) { + vertex_first, + cuda::proclaim_return_type([vertex_property_input_first, + vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); return static_cast( *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_bools(handle, bool_first, bool_first + thrust::distance(vertex_first, vertex_last), rx_value_first); } else { - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + auto map_first = thrust::make_transform_iterator( + vertex_first, + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(handle.get_thrust_policy(), @@ -391,9 +398,10 @@ void update_edge_major_property(raft::handle_t const& handle, }); } else { auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [edge_partition] __device__(auto v) { + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { return edge_partition.major_offset_from_major_nocheck(v); - }); + })); // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), @@ -593,13 +601,14 @@ void update_edge_minor_property(raft::handle_t const& handle, auto bool_first = thrust::make_transform_iterator( (*edge_partition_keys).begin() + key_offsets[j], - [rx_value_first, - v_first = graph_view.vertex_partition_range_first( - minor_range_vertex_partition_id)] __device__(auto v) { - auto v_offset = v - v_first; - return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & - packed_bool_mask(v_offset)); - }); + cuda::proclaim_return_type( + [rx_value_first, + v_first = graph_view.vertex_partition_range_first( + minor_range_vertex_partition_id)] __device__(auto v) { + auto v_offset = v - v_first; + return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & + packed_bool_mask(v_offset)); + })); pack_unaligned_bools( handle, bool_first, @@ -611,10 +620,10 @@ void update_edge_minor_property(raft::handle_t const& handle, std::get>(key_offsets_or_rx_displacements); auto bool_first = thrust::make_transform_iterator( thrust::make_counting_iterator(vertex_t{0}), - [rx_value_first] __device__(vertex_t v_offset) { + cuda::proclaim_return_type([rx_value_first] __device__(vertex_t v_offset) { return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_unaligned_bools( handle, bool_first, @@ -630,8 +639,9 @@ void update_edge_minor_property(raft::handle_t const& handle, auto v_offset_first = thrust::make_transform_iterator( (*edge_partition_keys).begin() + key_offsets[j], - [v_first = graph_view.vertex_partition_range_first( - minor_range_vertex_partition_id)] __device__(auto v) { return v - v_first; }); + cuda::proclaim_return_type( + [v_first = graph_view.vertex_partition_range_first( + minor_range_vertex_partition_id)] __device__(auto v) { return v - v_first; })); thrust::gather(handle.get_thrust_policy(), v_offset_first, v_offset_first + (key_offsets[j + 1] - key_offsets[j]), @@ -718,21 +728,24 @@ void update_edge_minor_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (packed_bool) { auto bool_first = thrust::make_transform_iterator( - vertex_first, [vertex_property_input_first, vertex_partition] __device__(auto v) { + vertex_first, + cuda::proclaim_return_type([vertex_property_input_first, + vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); return static_cast( *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_bools(handle, bool_first, bool_first + thrust::distance(vertex_first, vertex_last), rx_value_first); } else { - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + auto map_first = thrust::make_transform_iterator( + vertex_first, + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(handle.get_thrust_policy(), @@ -799,9 +812,10 @@ void update_edge_minor_property(raft::handle_t const& handle, }); } else { auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [edge_partition] __device__(auto v) { + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { return edge_partition.minor_offset_from_minor_nocheck(v); - }); + })); // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 46789c6b8bd..5a9ded02009 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -52,6 +52,8 @@ #include #include +#include + #include #include // FIXME: requirement for temporary std::getenv() #include @@ -197,19 +199,19 @@ struct col_indx_extract_t { void operator()( original::device_vec_t const& d_coalesced_src_v, // in: coalesced vector of vertices original::device_vec_t const& - d_v_col_indx, // in: column indices, given by stepper's random engine + d_v_col_indx, // in: column indices, given by stepper's random engine original::device_vec_t& d_v_next_vertices, // out: set of destination vertices, for next step original::device_vec_t& - d_v_next_weights) // out: set of weights between src and destination vertices, for next step + d_v_next_weights) // out: set of weights between src and destination vertices, for next step const { thrust::transform_if( handle_.get_thrust_policy(), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_paths_), // input1 - d_v_col_indx.begin(), // input2 - out_degs_, // stencil + thrust::make_counting_iterator(num_paths_), // input1 + d_v_col_indx.begin(), // input2 + out_degs_, // stencil thrust::make_zip_iterator( thrust::make_tuple(d_v_next_vertices.begin(), d_v_next_weights.begin())), // output [max_depth = max_depth_, @@ -378,7 +380,8 @@ struct random_walker_t { // scatter d_src_init_v to coalesced vertex vector: // - auto dlambda = [stride = max_depth_] __device__(auto indx) { return indx * stride; }; + auto dlambda = cuda::proclaim_return_type( + [stride = max_depth_] __device__(auto indx) { return indx * stride; }); // use the transform iterator as map: // @@ -539,10 +542,11 @@ struct random_walker_t { // delta = ptr_d_sizes[indx] - 1 // - auto dlambda = [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { - auto delta = ptr_d_sizes[indx] - 1; - return ptr_d_coalesced[indx * stride + delta]; - }; + auto dlambda = cuda::proclaim_return_type( + [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - 1; + return ptr_d_coalesced[indx * stride + delta]; + }); // use the transform iterator as map: // @@ -575,9 +579,9 @@ struct random_walker_t { d_crt_out_degs, // |current set of vertex out degrees| = nelems, // to be used as stencil (don't scatter if 0) original::device_vec_t const& - d_sizes, // paths sizes used to provide delta in coalesced paths; - // pre-condition: assumed as updated to reflect new vertex additions; - // also, this is the number of _vertices_ in each path; + d_sizes, // paths sizes used to provide delta in coalesced paths; + // pre-condition: assumed as updated to reflect new vertex additions; + // also, this is the number of _vertices_ in each path; // hence for scattering weights this needs to be adjusted; hence the `adjust` parameter index_t stride, // stride = coalesce block size (max_depth for vertices; max_depth-1 for weights) @@ -587,10 +591,11 @@ struct random_walker_t { { index_t const* ptr_d_sizes = original::raw_const_ptr(d_sizes); - auto dlambda = [stride, adjust, ptr_d_sizes] __device__(auto indx) { - auto delta = ptr_d_sizes[indx] - adjust - 1; - return indx * stride + delta; - }; + auto dlambda = + cuda::proclaim_return_type([stride, adjust, ptr_d_sizes] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - adjust - 1; + return indx * stride + delta; + }); // use the transform iterator as map: // @@ -762,7 +767,7 @@ random_walks_impl( // pre-allocate num_paths * max_depth; // original::device_vec_t d_coalesced_v(num_paths * max_depth, - stream); // coalesced vertex set + stream); // coalesced vertex set original::device_vec_t d_coalesced_w(num_paths * (max_depth - 1), stream); // coalesced weight set original::device_vec_t d_paths_sz(num_paths, stream); // paths sizes diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 7626784c13c..867ec21d0a6 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -50,6 +50,8 @@ #include #include +#include + #include #include #include @@ -155,7 +157,9 @@ rmm::device_uvector compute_major_degrees( thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_hypersparse_first - major_range_first), local_degrees.begin(), - [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); + cuda::proclaim_return_type([p_offsets] __device__(auto i) { + return p_offsets[i + 1] - p_offsets[i]; + })); if (use_dcs) { auto p_dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; auto dcs_nzd_vertex_count = (*edge_partition_dcs_nzd_vertex_counts)[i]; diff --git a/cpp/tests/sampling/renumber_sampled_edgelist_test.cu b/cpp/tests/sampling/renumber_sampled_edgelist_test.cu index 96c8d6173e7..a1b782b34cf 100644 --- a/cpp/tests/sampling/renumber_sampled_edgelist_test.cu +++ b/cpp/tests/sampling/renumber_sampled_edgelist_test.cu @@ -35,6 +35,8 @@ #include #include +#include + struct RenumberSampledEdgelist_Usecase { size_t num_vertices{}; size_t num_sampled_edges{}; @@ -380,16 +382,17 @@ class Tests_RenumberSampledEdgelist auto renumbered_merged_vertex_first = thrust::make_transform_iterator( merged_vertices.begin(), - [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t src) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), src); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), - it)]; - }); + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t src) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), src); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), + it)]; + })); thrust::reduce_by_key(handle.get_thrust_policy(), sort_key_first, From aa6be2848c53b1d0bc0e3885d669d5f321c0f613 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 19 Sep 2023 09:59:33 -0700 Subject: [PATCH 2/7] clang-format --- cpp/include/cugraph/utilities/shuffle_comm.cuh | 2 +- cpp/src/community/detail/refine_impl.cuh | 3 +-- ...er_v_random_select_transform_outgoing_e.cuh | 4 ++-- cpp/src/sampling/random_walks.cuh | 18 +++++++++--------- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 22bc5b6b6f1..895a7a350e4 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -497,7 +497,7 @@ std::tuple mem_frugal_partition( key_first, key_last, key_group_id_less_t::value_type, KeyToGroupIdOp>{ - key_to_group_id_op, pivot})); + key_to_group_id_op, pivot})); auto second_size = num_elements - first_size; auto tmp_key_buffer = diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 2e615064286..c3792620a1e 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -53,8 +53,7 @@ CUCO_DECLARE_BITWISE_COMPARABLE(double) // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. namespace cuco { template <> -struct is_bitwise_comparable> : std::true_type { -}; +struct is_bitwise_comparable> : std::true_type {}; } // namespace cuco namespace cugraph { diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 219ce924b29..3a065e10e49 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -289,7 +289,7 @@ rmm::device_uvector get_sampling_index_without_replacement( #ifndef NO_CUGRAPH_OPS edge_t mid_partition_degree_range_last = static_cast(K * 10); // tuning parameter assert(mid_partition_degree_range_last > K); - size_t high_partition_over_sampling_K = K * 2; // tuning parameter + size_t high_partition_over_sampling_K = K * 2; // tuning parameter assert(high_partition_over_sampling_K > K); rmm::device_uvector sample_nbr_indices(frontier_degrees.size() * K, handle.get_stream()); @@ -887,7 +887,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_nbr_indices); // neighbor index within an edge partition (note that each vertex's // neighbors are distributed in minor_comm_size partitions) std::optional> sample_key_indices{ - std::nullopt}; // relevant only when (minor_comm_size > 1) + std::nullopt}; // relevant only when (minor_comm_size > 1) auto local_frontier_sample_counts = std::vector{}; auto local_frontier_sample_displacements = std::vector{}; if (minor_comm_size > 1) { diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 5a9ded02009..f86c5b4b66e 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -199,19 +199,19 @@ struct col_indx_extract_t { void operator()( original::device_vec_t const& d_coalesced_src_v, // in: coalesced vector of vertices original::device_vec_t const& - d_v_col_indx, // in: column indices, given by stepper's random engine + d_v_col_indx, // in: column indices, given by stepper's random engine original::device_vec_t& d_v_next_vertices, // out: set of destination vertices, for next step original::device_vec_t& - d_v_next_weights) // out: set of weights between src and destination vertices, for next step + d_v_next_weights) // out: set of weights between src and destination vertices, for next step const { thrust::transform_if( handle_.get_thrust_policy(), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_paths_), // input1 - d_v_col_indx.begin(), // input2 - out_degs_, // stencil + thrust::make_counting_iterator(num_paths_), // input1 + d_v_col_indx.begin(), // input2 + out_degs_, // stencil thrust::make_zip_iterator( thrust::make_tuple(d_v_next_vertices.begin(), d_v_next_weights.begin())), // output [max_depth = max_depth_, @@ -579,9 +579,9 @@ struct random_walker_t { d_crt_out_degs, // |current set of vertex out degrees| = nelems, // to be used as stencil (don't scatter if 0) original::device_vec_t const& - d_sizes, // paths sizes used to provide delta in coalesced paths; - // pre-condition: assumed as updated to reflect new vertex additions; - // also, this is the number of _vertices_ in each path; + d_sizes, // paths sizes used to provide delta in coalesced paths; + // pre-condition: assumed as updated to reflect new vertex additions; + // also, this is the number of _vertices_ in each path; // hence for scattering weights this needs to be adjusted; hence the `adjust` parameter index_t stride, // stride = coalesce block size (max_depth for vertices; max_depth-1 for weights) @@ -767,7 +767,7 @@ random_walks_impl( // pre-allocate num_paths * max_depth; // original::device_vec_t d_coalesced_v(num_paths * max_depth, - stream); // coalesced vertex set + stream); // coalesced vertex set original::device_vec_t d_coalesced_w(num_paths * (max_depth - 1), stream); // coalesced weight set original::device_vec_t d_paths_sz(num_paths, stream); // paths sizes From a1fbc2297ccb8356f6001d05e0ae17a9f945fd48 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 19 Sep 2023 10:05:07 -0700 Subject: [PATCH 3/7] copyright --- cpp/src/generators/erdos_renyi_generator.cu | 2 +- cpp/src/generators/simple_generators.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/generators/erdos_renyi_generator.cu b/cpp/src/generators/erdos_renyi_generator.cu index e4a367b46f0..8448eeaf960 100644 --- a/cpp/src/generators/erdos_renyi_generator.cu +++ b/cpp/src/generators/erdos_renyi_generator.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/generators/simple_generators.cu b/cpp/src/generators/simple_generators.cu index f2102302008..65647be5de0 100644 --- a/cpp/src/generators/simple_generators.cu +++ b/cpp/src/generators/simple_generators.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 4539da1863551ecd4300dcd3ea0212b55f8e8726 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 6 Dec 2023 20:38:48 -0600 Subject: [PATCH 4/7] clang-format. --- cpp/src/community/detail/refine_impl.cuh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index e18825ff563..927f8ece9c5 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -246,12 +246,13 @@ refine_clustering( wcut_deg_and_cluster_vol_triple_begin, wcut_deg_and_cluster_vol_triple_end, singleton_and_connected_flags.begin(), - cuda::proclaim_return_type([resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) { - auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); - auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); - auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); - return wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight); - })); + cuda::proclaim_return_type( + [resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) { + auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); + auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); + auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); + return wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight); + })); edge_src_property_t src_louvain_cluster_weight_cache(handle); edge_src_property_t src_cut_to_louvain_cache(handle); From 8ad9d3b803eebb46b5a98915c9181b8a0f758b7d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 6 Dec 2023 21:41:24 -0600 Subject: [PATCH 5/7] Add cuda::proclaim_return_type to refine_impl.cuh and sampling_post_processing_impl.cuh. --- cpp/src/community/detail/refine_impl.cuh | 41 ++++++++++--------- .../sampling_post_processing_impl.cuh | 12 ++++-- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 927f8ece9c5..eb874657f01 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -215,16 +215,17 @@ refine_clustering( : detail::edge_minor_property_view_t( louvain_assignment_of_vertices.data(), vertex_t{0}), *edge_weight_view, - [] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) { - weight_t weighted_cut_contribution{0}; + cuda::proclaim_return_type( + [] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) { + weight_t weighted_cut_contribution{0}; - if (src == dst) // self loop - weighted_cut_contribution = 0; - else if (src_cluster == dst_cluster) - weighted_cut_contribution = wt; + if (src == dst) // self loop + weighted_cut_contribution = 0; + else if (src_cluster == dst_cluster) + weighted_cut_contribution = wt; - return weighted_cut_contribution; - }, + return weighted_cut_contribution; + }), weight_t{0}, cugraph::reduce_op::plus{}, weighted_cut_of_vertices_to_louvain.begin()); @@ -241,18 +242,18 @@ refine_clustering( weighted_degree_of_vertices.end(), vertex_louvain_cluster_weights.end())); - thrust::transform( - handle.get_thrust_policy(), - wcut_deg_and_cluster_vol_triple_begin, - wcut_deg_and_cluster_vol_triple_end, - singleton_and_connected_flags.begin(), - cuda::proclaim_return_type( - [resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) { - auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); - auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); - auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); - return wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight); - })); + thrust::transform(handle.get_thrust_policy(), + wcut_deg_and_cluster_vol_triple_begin, + wcut_deg_and_cluster_vol_triple_end, + singleton_and_connected_flags.begin(), + cuda::proclaim_return_type([resolution, total_edge_weight] __device__( + auto wcut_wdeg_and_louvain_volume) { + auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); + auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); + auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); + return static_cast( + wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight)); + })); edge_src_property_t src_louvain_cluster_weight_cache(handle); edge_src_property_t src_cut_to_louvain_cache(handle); diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index 77d4f2d865f..852d82e78ab 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -40,6 +40,8 @@ #include #include +#include + #include namespace cugraph { @@ -1229,10 +1231,12 @@ renumber_and_compress_sampled_edgelist( auto pair_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), (*compressed_hops).begin()); auto value_pair_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_t{0}), [num_hops] __device__(size_t i) { - return thrust::make_tuple(static_cast(i / num_hops), - static_cast(i % num_hops)); - }); + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type>( + [num_hops] __device__(size_t i) { + return thrust::make_tuple(static_cast(i / num_hops), + static_cast(i % num_hops)); + })); thrust::upper_bound(handle.get_thrust_policy(), pair_first, pair_first + (*compressed_label_indices).size(), From d0507a758084b6a100790a8f7b7de814a668fbc2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 8 Dec 2023 11:47:55 -0600 Subject: [PATCH 6/7] Apply cuda::proclaim_return_type to vertex_result.cu and sampling_post_processing_test.cu. --- cpp/src/mtmg/vertex_result.cu | 10 ++++++---- cpp/tests/sampling/sampling_post_processing_test.cu | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cpp/src/mtmg/vertex_result.cu b/cpp/src/mtmg/vertex_result.cu index 5b1825656ff..414f1bdfa88 100644 --- a/cpp/src/mtmg/vertex_result.cu +++ b/cpp/src/mtmg/vertex_result.cu @@ -21,6 +21,7 @@ #include +#include #include namespace cugraph { @@ -91,10 +92,11 @@ rmm::device_uvector vertex_result_view_t::gather( auto vertex_partition = vertex_partition_device_view_t(vertex_partition_view); - auto iter = - thrust::make_transform_iterator(local_vertices.begin(), [vertex_partition] __device__(auto v) { + auto iter = thrust::make_transform_iterator( + local_vertices.begin(), + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); thrust::gather(handle.get_thrust_policy(), iter, @@ -111,7 +113,7 @@ rmm::device_uvector vertex_result_view_t::gather( vertex_gpu_ids.begin(), vertex_gpu_ids.end(), thrust::make_zip_iterator(local_vertices.begin(), vertex_pos.begin(), tmp_result.begin()), - [] __device__(int gpu) { return gpu; }, + thrust::identity{}, handle.get_stream()); // diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index e5267d75ac2..42131f876be 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -38,6 +38,8 @@ #include #include +#include + struct SamplingPostProcessing_Usecase { size_t num_labels{}; size_t num_seeds_per_label{}; @@ -318,7 +320,7 @@ bool check_renumber_map_invariants( auto renumbered_merged_vertex_first = thrust::make_transform_iterator( merged_vertices.begin(), - [sorted_org_vertices = + cuda::proclaim_return_type([sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), matching_renumbered_vertices = raft::device_span( matching_renumbered_vertices.data(), @@ -326,7 +328,7 @@ bool check_renumber_map_invariants( auto it = thrust::lower_bound( thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }); + })); thrust::reduce_by_key(handle.get_thrust_policy(), sort_key_first, @@ -1020,7 +1022,7 @@ class Tests_SamplingPostProcessing ? this_label_output_edgelist_srcs.begin() : this_label_output_edgelist_dsts.begin()) + old_size, - [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), + cuda::proclaim_return_type([offsets = raft::device_span(d_offsets.data(), d_offsets.size()), nzd_vertices = renumbered_and_compressed_nzd_vertices ? thrust::make_optional>( @@ -1036,7 +1038,7 @@ class Tests_SamplingPostProcessing } else { return base_v + static_cast(idx); } - }); + })); thrust::copy(handle.get_thrust_policy(), renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], renumbered_and_compressed_edgelist_minors.begin() + h_offsets.back(), From 31d4be8febdea3c6b001955f0f29ce4aa7ed384e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 8 Dec 2023 12:15:57 -0600 Subject: [PATCH 7/7] clang-format. --- .../sampling/sampling_post_processing_test.cu | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index 42131f876be..6be735c3482 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -320,15 +320,16 @@ bool check_renumber_map_invariants( auto renumbered_merged_vertex_first = thrust::make_transform_iterator( merged_vertices.begin(), - cuda::proclaim_return_type([sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - })); + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + })); thrust::reduce_by_key(handle.get_thrust_policy(), sort_key_first, @@ -1022,23 +1023,24 @@ class Tests_SamplingPostProcessing ? this_label_output_edgelist_srcs.begin() : this_label_output_edgelist_dsts.begin()) + old_size, - cuda::proclaim_return_type([offsets = raft::device_span(d_offsets.data(), d_offsets.size()), - nzd_vertices = - renumbered_and_compressed_nzd_vertices - ? thrust::make_optional>( - (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, - (offset_end_offset - offset_start_offset) - 1) - : thrust::nullopt, - base_v] __device__(size_t i) { - auto idx = static_cast(thrust::distance( - offsets.begin() + 1, - thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); - if (nzd_vertices) { - return (*nzd_vertices)[idx]; - } else { - return base_v + static_cast(idx); - } - })); + cuda::proclaim_return_type( + [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), + nzd_vertices = + renumbered_and_compressed_nzd_vertices + ? thrust::make_optional>( + (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, + (offset_end_offset - offset_start_offset) - 1) + : thrust::nullopt, + base_v] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); + if (nzd_vertices) { + return (*nzd_vertices)[idx]; + } else { + return base_v + static_cast(idx); + } + })); thrust::copy(handle.get_thrust_policy(), renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], renumbered_and_compressed_edgelist_minors.begin() + h_offsets.back(),