From 897ecc876da7d18fb6a7954aa8d8adaaecd7e2fa Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 14 Sep 2022 10:29:46 -0700 Subject: [PATCH 01/18] rename a prim file --- ...utgoine_e.cuh => per_v_random_select_transform_outgoine_e.cuh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/src/prims/{per_v_random_select_and_transform_outgoine_e.cuh => per_v_random_select_transform_outgoine_e.cuh} (100%) diff --git a/cpp/src/prims/per_v_random_select_and_transform_outgoine_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh similarity index 100% rename from cpp/src/prims/per_v_random_select_and_transform_outgoine_e.cuh rename to cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh From 0f3e9419dceb2ca012b900fab046badd9c2002a8 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 14 Sep 2022 10:30:40 -0700 Subject: [PATCH 02/18] initial prim implementation --- .../cugraph/utilities/device_functors.cuh | 22 + ...r_v_random_select_transform_outgoine_e.cuh | 663 +++++++++++++++++- 2 files changed, 684 insertions(+), 1 deletion(-) diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 8354f7701f3..8a75f2a3379 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -75,6 +75,21 @@ struct check_in_range_t { __device__ bool operator()(T val) const { return (val >= min) && (val < max); } }; +template +struct strided_sum_t { + T const* values{nullptr}; + size_t stride{0}; + size_t count{0}; + + __device__ T operator()(size_t start_offset) const { + T sum{0}; + for (size_t j = 0; j < count; ++j) { + sum += values[start_offset + stride * j]; + } + return sum; + } +}; + template struct shift_left_t { T offset{}; @@ -104,6 +119,13 @@ struct multiply_and_add_t { __device__ T operator()(T input) const { return input * multiplier + adder; } }; +template +struct divider_t { + T divisor{}; + + __device__ T operator()(T input) const { return input / divisor; } +}; + } // namespace detail } // namespace cugraph diff --git a/cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh index bc213c713c4..8d22b906ff6 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh @@ -17,6 +17,652 @@ namespace cugraph { +namespace detail { + +// convert a (neighbor index, key index) pair to a (col_comm_rank, neighbor index, key index) +// triplet, col_comm_rank is set to -1 if an neighbor index is invalid +template +struct convert_pair_to_triplet_t { + raft::device_span gathered_local_degrees{}; + size_t stride{}; + size_t K{}; + int32_t col_comm_size{}; + edge_t invalid_idx{}; + + __device__ int32_t operator()(thrust::tuple index_pair) const + { + auto nbr_idx = thrust::get<0>(index_pair); + auto key_idx = thrust::get<1>(index_pair); + auto local_nbr_idx = nbr_idx; + int32_t col_comm_rank{-1}; + if (nbr_idx != invalid_idx) { + col_comm_rank = col_comm_size - 1; + for (int rank = 0; rank < col_comm_size - 1; ++rank) { + auto local_degree = gathered_local_degrees[stride * rank + key_idx]; + if (local_nbr_idx < local_degree) { + col_comm_rank = rank; + break; + } else { + local_nbr_idx -= local_degree; + } + } + } + return thrust::make_tuple(col_comm_rank, local_nbr_idx, key_idx); + } +}; + +template +struct invalid_col_comm_rank_t { + int32_t invalid_col_comm_rank{}; + __device__ bool operator()(thrust::tuple triplet) const + { + return thrust::get<1>(triplet) == invalid_col_comm_rank; + } +}; + +template +struct transform_and_count_local_nbr_indices_t { + using key_t = typename thrust::iterator_traits::value_type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + edge_partition_device_view_t + edge_partition{}; + KeyIterator key_first{}; + OffsetIterator offset_first{}; + LocalNbrIdxIterator local_nbr_idx_first{}; + OutputValueIterator output_value_first{}; + thrust::optional output_count_first{}; + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgeOp e_op{}; + edge_t invalid_idx{}; + thrust::optional invalid_value{thrust::nullopt}; + + __device__ void operator()(size_t key_idx) const + { + auto key = *(key_first + key_idx); + if constexpr (std::is_same_v) { + major = key; + } else { + major = thrust::get<0>(key); + } + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + thrust::optional weights{thrust::nullopt}; + [[maybe_unused]] edge_t local_degree{0}; + if constexpr (GraphViewType::is_multi_gpu) { + auto major_hypersparse_first = edge_partition.major_hypersparse_first(); + if (major_hypersparse_first && (major >= *major_hypersparse_first)) { + auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); + if (major_hypersparse_idx) { + thrust::tie(indices, weights, local_degree) = edge_partition.local_edges( + edge_partition.major_offset_from_major_nocheck(*major_hypersparse_first) + + *major_hypersparse_idx); + } + } else { + thrust::tie(indices, weights, local_degree) = edge_partition.local_edges(major_offset); + } + } else { + thrust::tie(indices, weights, local_degree) = edge_partition.local_edges(major_offset); + } + auto start_offset = *(local_nbr_idx_offset_first + key_idx); + auto end_offset = *(local_nbr_idx_offset_first + (key_idx + 1)); + + size_t num_valid_local_nbr_indices{0}; + for (size_t i = start_offset; i < end_offset; ++i) { + auto local_nbr_idx = *(local_nbr_idx_first + i); + if (local_nbr_idx != invalid_idx) { + assert(local_nbr_idx < local_degree); + auto minor = indices[local_nbr_idx]; + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); + + std::conditional_t + key_or_src{}; // key if major + std::conditional_t + key_or_dst{}; // key if major + if constexpr (GraphViewType::is_storage_transposed) { + key_or_src = minor; + key_or_dst = key; + } else { + key_or_src = key; + key_or_dst = minor; + } + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + *(output_first + i) = evaluate_edge_op() + .compute(key_or_src, + key_or_dst, + weights ? (*weights)[local_nbr_idx] : weight_t{1.0}, + edge_partition_src_value_input.get(src_offset), + edge_partition_dst_value_input.get(dst_offset), + e_op); + ++num_valid_local_nbr_indices; + } else if (invalid_value) { + *(output_first + i) = *invalid_value; + } else { + assert(output_count_first); + } + } + if (output_count_first) { *(*output_count_first + key_idx) = num_valid_local_nbr_indices; } + } +}; + +template +struct copy_and_fill_sample_e_op_results_t { + raft::device_span sample_counts{}; + raft::device_span sample_displacements{}; + InputIterator input_first{}; + OutputIterator output_first{}; + size_t K{}; + + __device__ void operator()(size_t i) const + { + auto num_valid_samples = sample_counts[i]; + for (size_t j = 0; j < num_valid_samples; ++j) { // copy + *(output_first + K * i + j) = *(input_first + sample_displacements[i] + j); + } + for (size_t j = num_valid_samples; j < K; ++j) { // fill + *(output_first + K * i + j) = invalid_value; + } + } +}; + +template +std::tuple>, + decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> +per_v_random_select_transform_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexFrontierBucketType const& frontier, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, +#if 0 // FIXME: This will be necessary to include edge IDs in the output. + // Primitives API should be updated to support this in a consistent way. + EdgeValueInputWrapper egde_value_input, +#endif + EdgeOp e_op, + raft::random::RngState& rng_state, + size_t K, + bool with_replacement, + std::optional invalid_value, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using key_t = typename VertexFrontierBucketType::key_type; + + using edge_partition_src_input_device_view_t = std::conditional_t< + std::is_same_v, + edge_partition_endpoint_dummy_property_device_view_t, + std::conditional_t, + edge_partition_endpoint_property_device_view_t< + vertex_t, + typename EdgeSrcValueInputWrapper::value_iterator>>>; + using edge_partition_dst_input_device_view_t = std::conditional_t< + std::is_same_v, + edge_partition_endpoint_dummy_property_device_view_t, + std::conditional_t, + edge_partition_endpoint_property_device_view_t< + vertex_t, + typename EdgeDstValueInputWrapper::value_iterator>>>; + + static_assert(!GraphViewType::is_storage_transposed == incoming); + static_assert(std::is_same_v::result_type, + T>); + + if (do_expensive_check) { + // FIXME: better re-factor this check function? + vertex_t const* frontier_vertex_first{nullptr}; + vertex_t const* frontier_vertex_last{nullptr}; + if constexpr (std::is_same_v) { + frontier_vertex_first = frontier.begin(); + frontier_vertex_last = frontier.end(); + } else { + frontier_vertex_first = thrust::get<0>(frontier.begin().get_iterator_tuple()); + frontier_vertex_last = thrust::get<0>(frontier.end().get_iterator_tuple()); + } + auto num_invalid_keys = + frontier.size() - + thrust::count_if(handle.get_thrust_policy(), + frontier_vertex_first, + frontier_vertex_last, + check_in_range_t{graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last()}); + if constexpr (GraphViewType::is_multi_gpu) { + num_invalid_keys = host_scalar_allreduce( + handle.get_comms(), num_invalid_keys, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS(num_invalid_keys == size_t{0}, + "Invalid input argument: frontier includes out-of-range keys."); + } + + auto frontier_key_first = frontier.begin(); + auto frontier_key_last = frontier.end(); + + std::vector local_frontier_sizes{}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + local_frontier_sizes = host_scalar_allgather( + col_comm, + static_cast(thrust::distance(frontier_key_first, frontier_key_last)), + handle.get_stream()); + } else { + local_frontier_sizes = std::vector{static_cast( + static_cast(thrust::distance(frontier_key_first, frontier_key_last)))}; + } + std::vector local_frontier_displacements(local_frontier_sizes.size()); + std::exclusive_scan(local_frontier_sizes.begin(), + local_frontier_sizes.end(), + local_frontier_displacements.begin(), + size_t{0}); + + // 1. aggregate frontier + + auto aggregate_local_frontier_keys = + GraphViewType::is_multi_gpu + ? std::make_optional(size_t{0}, rmm::cuda_stream_view{}))>( + local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()) + : std::nullopt; + if constexpr (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + device_allgatherv(col_comm, + frontier_key_first, + get_dataframe_buffer_begin(aggregate_local_frontier_keys), + local_frontier_sizes, + local_frontier_displacements, + handle.get_stream()); + } + + // 2. compute degrees + + auto aggregate_local_frontier_local_degrees = + GrpahViewType::is_multi_gpu + ? std::make_optional>( + local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()) + : std::nullopt; + rmm::device_uvector frontier_degrees(frontier.size(), handle.get_stream()); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(i)); + + vertex_t const* edge_partition_frontier_major_first{nullptr}; + vertex_t const* edge_partition_frontier_major_last{nullptr}; + + auto edge_partition_frontier_key_first = + (GraphViewType::is_multi_gpu ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) + : frontier_key_first) + + local_frontier_displacements[i]; + if constexpr (std::is_same_v) { + edge_partition_frontier_major_first = edge_partition_frontier_key_first; + } else { + edge_partition_frontier_major_first = thrust::get<0>(edge_partition_frontier_key_first); + } + edge_partition_frontier_major_last = + edge_partition_frontier_major_first + local_frontier_sizes[i]; + + auto edge_partition_frontier_local_degrees = edge_partition.compute_local_degrees( + raft::device_span(edge_partition_frontier_major_first, + edge_partition_frontier_size), + handle.get_stream()); + + if constexpr (GraphViewType::is_multi_gpu) { + // FIXME: this copy is unnecessary if edge_partition.compute_local_degrees() takes a pointer + // to the output array + thrust::copy( + handle.get_thrust_policy(), + edge_partition_frontier_local_degrees.begin(), + edge_partition_frontier_local_degrees.end(), + (*aggregate_local_frontier_local_degrees).begin() + local_frontier_displacements[i]); + } else { + frontier_degrees = std::move(edge_partition_frontier_local_degrees); + } + } + + auto frontier_gathered_local_degrees = + GrpahViewType::is_multi_gpu + ? std::make_optional>(frontier.size() * K, handle.get_stream()) + : std::nullopt; + if constexpr (GrpahViewType::is_multi_gpu) { + std::tie(frontier_gathered_local_degrees, std::ignore) = + shuffle_values(col_comm, + (*aggregate_local_frontier_local_degrees).begin(), + local_frontier_sizes, + handle.get_stream()); + thrust::tablulate(handle.get_thrust_policy(), + frontier_degrees.begin(), + frontier_degrees.end(), + strided_sum_t{frontier_gathered_local_degrees.data(), + frontier.size(), + static_cast(col_comm_size)}); + aggregate_local_frontier_local_degrees = std::nullopt; + } + + // 3. randomly select neighbor indices + + rmm::device_uvector sample_nbr_indices(frontier.size() * K, handle.get_stream()); + // FIXME: get_sampling_index is inefficient when degree >> K & with_replacement = false + // FIXME: Need to verify that sample_nbr_indices[] are filled with INVALID_IDX when degree == 0 + // (with_replacement = true) or degree < K (with_replacement = false) + cugraph_ops::get_sampling_index(sample_nbr_indices.data(), + rng_state, + frontier_degrees.data(), + static_cast(frontier.size()), + static_cats(K), + with_replacement, + handle.get_stream()); + frontier_degrees.resize(0, handle.get_stream()); + frontier_degrees.shrink_to_fit(handle.get_stream()); + + // 4. shuffle randomly selected indices + + auto sample_local_nbr_indices = std::move( + sample_nbr_indices); // neighbor index within an edge partition (note that each vertex's + // neighbors are distributed in col_comm_size partitions) + auto sample_key_indices = + GraphViewType::is_multi_gpu + ? std::make_optional>(0, handle.get_stream()) + : std::nullopt; + auto local_frontier_sample_counts = + GrpahViewType::is_multi_gpu ? std::vector(0) : std::vector{frontier.size() * K}; + auto local_frontier_sample_displacements = + GrpahViewType::is_multi_gpu ? std::vector(0) : std::vector{0}; + if constexpr (GrpahViewType::is_multi_gpu) { + auto col_comm_ranks = + rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); + auto sample_key_indices = + rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); + auto input_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(sample_local_nbr_indices.begin(), + thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), + divider_t{K}))); + thrust::transform(handle.get_thrsut_policy(), + input_pair_first, + input_pair_first + sample_indices.size(), + thrust::make_zip_iterator(thrust::make_tuple( + col_comm_ranks.begin(), sample_indices.begin(), key_indices.begin())), + convert_pair_to_triplet_t{ + raft::device_span(frontier_gathered_local_degrees.data(), + frontier_gathered_local_degrees.size()), + frontier.size(), + K, + col_comm_size, + ops::gnn::graph::INVALID_IDX}); + + frontier_gathered_local_degrees = std::nullopt; + + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + sample_local_nbr_indices.bgin(), col_comm_ranks.begin(), key_indices.begin())); + sample_local_nbr_indices.resize( + thrust::distance(triplet_first, + thrust::remove_if(handle.get_thrust_policy(), + triplet_first, + triplet_first + sample_local_nbr_indices.size(), + invalid_col_comm_rank{int32_t{-1}})), + handle.get_stream()); + col_comm_ranks.resize(sample_local_nbr_indices.size(), handle.get_stream()); + sample_key_indices.resize(sample_local_nbr_indices.size(), handle.get_stream()); + + auto d_tx_counts = groupby_and_count(col_comm_ranks.begin(), + col_comm_ranks.end(), + thrust::make_zip_iterator(thrust::make_tuple( + sample_local_nbr_indices.begin(), key_indices.begin())), + thrust::identity{}, + col_comm_size, + std::numeric_limits::max(), + handle.get_stream()); + + std::vector h_tx_counts(d_tx_counts.size()); + raft::update_host( + h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); + handle.sync_stream(); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(sample_local_nbr_indices.bgin(), key_indices.begin())); + auto [rx_value_buffer, rx_counts] = + shuffle_values(col_comm, pair_first, h_tx_counts, handle.get_stream()); + + sample_local_nbr_indices = std::move(std::get<0>(rx_value_buffer)); + sample_key_indices = std::move(std::get<1>(rx_value_buffer)); + local_frontier_sample_displacements = std::vector(rx_counts.size()); + std::exclusive_scan( + rx_counts.begin(), rx_counts.end(), local_frontier_sample_displacements.begin()); + local_frontier_sample_counts = std::move(rx_counts); + } + + // 5. transform + + auto sample_counts = + (!GraphViewType::is_multi_gpu && !invalid_value) + ? std::make_optional>(frontier.size(), handle.get_stream()) + : std::nullopt; + auto sample_e_op_results = allocate_dataframe_buffer( + local_frontier_sample_displacements.back() + local_frontier_sample_counts.back(), + handle.get_stream()); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(i)); + + auto edge_partition_frontier_key_first = + (GraphViewType::is_multi_gpu ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) + : frontier_key_first) + + local_frontier_displacements[i]; + auto edge_partition_sample_local_nbr_index_first = + sample_local_nbr_indices.begin() + local_frontier_sample_displacements[i]; + + auto edge_partition_sample_e_op_result_first = + get_dataframe_buffer_beign(sample_e_op_results) + local_frontier_sample_displacements[i]; + + edge_partition_src_input_device_view_t edge_partition_src_value_input{}; + edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input); + edge_partition_dst_value_input = + edge_partition_dst_input_device_view_t(edge_dst_value_input, i); + } else { + edge_partition_src_value_input = + edge_partition_src_input_device_view_t(edge_src_value_input, i); + edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); + } + + if constexpr (GraphViewType::is_multi_gpu) { + thrust::sort_by_key(handle.get_thrust_policy(), + (*sample_key_indices).begin() + local_frontier_sample_displacements[i], + (*sample_key_indices).begin() + local_frontier_sample_displacements[i] + + local_frontier_sample_counts[i], + edge_partition_sample_local_nbr_index_first); + auto num_unique_key_indices = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(local_frontier_sample_counts[i]), + is_first_in_run_t{sample_key_indices.data() + + local_frontier_sample_displacements[i]}); + rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_strema()); + rmm::device_uvector unique_key_local_nbr_idx_counts(num_unique_key_indices, + handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), + sample_key_indices.begin() + local_frontier_sample_displacements[i], + sample_key_indices.begin() + local_frontier_sample_displacements[i] + + local_frontier_sample_counts[i], + thrust::make_constant_iterator(edge_t{1}), + unique_key_indices.begin(), + unique_key_local_nbr_idx_counts.begin()); + rmm::device_uvector unique_key_local_nbr_idx_offsets(num_unique_key_indices + 1, + handle.get_stream()); + unique_key_local_nbr_idx_offsets.set_element_to_zero_async(size_t{0}, handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + unique_local_nbr_idx_counts.begin(), + unique_local_nbr_idx_counts.end(), + unique_local_nbr_idx_offsets.begin() + 1); + auto offset_first = unique_key_local_nbr_idx_offsets.begin(); + thrust::for_each( + handle.get_thrust_policy(), + unique_key_indices.begin(), + unique_key_indices.end(), + transform_and_count_local_nbr_indices_t{edge_partition, + edge_partition_frontier_key_first, + offset_first, + edge_partition_sample_local_nbr_index_first, + edge_partition_sample_e_op_result_first, + thrust::nullopt, + edge_partition_src_value_input, + edge_partition_dst_value_input, + e_op, + invalid_idx, + invalid_value}); + } else { + auto offset_first = thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), + multiplier_t{K}); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(unique_key_indices.size()), + transform_and_count_local_nbr_indices_t< + GraphViewType, + decltype(edge_partition_frontier_key_first), + decltype(offset_first), + decltype(edge_partition_sample_local_nbr_index_first), + decltype(edge_partition_sample_e_op_result_first), + size_t const*, + edge_partition_src_input_device_view_t, + edge_partition_dst_input_device_view_t, + EdgeOp, + T>{edge_partition, + edge_partition_frontier_key_first, + offset_first, + edge_partition_sample_local_nbr_index_first, + edge_partition_sample_e_op_result_first, + sample_counts ? thrust::optional((*sample_counts).data()) + : thrust::nullopt, + edge_partition_src_value_input, + edge_partition_dst_value_input, + e_op, + invalid_idx, + invalid_value}); + } + } + + // 6. shuffle randomly selected & transformed results and update sample_offsets + + auto sample_offsets = invalid_value ? std::nullopt + : std::make_optional>( + frontier.size() + 1, handle.get_stream()); + if (GraphViewType::is_multi_gpu) { + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(sample_e_op_results.bgin(), sample_key_indices.begin())); + auto [rx_value_buffer, rx_counts] = + shuffle_values(col_comm, pair_first, local_frontier_sample_counts, handle.get_stream()); + sample_e_op_results = std::move(std::get<0>(rx_value_buffer)); + sample_key_indices = std::move(std::get<1>(rx_value_buffer)); + // FIXME: better refactor this sort-and-reduce-by-key + thrust::sort_by_key(handle.get_thrust_policy(), + sample_key_indices.begin(), + sample_key_indices.end(), + sample_e_op_results.begin()); + auto num_unique_key_indices = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(sample_key_indices.size()), + is_first_in_run_t{sample_key_indices.data()}, + rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_strema()); + rmm::device_uvector unique_key_sample_counts(num_unique_key_indices, + handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), + sample_key_indices.begin(), + sample_key_indices.end(), + thrust::make_constant_iterator(edge_t{1}), + unique_key_indices.begin(), + unique_key_sample_counts.begin()); + sample_counts = rmm::device_uvector(frontier.size(), handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), (*sample_counts).begin(), (*sample_counts).end(), size_t{0}); + thrust::scatter(handle.get_thrust_policy(), unique_key_sample_counts.begin(), unique_key_sample_counts.end(), unique_key_indices.begin(), (*sample_counts).begin()); + if (invalid_value) { + rmm::device_uvector sample_displacements((*sample_counts).size(), + handle.get_stream()); + thrust::exclusive_scan(handle.get_thrust_policy(), + (*sample_counts).beign(), + (*sample_counts).emd(), + sample_displacements.begin()); + auto tmp_sample_e_op_results = + allocate_dataframe_buffer(frontier.size() * K, handle.get_stream()); + auto input_first = get_dataframe_buffer_begin(sample_e_op_results); + auto output_first = get_dataframe_buffer_begin(tmp_sample_e_op_results); + thrust::for_each( + handle.get_thrust_policy(), + unique_key_indices.begin(), + unique_key_indices.end(), + copy_and_fill_sample_e_op_results_t{ + raft::device_span((*sample_counts).data(), (*sample_counts).size()), + raft::device_span(sample_displacements.data(), sample_displacements.size()), + input_first, + output_first, + K}); + sample_e_op_results = std::move(tmp_sample_e_op_results); + } + else { + (*sample_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + (*sample_counts).begin(), + (*sample_counts).end(), + (*sample_offsets).begin() + 1); + } + } else { + if (!invalid_value) { + (*sample_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + (*sample_counts).begin(), + (*sample_counts).end(), + (*sample_offsets).begin() + 1); + } + } + + return std::make_tuple(std::move(sample_offsets), std::move(sample_e_op_results)); +} + +} // namespace detail + /** * @brief Randomly select and transform the input (tagged-)vertices' outgoing edges with biases. * @@ -86,12 +732,16 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, #endif EdgeBiasOp e_bias_op, EdgeOp e_op, + raft::random::RngState& rng_state, size_t K, bool with_replacement, std::optional invalid_value, bool do_expensive_check = false) { static_assert(false, "unimplemented."); + + return std::make_tuple(std::nullopt, + allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); } /** @@ -159,12 +809,23 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, EdgeValueInputWrapper egde_value_input, #endif EdgeOp e_op, + raft::random::RngState& rng_state, size_t K, bool with_replacement, std::optional invalid_value, bool do_expensive_check = false) { - static_assert(false, "unimplemented."); + return detail::per_v_random_select_transform_e(hanlde, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + e_op, + rng_state, + K, + with_replacement, + invalid_value, + do_expensive_check); } } // namespace cugraph From 06598aecc4f0d150641451df9f3524430cf9262b Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 14 Sep 2022 22:49:51 -0700 Subject: [PATCH 03/18] refactor prim tests --- cpp/tests/prims/mg_count_if_e.cu | 169 +++----------- cpp/tests/prims/mg_extract_if_e.cu | 105 ++------- ...extract_transform_v_frontier_outgoing_e.cu | 116 +++------- ..._v_transform_reduce_incoming_outgoing_e.cu | 214 +++++------------- cpp/tests/prims/mg_reduce_v.cu | 157 +++---------- cpp/tests/prims/mg_transform_reduce_e.cu | 182 +++------------ cpp/tests/prims/mg_transform_reduce_v.cu | 92 +++----- ...orm_reduce_v_frontier_outgoing_e_by_dst.cu | 116 +++------- cpp/tests/prims/property_generator.cuh | 143 ++++++++++++ 9 files changed, 409 insertions(+), 885 deletions(-) create mode 100644 cpp/tests/prims/property_generator.cuh diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 25bdb1e4190..5c96570c7d2 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -50,119 +52,6 @@ #include -template -struct property_type { - using type = std::conditional_t<(sizeof...(Args) > 1), - thrust::tuple, - typename thrust::tuple_element<0, thrust::tuple>::type>; -}; - -template -struct property_transform - : public thrust::unary_function::type> { - int mod{}; - property_transform(int mod_count) : mod(mod_count) {} - - template ::type> - constexpr __device__ - typename std::enable_if_t::value, type> - operator()(const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return thrust::make_tuple(static_cast(value)...); - } - - template ::type> - constexpr __device__ typename std::enable_if_t::value, type> operator()( - const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return static_cast(value); - } -}; - -template typename Tuple, typename... Args> -struct property_transform> : public property_transform { -}; - -template -struct generate_impl { - private: - using type = typename property_type::type; - using property_buffer_type = std::conditional_t< - (sizeof...(Args) > 1), - std::tuple...>, - rmm::device_uvector>::type>>; - - public: - static thrust::tuple initial_value(int init) - { - return thrust::make_tuple(static_cast(init)...); - } - template - static auto vertex_property(rmm::device_uvector& labels, - int hash_bin_count, - raft::handle_t const& handle) - { - auto data = cugraph::allocate_dataframe_buffer(labels.size(), handle.get_stream()); - auto zip = cugraph::get_dataframe_buffer_begin(data); - thrust::transform(handle.get_thrust_policy(), - labels.begin(), - labels.end(), - zip, - property_transform(hash_bin_count)); - return data; - } - template - static auto vertex_property(thrust::counting_iterator begin, - thrust::counting_iterator end, - int hash_bin_count, - raft::handle_t const& handle) - { - auto length = thrust::distance(begin, end); - auto data = cugraph::allocate_dataframe_buffer(length, handle.get_stream()); - auto zip = cugraph::get_dataframe_buffer_begin(data); - thrust::transform(handle.get_thrust_policy(), - begin, - end, - zip, - property_transform(hash_bin_count)); - return data; - } - - template - static auto column_property(raft::handle_t const& handle, - graph_view_type const& graph_view, - property_buffer_type& property) - { - auto output_property = cugraph::edge_dst_property_t(handle, graph_view); - update_edge_dst_property( - handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); - return output_property; - } - - template - static auto row_property(raft::handle_t const& handle, - graph_view_type const& graph_view, - property_buffer_type& property) - { - auto output_property = cugraph::edge_src_property_t(handle, graph_view); - update_edge_src_property( - handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); - return output_property; - } -}; - -template -struct generate : public generate_impl { - static T initial_value(int init) { return static_cast(init); } -}; -template -struct generate> : public generate_impl { -}; - struct Prims_Usecase { bool check_correctness{true}; bool test_weighted{false}; @@ -216,11 +105,12 @@ class Tests_MGCountIfE const int hash_bin_count = 5; - auto vertex_property_data = - generate::vertex_property((*d_mg_renumber_map_labels), hash_bin_count, *handle_); - auto col_prop = - generate::column_property(*handle_, mg_graph_view, vertex_property_data); - auto row_prop = generate::row_property(*handle_, mg_graph_view, vertex_property_data); + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -231,11 +121,12 @@ class Tests_MGCountIfE auto result = count_if_e( *handle_, mg_graph_view, - row_prop.view(), - col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - return row_property < col_property; + mg_src_prop.view(), + mg_dst_prop.view(), + [] __device__(auto row, auto col, weight_t wt, auto src_property, auto dst_property) { + return src_property < dst_property; }); + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -253,23 +144,23 @@ class Tests_MGCountIfE *handle_, input_usecase, prims_usecase.test_weighted, false); auto sg_graph_view = sg_graph.view(); - auto sg_vertex_property_data = generate::vertex_property( + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), - hash_bin_count, - *handle_); - auto sg_col_prop = - generate::column_property(*handle_, sg_graph_view, sg_vertex_property_data); - auto sg_row_prop = - generate::row_property(*handle_, sg_graph_view, sg_vertex_property_data); + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); auto expected_result = count_if_e( *handle_, sg_graph_view, - sg_row_prop.view(), - sg_col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - return row_property < col_property; + sg_src_prop.view(), + sg_dst_prop.view(), + [] __device__(auto row, auto col, weight_t wt, auto src_property, auto dst_property) { + return src_property < dst_property; }); ASSERT_TRUE(expected_result == result); } @@ -288,14 +179,14 @@ using Tests_MGCountIfE_Rmat = Tests_MGCountIfE; TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>(std::get<0>(param), - std::get<1>(param)); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>( + run_current_test, false>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -303,14 +194,14 @@ TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) TEST_P(Tests_MGCountIfE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>(std::get<0>(param), - std::get<1>(param)); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGCountIfE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>( + run_current_test, true>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/prims/mg_extract_if_e.cu b/cpp/tests/prims/mg_extract_if_e.cu index 988905b9800..47ae30537bb 100644 --- a/cpp/tests/prims/mg_extract_if_e.cu +++ b/cpp/tests/prims/mg_extract_if_e.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -51,39 +53,6 @@ #include -template -__device__ auto make_type_casted_tuple_from_scalar(T val, std::index_sequence) -{ - return thrust::make_tuple( - static_cast::type>(val)...); -} - -template -__device__ __host__ auto make_property_value(T val) -{ - property_t ret{}; - if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { - ret = make_type_casted_tuple_from_scalar( - val, std::make_index_sequence::value>{}); - } else { - ret = static_cast(val); - } - return ret; -} - -template -struct property_transform_t { - int mod{}; - - constexpr __device__ property_t operator()(vertex_t const v) const - { - static_assert(cugraph::is_thrust_tuple_of_arithmetic::value || - std::is_arithmetic_v); - cuco::detail::MurmurHash3_32 hash_func{}; - return make_property_value(hash_func(v) % mod); - } -}; - template __device__ bool compare_equal_scalar(T const& lhs, T const& rhs) { @@ -146,7 +115,7 @@ class Tests_MGExtractIfE template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { @@ -160,7 +129,7 @@ class Tests_MGExtractIfE hr_clock.start(); } - auto [mg_graph, mg_renumber_map_labels] = + auto [mg_graph, d_mg_renumber_map_labels] = cugraph::test::construct_graph( *handle_, input_usecase, true, true); @@ -178,28 +147,12 @@ class Tests_MGExtractIfE constexpr int hash_bin_count = 5; - auto mg_property_buffer = cugraph::allocate_dataframe_buffer( - mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - - thrust::transform(handle_->get_thrust_policy(), - (*mg_renumber_map_labels).begin(), - (*mg_renumber_map_labels).end(), - cugraph::get_dataframe_buffer_begin(mg_property_buffer), - property_transform_t{hash_bin_count}); - - cugraph::edge_src_property_t mg_src_properties( - *handle_, mg_graph_view); - cugraph::edge_dst_property_t mg_dst_properties( - *handle_, mg_graph_view); - - update_edge_src_property(*handle_, - mg_graph_view, - cugraph::get_dataframe_buffer_cbegin(mg_property_buffer), - mg_src_properties); - update_edge_dst_property(*handle_, - mg_graph_view, - cugraph::get_dataframe_buffer_cbegin(mg_property_buffer), - mg_dst_properties); + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -210,8 +163,8 @@ class Tests_MGExtractIfE auto [mg_edgelist_srcs, mg_edgelist_dsts, mg_edgelist_weights] = extract_if_e(*handle_, mg_graph_view, - mg_src_properties.view(), - mg_dst_properties.view(), + mg_src_prop.view(), + mg_dst_prop.view(), [] __device__(vertex_t src, vertex_t dst, auto src_val, auto dst_val) { return src_val < dst_val; }); @@ -230,7 +183,7 @@ class Tests_MGExtractIfE // 3-1. aggregate MG results auto mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( - *handle_, (*mg_renumber_map_labels).data(), (*mg_renumber_map_labels).size()); + *handle_, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); auto mg_aggregate_edgelist_srcs = cugraph::test::device_gatherv(*handle_, mg_edgelist_srcs.data(), mg_edgelist_srcs.size()); auto mg_aggregate_edgelist_dsts = @@ -267,35 +220,21 @@ class Tests_MGExtractIfE // 3-4. run SG extract_if_e - auto sg_property_buffer = cugraph::allocate_dataframe_buffer( - sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - - thrust::transform( - handle_->get_thrust_policy(), + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), - cugraph::get_dataframe_buffer_begin(sg_property_buffer), - property_transform_t{hash_bin_count}); - - cugraph::edge_src_property_t sg_src_properties( - *handle_, sg_graph_view); - cugraph::edge_dst_property_t sg_dst_properties( - *handle_, sg_graph_view); - - update_edge_src_property(*handle_, - sg_graph_view, - cugraph::get_dataframe_buffer_cbegin(sg_property_buffer), - sg_src_properties); - update_edge_dst_property(*handle_, - sg_graph_view, - cugraph::get_dataframe_buffer_cbegin(sg_property_buffer), - sg_dst_properties); + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); auto [sg_edgelist_srcs, sg_edgelist_dsts, sg_edgelist_weights] = extract_if_e(*handle_, sg_graph_view, - sg_src_properties.view(), - sg_dst_properties.view(), + sg_src_prop.view(), + sg_dst_prop.view(), [] __device__(vertex_t src, vertex_t dst, auto src_val, auto dst_val) { return src_val < dst_val; }); diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index 049a9583d0c..2be87bf2508 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -57,39 +59,6 @@ #include #include -template -__device__ __host__ auto make_type_casted_tuple_from_scalar(T val, std::index_sequence) -{ - return thrust::make_tuple( - static_cast::type>(val)...); -} - -template -__device__ __host__ auto make_property_value(T val) -{ - property_t ret{}; - if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { - ret = make_type_casted_tuple_from_scalar( - val, std::make_index_sequence::value>{}); - } else { - ret = static_cast(val); - } - return ret; -} - -template -struct property_transform_t { - int mod{}; - - constexpr __device__ property_t operator()(vertex_t const v) const - { - static_assert(cugraph::is_thrust_tuple_of_arithmetic::value || - std::is_arithmetic_v); - cuco::detail::MurmurHash3_32 hash_func{}; - return make_property_value(hash_func(v) % mod); - } -}; - template struct e_op_t { static_assert(std::is_same_v || @@ -174,7 +143,7 @@ class Tests_MGExtractTransformVFrontierOutgoingE typename output_payload_t> void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { - using property_t = int32_t; + using result_t = int32_t; using key_t = std::conditional_t, vertex_t, thrust::tuple>; @@ -200,7 +169,7 @@ class Tests_MGExtractTransformVFrontierOutgoingE hr_clock.start(); } - auto [mg_graph, mg_renumber_map_labels] = + auto [mg_graph, d_mg_renumber_map_labels] = cugraph::test::construct_graph( *handle_, input_usecase, false, renumber); @@ -218,28 +187,12 @@ class Tests_MGExtractTransformVFrontierOutgoingE const int hash_bin_count = 5; - auto mg_property_buffer = cugraph::allocate_dataframe_buffer( - mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - - thrust::transform(handle_->get_thrust_policy(), - (*mg_renumber_map_labels).begin(), - (*mg_renumber_map_labels).end(), - cugraph::get_dataframe_buffer_begin(mg_property_buffer), - property_transform_t{hash_bin_count}); - - cugraph::edge_src_property_t mg_src_properties( - *handle_, mg_graph_view); - cugraph::edge_dst_property_t mg_dst_properties( - *handle_, mg_graph_view); - - update_edge_src_property(*handle_, - mg_graph_view, - cugraph::get_dataframe_buffer_cbegin(mg_property_buffer), - mg_src_properties); - update_edge_dst_property(*handle_, - mg_graph_view, - cugraph::get_dataframe_buffer_cbegin(mg_property_buffer), - mg_dst_properties); + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); auto mg_key_buffer = cugraph::allocate_dataframe_buffer( mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -252,7 +205,7 @@ class Tests_MGExtractTransformVFrontierOutgoingE thrust::tabulate(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_key_buffer), cugraph::get_dataframe_buffer_end(mg_key_buffer), - [mg_renumber_map_labels = (*mg_renumber_map_labels).data(), + [mg_renumber_map_labels = (*d_mg_renumber_map_labels).data(), local_vertex_partition_range_first = mg_graph_view.local_vertex_partition_range_first()] __device__(size_t i) { return thrust::make_tuple( @@ -280,9 +233,9 @@ class Tests_MGExtractTransformVFrontierOutgoingE *handle_, mg_graph_view, mg_vertex_frontier.bucket(bucket_idx_cur), - mg_src_properties.view(), - mg_dst_properties.view(), - e_op_t{}); + mg_src_prop.view(), + mg_dst_prop.view(), + e_op_t{}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -297,10 +250,10 @@ class Tests_MGExtractTransformVFrontierOutgoingE if (prims_usecase.check_correctness) { auto mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( - *handle_, (*mg_renumber_map_labels).data(), (*mg_renumber_map_labels).size()); + *handle_, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); auto mg_aggregate_extract_transform_output_buffer = cugraph::allocate_dataframe_buffer< - typename e_op_t::return_type::value_type>( + typename e_op_t::return_type::value_type>( size_t{0}, handle_->get_stream()); std::get<0>(mg_aggregate_extract_transform_output_buffer) = cugraph::test::device_gatherv(*handle_, @@ -340,28 +293,15 @@ class Tests_MGExtractTransformVFrontierOutgoingE *handle_, input_usecase, false, false); auto sg_graph_view = sg_graph.view(); - auto sg_property_buffer = cugraph::allocate_dataframe_buffer( - sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - - thrust::transform( - handle_->get_thrust_policy(), - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_size()), - cugraph::get_dataframe_buffer_begin(sg_property_buffer), - property_transform_t{hash_bin_count}); - - cugraph::edge_src_property_t sg_src_properties( - *handle_, sg_graph_view); - cugraph::edge_dst_property_t sg_dst_properties( - *handle_, sg_graph_view); - update_edge_src_property(*handle_, - sg_graph_view, - cugraph::get_dataframe_buffer_cbegin(sg_property_buffer), - sg_src_properties); - update_edge_dst_property(*handle_, - sg_graph_view, - cugraph::get_dataframe_buffer_cbegin(sg_property_buffer), - sg_dst_properties); + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); auto sg_key_buffer = cugraph::allocate_dataframe_buffer( sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -391,9 +331,9 @@ class Tests_MGExtractTransformVFrontierOutgoingE *handle_, sg_graph_view, sg_vertex_frontier.bucket(bucket_idx_cur), - sg_src_properties.view(), - sg_dst_properties.view(), - e_op_t{}); + sg_src_prop.view(), + sg_dst_prop.view(), + e_op_t{}); thrust::sort(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_extract_transform_output_buffer), diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index 9881cbade01..af5703b32ea 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -52,111 +54,6 @@ #include -template -struct property_type { - using type = std::conditional_t<(sizeof...(Args) > 1), - thrust::tuple, - typename thrust::tuple_element<0, thrust::tuple>::type>; -}; - -template -struct property_transform - : public thrust::unary_function::type> { - int mod{}; - property_transform(int mod_count) : mod(mod_count) {} - - template ::type> - constexpr __device__ - typename std::enable_if_t::value, type> - operator()(const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return thrust::make_tuple(static_cast(value)...); - } - - template ::type> - constexpr __device__ typename std::enable_if_t::value, type> operator()( - const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return static_cast(value); - } -}; - -template typename Tuple, typename... Args> -struct property_transform> : public property_transform { -}; - -template -struct generate_impl { - private: - using property_buffer_type = std::conditional_t< - (sizeof...(Args) > 1), - std::tuple...>, - rmm::device_uvector>::type>>; - - public: - using type = typename property_type::type; - static thrust::tuple initial_value(int init) - { - return thrust::make_tuple(static_cast(init)...); - } - template - static auto vertex_property(rmm::device_uvector& labels, - int hash_bin_count, - raft::handle_t const& handle) - { - auto data = cugraph::allocate_dataframe_buffer(labels.size(), handle.get_stream()); - auto zip = cugraph::get_dataframe_buffer_begin(data); - thrust::transform(handle.get_thrust_policy(), - labels.begin(), - labels.end(), - zip, - property_transform(hash_bin_count)); - return data; - } - template - static auto vertex_property(thrust::counting_iterator begin, - thrust::counting_iterator end, - int hash_bin_count, - raft::handle_t const& handle) - { - auto length = thrust::distance(begin, end); - auto data = cugraph::allocate_dataframe_buffer(length, handle.get_stream()); - auto zip = cugraph::get_dataframe_buffer_begin(data); - thrust::transform(handle.get_thrust_policy(), - begin, - end, - zip, - property_transform(hash_bin_count)); - return data; - } - - template - static auto column_property(raft::handle_t const& handle, - graph_view_type const& graph_view, - property_buffer_type& property) - { - auto output_property = cugraph::edge_dst_property_t(handle, graph_view); - update_edge_dst_property( - handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); - return output_property; - } - - template - static auto row_property(raft::handle_t const& handle, - graph_view_type const& graph_view, - property_buffer_type& property) - { - auto output_property = cugraph::edge_src_property_t(handle, graph_view); - update_edge_src_property( - handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); - return output_property; - } -}; - template struct comparator { static constexpr double threshold_ratio{1e-2}; @@ -211,14 +108,6 @@ buffer_type aggregate(const raft::handle_t& handle, const buffer_type& result) return aggregated_result; } -template -struct generate : public generate_impl { - static T initial_value(int init) { return static_cast(init); } -}; -template -struct generate> : public generate_impl { -}; - struct Prims_Usecase { bool check_correctness{true}; bool test_weighted{false}; @@ -273,16 +162,19 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE const int hash_bin_count = 5; const int initial_value = 4; - auto property_initial_value = generate::initial_value(initial_value); - using property_t = decltype(property_initial_value); - auto vertex_property_data = - generate::vertex_property((*d_mg_renumber_map_labels), hash_bin_count, *handle_); - auto col_prop = - generate::column_property(*handle_, mg_graph_view, vertex_property_data); - auto row_prop = generate::row_property(*handle_, mg_graph_view, vertex_property_data); - auto out_result = cugraph::allocate_dataframe_buffer( + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); + + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); + + auto out_result = cugraph::allocate_dataframe_buffer( mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - auto in_result = cugraph::allocate_dataframe_buffer( + auto in_result = cugraph::allocate_dataframe_buffer( mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); if (cugraph::test::g_perf) { @@ -294,13 +186,13 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE per_v_transform_reduce_incoming_e( *handle_, mg_graph_view, - row_prop.view(), - col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - if (row_property < col_property) { - return row_property; + mg_src_prop.view(), + mg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t wt, auto src_property, auto dst_property) { + if (src_property < dst_property) { + return src_property; } else { - return col_property; + return dst_property; } }, property_initial_value, @@ -323,13 +215,13 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE per_v_transform_reduce_outgoing_e( *handle_, mg_graph_view, - row_prop.view(), - col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - if (row_property < col_property) { - return row_property; + mg_src_prop.view(), + mg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t wt, auto src_property, auto dst_property) { + if (src_property < dst_property) { + return src_property; } else { - return col_property; + return dst_property; } }, property_initial_value, @@ -352,46 +244,46 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE *handle_, input_usecase, true, false); auto sg_graph_view = sg_graph.view(); - auto sg_vertex_property_data = generate::vertex_property( + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), - hash_bin_count, - *handle_); - auto sg_col_prop = - generate::column_property(*handle_, sg_graph_view, sg_vertex_property_data); - auto sg_row_prop = - generate::row_property(*handle_, sg_graph_view, sg_vertex_property_data); + hash_bin_count); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); result_compare comp{*handle_}; - auto global_out_result = cugraph::allocate_dataframe_buffer( + auto global_out_result = cugraph::allocate_dataframe_buffer( sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); per_v_transform_reduce_outgoing_e( *handle_, sg_graph_view, - sg_row_prop.view(), - sg_col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - if (row_property < col_property) { - return row_property; + sg_src_prop.view(), + sg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t wt, auto src_property, auto dst_property) { + if (src_property < dst_property) { + return src_property; } else { - return col_property; + return dst_property; } }, property_initial_value, cugraph::get_dataframe_buffer_begin(global_out_result)); - auto global_in_result = cugraph::allocate_dataframe_buffer( + auto global_in_result = cugraph::allocate_dataframe_buffer( sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); per_v_transform_reduce_incoming_e( *handle_, sg_graph_view, - sg_row_prop.view(), - sg_col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - if (row_property < col_property) { - return row_property; + sg_src_prop.view(), + sg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t wt, auto src_property, auto dst_property) { + if (src_property < dst_property) { + return src_property; } else { - return col_property; + return dst_property; } }, property_initial_value, @@ -427,15 +319,15 @@ TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_File, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>(std::get<0>(param), - std::get<1>(param)); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>( + run_current_test, false>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -444,15 +336,15 @@ TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>(std::get<0>(param), - std::get<1>(param)); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>( + run_current_test, true>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index e488bf2bf4f..043dd4658da 100644 --- a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -48,87 +50,6 @@ #include -template -struct property_transform : public thrust::unary_function> { - int mod{}; - property_transform(int mod_count) : mod(mod_count) {} - constexpr __device__ auto operator()(const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return thrust::make_tuple(static_cast(value)...); - } -}; - -template typename Tuple, typename... Args> -struct property_transform> : public property_transform { -}; - -template -auto make_iterator_tuple(Tuple& data, std::index_sequence) -{ - return thrust::make_tuple((std::get(data).begin())...); -} - -template -auto get_zip_iterator(std::tuple& data) -{ - return thrust::make_zip_iterator(make_iterator_tuple( - data, std::make_index_sequence>::value>())); -} - -template -auto get_property_iterator(std::tuple& data) -{ - return (std::get<0>(data)).begin(); -} - -template -auto get_property_iterator(std::tuple& data) -{ - return get_zip_iterator(data); -} - -template -struct generate_impl { - static thrust::tuple initial_value(int init) - { - return thrust::make_tuple(static_cast(init)...); - } - - template - static std::tuple...> property(rmm::device_uvector& labels, - int hash_bin_count, - raft::handle_t const& handle) - { - auto data = std::make_tuple(rmm::device_uvector(labels.size(), handle.get_stream())...); - auto zip = get_zip_iterator(data); - thrust::transform(handle.get_thrust_policy(), - labels.begin(), - labels.end(), - zip, - property_transform(hash_bin_count)); - return data; - } - - template - static std::tuple...> property(thrust::counting_iterator begin, - thrust::counting_iterator end, - int hash_bin_count, - raft::handle_t const& handle) - { - auto length = thrust::distance(begin, end); - auto data = std::make_tuple(rmm::device_uvector(length, handle.get_stream())...); - auto zip = get_zip_iterator(data); - thrust::transform(handle.get_thrust_policy(), - begin, - end, - zip, - property_transform(hash_bin_count)); - return data; - } -}; - template struct result_compare { static constexpr double threshold_ratio{1e-2}; @@ -173,15 +94,6 @@ struct result_compare> { } }; -template -struct generate : public generate_impl { - static T initial_value(int init) { return static_cast(init); } -}; - -template -struct generate> : public generate_impl { -}; - struct Prims_Usecase { bool check_correctness{true}; }; @@ -236,17 +148,18 @@ class Tests_MGReduceV const int hash_bin_count = 5; const int initial_value = 10; - auto property_initial_value = generate::initial_value(initial_value); - using property_t = decltype(property_initial_value); - auto property_data = - generate::property((*d_mg_renumber_map_labels), hash_bin_count, *handle_); - auto property_iter = get_property_iterator(property_data); + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); + + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, (*d_mg_renumber_map_labels), hash_bin_count); + auto property_iter = cugraph::get_dataframe_buffer_begin(mg_vertex_prop); enum class reduction_type_t { PLUS, MINIMUM, MAXIMUM }; reduction_type_t reduction_types[] = { reduction_type_t::PLUS, reduction_type_t::MINIMUM, reduction_type_t::MAXIMUM}; - std::unordered_map results; + std::unordered_map results; for (auto reduction_type : reduction_types) { if (cugraph::test::g_perf) { @@ -261,21 +174,21 @@ class Tests_MGReduceV mg_graph_view, property_iter, property_initial_value, - cugraph::reduce_op::plus{}); + cugraph::reduce_op::plus{}); break; case reduction_type_t::MINIMUM: results[reduction_type] = reduce_v(*handle_, mg_graph_view, property_iter, property_initial_value, - cugraph::reduce_op::minimum{}); + cugraph::reduce_op::minimum{}); break; case reduction_type_t::MAXIMUM: results[reduction_type] = reduce_v(*handle_, mg_graph_view, property_iter, property_initial_value, - cugraph::reduce_op::maximum{}); + cugraph::reduce_op::maximum{}); break; default: FAIL() << "should not be reached."; } @@ -298,40 +211,40 @@ class Tests_MGReduceV *handle_, input_usecase, true, false); auto sg_graph_view = sg_graph.view(); - auto sg_property_data = generate::property( + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), - hash_bin_count, - *handle_); - auto sg_property_iter = get_property_iterator(sg_property_data); + hash_bin_count); + auto sg_property_iter = cugraph::get_dataframe_buffer_begin(sg_vertex_prop); for (auto reduction_type : reduction_types) { - property_t expected_result{}; + result_t expected_result{}; switch (reduction_type) { case reduction_type_t::PLUS: expected_result = reduce_v(*handle_, sg_graph_view, sg_property_iter, property_initial_value, - cugraph::reduce_op::plus{}); + cugraph::reduce_op::plus{}); break; case reduction_type_t::MINIMUM: expected_result = reduce_v(*handle_, sg_graph_view, sg_property_iter, property_initial_value, - cugraph::reduce_op::minimum{}); + cugraph::reduce_op::minimum{}); break; case reduction_type_t::MAXIMUM: expected_result = reduce_v(*handle_, sg_graph_view, sg_property_iter, property_initial_value, - cugraph::reduce_op::maximum{}); + cugraph::reduce_op::maximum{}); break; default: FAIL() << "should not be reached."; } - result_compare compare{}; + result_compare compare{}; ASSERT_TRUE(compare(expected_result, results[reduction_type])); } } @@ -350,14 +263,14 @@ using Tests_MGReduceV_Rmat = Tests_MGReduceV; TEST_P(Tests_MGReduceV_File, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>(std::get<0>(param), - std::get<1>(param)); + run_current_test, false>( + std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>( + run_current_test, false>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -365,14 +278,14 @@ TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) TEST_P(Tests_MGReduceV_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>(std::get<0>(param), - std::get<1>(param)); + run_current_test, true>( + std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>( + run_current_test, true>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -380,54 +293,54 @@ TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) TEST_P(Tests_MGReduceV_File, CheckInt32Int32FloatTransposeFalse) { auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int32FloatTransposeFalse) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int64FloatTransposeFalse) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } TEST_P(Tests_MGReduceV_Rmat, CheckInt64Int64FloatTransposeFalse) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } TEST_P(Tests_MGReduceV_File, CheckInt32Int32FloatTransposeTrue) { auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int32FloatTransposeTrue) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } TEST_P(Tests_MGReduceV_Rmat, CheckInt32Int64FloatTransposeTrue) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } TEST_P(Tests_MGReduceV_Rmat, CheckInt64Int64FloatTransposeTrue) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index 82356809952..6935149b6d2 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -50,111 +52,6 @@ #include -template -struct property_type { - using type = std::conditional_t<(sizeof...(Args) > 1), - thrust::tuple, - typename thrust::tuple_element<0, thrust::tuple>::type>; -}; - -template -struct property_transform - : public thrust::unary_function::type> { - int mod{}; - property_transform(int mod_count) : mod(mod_count) {} - - template ::type> - constexpr __device__ - typename std::enable_if_t::value, type> - operator()(const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return thrust::make_tuple(static_cast(value)...); - } - - template ::type> - constexpr __device__ typename std::enable_if_t::value, type> operator()( - const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return static_cast(value); - } -}; - -template typename Tuple, typename... Args> -struct property_transform> : public property_transform { -}; - -template -struct generate_impl { - private: - using type = typename property_type::type; - using property_buffer_type = std::conditional_t< - (sizeof...(Args) > 1), - std::tuple...>, - rmm::device_uvector>::type>>; - - public: - static thrust::tuple initial_value(int init) - { - return thrust::make_tuple(static_cast(init)...); - } - template - static auto vertex_property(rmm::device_uvector& labels, - int hash_bin_count, - raft::handle_t const& handle) - { - auto data = cugraph::allocate_dataframe_buffer(labels.size(), handle.get_stream()); - auto zip = cugraph::get_dataframe_buffer_begin(data); - thrust::transform(handle.get_thrust_policy(), - labels.begin(), - labels.end(), - zip, - property_transform(hash_bin_count)); - return data; - } - template - static auto vertex_property(thrust::counting_iterator begin, - thrust::counting_iterator end, - int hash_bin_count, - raft::handle_t const& handle) - { - auto length = thrust::distance(begin, end); - auto data = cugraph::allocate_dataframe_buffer(length, handle.get_stream()); - auto zip = cugraph::get_dataframe_buffer_begin(data); - thrust::transform(handle.get_thrust_policy(), - begin, - end, - zip, - property_transform(hash_bin_count)); - return data; - } - - template - static auto column_property(raft::handle_t const& handle, - graph_view_type const& graph_view, - property_buffer_type& property) - { - auto output_property = cugraph::edge_dst_property_t(handle, graph_view); - update_edge_dst_property( - handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); - return output_property; - } - - template - static auto row_property(raft::handle_t const& handle, - graph_view_type const& graph_view, - property_buffer_type& property) - { - auto output_property = cugraph::edge_src_property_t(handle, graph_view); - update_edge_src_property( - handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); - return output_property; - } -}; - template struct result_compare { static constexpr double threshold_ratio{1e-3}; @@ -193,14 +90,6 @@ struct result_compare> { } }; -template -struct generate : public generate_impl { - static T initial_value(int init) { return static_cast(init); } -}; -template -struct generate> : public generate_impl { -}; - struct Prims_Usecase { bool check_correctness{true}; bool test_weighted{false}; @@ -255,13 +144,14 @@ class Tests_MGTransformReduceE const int hash_bin_count = 5; const int initial_value = 4; - auto property_initial_value = generate::initial_value(initial_value); - using property_t = decltype(property_initial_value); - auto vertex_property_data = - generate::vertex_property((*d_mg_renumber_map_labels), hash_bin_count, *handle_); - auto col_prop = - generate::column_property(*handle_, mg_graph_view, vertex_property_data); - auto row_prop = generate::row_property(*handle_, mg_graph_view, vertex_property_data); + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -272,13 +162,13 @@ class Tests_MGTransformReduceE auto result = transform_reduce_e( *handle_, mg_graph_view, - row_prop.view(), - col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - if (row_property < col_property) { - return row_property; + mg_src_prop.view(), + mg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t wt, auto src_property, auto dst_property) { + if (src_property < dst_property) { + return src_property; } else { - return col_property; + return dst_property; } }, property_initial_value); @@ -300,30 +190,30 @@ class Tests_MGTransformReduceE *handle_, input_usecase, true, false); auto sg_graph_view = sg_graph.view(); - auto sg_vertex_property_data = generate::vertex_property( + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), - hash_bin_count, - *handle_); - auto sg_col_prop = - generate::column_property(*handle_, sg_graph_view, sg_vertex_property_data); - auto sg_row_prop = - generate::row_property(*handle_, sg_graph_view, sg_vertex_property_data); + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); auto expected_result = transform_reduce_e( *handle_, sg_graph_view, - sg_row_prop.view(), - sg_col_prop.view(), - [] __device__(auto row, auto col, weight_t wt, auto row_property, auto col_property) { - if (row_property < col_property) { - return row_property; + sg_src_prop.view(), + sg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t wt, auto src_property, auto dst_property) { + if (src_property < dst_property) { + return src_property; } else { - return col_property; + return dst_property; } }, property_initial_value); - result_compare compare{}; + result_compare compare{}; ASSERT_TRUE(compare(expected_result, result)); } } @@ -341,14 +231,14 @@ using Tests_MGTransformReduceE_Rmat = Tests_MGTransformReduceE, false>(std::get<0>(param), - std::get<1>(param)); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>( + run_current_test, false>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -356,14 +246,14 @@ TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTupleIntFloatTranspose TEST_P(Tests_MGTransformReduceE_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>(std::get<0>(param), - std::get<1>(param)); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGTransformReduceE_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>( + run_current_test, true>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index d7b2f18cdd8..e4f2560f41b 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -44,28 +46,14 @@ #include -template -struct property_transform : public thrust::unary_function { - int mod{}; - property_transform(int mod_count) : mod(mod_count) {} - constexpr __device__ auto operator()(vertex_t, const vertex_t& val) - { - cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return static_cast(value); - } -}; +template +struct v_op_t { + int32_t mod{}; -template -struct property_transform> - : public thrust::unary_function> { - int mod{}; - property_transform(int mod_count) : mod(mod_count) {} - constexpr __device__ auto operator()(vertex_t, const vertex_t& val) + __device__ auto operator()(vertex_t, vertex_t val) const { cuco::detail::MurmurHash3_32 hash_func{}; - auto value = hash_func(val) % mod; - return thrust::make_tuple(static_cast(value)...); + return cugraph::test::detail::make_property_value(hash_func(val) % mod); } }; @@ -113,18 +101,6 @@ struct result_compare> { } }; -template -struct generate { - static T initial_value(int init) { return static_cast(init); } -}; -template -struct generate> { - static thrust::tuple initial_value(int init) - { - return thrust::make_tuple(static_cast(init)...); - } -}; - struct Prims_Usecase { bool check_correctness{true}; }; @@ -178,14 +154,14 @@ class Tests_MGTransformReduceV const int hash_bin_count = 5; const int initial_value = 10; - property_transform prop(hash_bin_count); - auto property_initial_value = generate::initial_value(initial_value); - using property_t = decltype(property_initial_value); + v_op_t v_op{hash_bin_count}; + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); enum class reduction_type_t { PLUS, MINIMUM, MAXIMUM }; reduction_type_t reduction_types[] = { reduction_type_t::PLUS, reduction_type_t::MINIMUM, reduction_type_t::MAXIMUM}; - std::unordered_map results; + std::unordered_map results; for (auto reduction_type : reduction_types) { if (cugraph::test::g_perf) { @@ -198,26 +174,26 @@ class Tests_MGTransformReduceV case reduction_type_t::PLUS: results[reduction_type] = transform_reduce_v(*handle_, mg_graph_view, - d_mg_renumber_map_labels->begin(), - prop, + (*d_mg_renumber_map_labels).begin(), + v_op, property_initial_value, - cugraph::reduce_op::plus{}); + cugraph::reduce_op::plus{}); break; case reduction_type_t::MINIMUM: results[reduction_type] = transform_reduce_v(*handle_, mg_graph_view, - d_mg_renumber_map_labels->begin(), - prop, + (*d_mg_renumber_map_labels).begin(), + v_op, property_initial_value, - cugraph::reduce_op::minimum{}); + cugraph::reduce_op::minimum{}); break; case reduction_type_t::MAXIMUM: results[reduction_type] = transform_reduce_v(*handle_, mg_graph_view, - d_mg_renumber_map_labels->begin(), - prop, + (*d_mg_renumber_map_labels).begin(), + v_op, property_initial_value, - cugraph::reduce_op::maximum{}); + cugraph::reduce_op::maximum{}); break; default: FAIL() << "should not be reached."; } @@ -241,38 +217,38 @@ class Tests_MGTransformReduceV auto sg_graph_view = sg_graph.view(); for (auto reduction_type : reduction_types) { - property_t expected_result{}; + result_t expected_result{}; switch (reduction_type) { case reduction_type_t::PLUS: expected_result = transform_reduce_v( *handle_, sg_graph_view, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), - prop, + v_op, property_initial_value, - cugraph::reduce_op::plus{}); + cugraph::reduce_op::plus{}); break; case reduction_type_t::MINIMUM: expected_result = transform_reduce_v( *handle_, sg_graph_view, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), - prop, + v_op, property_initial_value, - cugraph::reduce_op::minimum{}); + cugraph::reduce_op::minimum{}); break; case reduction_type_t::MAXIMUM: expected_result = transform_reduce_v( *handle_, sg_graph_view, thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), - prop, + v_op, property_initial_value, - cugraph::reduce_op::maximum{}); + cugraph::reduce_op::maximum{}); break; default: FAIL() << "should not be reached."; } - result_compare compare{}; + result_compare compare{}; ASSERT_TRUE(compare(expected_result, results[reduction_type])); } } @@ -291,14 +267,14 @@ using Tests_MGTransformReduceV_Rmat = Tests_MGTransformReduceV, false>(std::get<0>(param), - std::get<1>(param)); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) { auto param = GetParam(); - run_current_test, false>( + run_current_test, false>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -306,14 +282,14 @@ TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTranspose TEST_P(Tests_MGTransformReduceV_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>(std::get<0>(param), - std::get<1>(param)); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGTransformReduceV_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) { auto param = GetParam(); - run_current_test, true>( + run_current_test, true>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index ff8f0850368..25edd3b2748 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -56,39 +58,6 @@ #include -template -__device__ __host__ auto make_type_casted_tuple_from_scalar(T val, std::index_sequence) -{ - return thrust::make_tuple( - static_cast::type>(val)...); -} - -template -__device__ __host__ auto make_property_value(T val) -{ - property_t ret{}; - if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { - ret = make_type_casted_tuple_from_scalar( - val, std::make_index_sequence::value>{}); - } else { - ret = static_cast(val); - } - return ret; -} - -template -struct property_transform_t { - int mod{}; - - constexpr __device__ property_t operator()(vertex_t const v) const - { - static_assert(cugraph::is_thrust_tuple_of_arithmetic::value || - std::is_arithmetic_v); - cuco::detail::MurmurHash3_32 hash_func{}; - return make_property_value(hash_func(v) % mod); - } -}; - template struct e_op_t { __device__ auto operator()(key_t optionally_tagged_src, @@ -169,7 +138,7 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst hr_clock.start(); } - auto [mg_graph, mg_renumber_map_labels] = + auto [mg_graph, d_mg_renumber_map_labels] = cugraph::test::construct_graph( *handle_, input_usecase, false, renumber); @@ -187,28 +156,12 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst const int hash_bin_count = 5; - auto mg_property_buffer = cugraph::allocate_dataframe_buffer( - mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - - thrust::transform(handle_->get_thrust_policy(), - (*mg_renumber_map_labels).begin(), - (*mg_renumber_map_labels).end(), - cugraph::get_dataframe_buffer_begin(mg_property_buffer), - property_transform_t{hash_bin_count}); - - cugraph::edge_src_property_t mg_src_properties( - *handle_, mg_graph_view); - cugraph::edge_dst_property_t mg_dst_properties( - *handle_, mg_graph_view); - - update_edge_src_property(*handle_, - mg_graph_view, - cugraph::get_dataframe_buffer_cbegin(mg_property_buffer), - mg_src_properties); - update_edge_dst_property(*handle_, - mg_graph_view, - cugraph::get_dataframe_buffer_cbegin(mg_property_buffer), - mg_dst_properties); + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); auto mg_key_buffer = cugraph::allocate_dataframe_buffer( mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -221,7 +174,7 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst thrust::tabulate(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_key_buffer), cugraph::get_dataframe_buffer_end(mg_key_buffer), - [mg_renumber_map_labels = (*mg_renumber_map_labels).data(), + [mg_renumber_map_labels = (*d_mg_renumber_map_labels).data(), local_vertex_partition_range_first = mg_graph_view.local_vertex_partition_range_first()] __device__(size_t i) { return thrust::make_tuple( @@ -255,8 +208,8 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst *handle_, mg_graph_view, mg_vertex_frontier.bucket(bucket_idx_cur), - mg_src_properties.view(), - mg_dst_properties.view(), + mg_src_prop.view(), + mg_dst_prop.view(), e_op_t{}, cugraph::reduce_op::null{}); } else { @@ -265,8 +218,8 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst *handle_, mg_graph_view, mg_vertex_frontier.bucket(bucket_idx_cur), - mg_src_properties.view(), - mg_dst_properties.view(), + mg_src_prop.view(), + mg_dst_prop.view(), e_op_t{}, cugraph::reduce_op::plus{}); } @@ -284,7 +237,7 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst if (prims_usecase.check_correctness) { auto mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( - *handle_, (*mg_renumber_map_labels).data(), (*mg_renumber_map_labels).size()); + *handle_, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); auto mg_aggregate_new_frontier_key_buffer = cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); @@ -352,28 +305,15 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst *handle_, input_usecase, false, false); auto sg_graph_view = sg_graph.view(); - auto sg_property_buffer = cugraph::allocate_dataframe_buffer( - sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); - - thrust::transform( - handle_->get_thrust_policy(), - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_size()), - cugraph::get_dataframe_buffer_begin(sg_property_buffer), - property_transform_t{hash_bin_count}); - - cugraph::edge_src_property_t sg_src_properties( - *handle_, sg_graph_view); - cugraph::edge_dst_property_t sg_dst_properties( - *handle_, sg_graph_view); - update_edge_src_property(*handle_, - sg_graph_view, - cugraph::get_dataframe_buffer_cbegin(sg_property_buffer), - sg_src_properties); - update_edge_dst_property(*handle_, - sg_graph_view, - cugraph::get_dataframe_buffer_cbegin(sg_property_buffer), - sg_dst_properties); + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); auto sg_key_buffer = cugraph::allocate_dataframe_buffer( sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -408,8 +348,8 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst *handle_, sg_graph_view, sg_vertex_frontier.bucket(bucket_idx_cur), - sg_src_properties.view(), - sg_dst_properties.view(), + sg_src_prop.view(), + sg_dst_prop.view(), e_op_t{}, cugraph::reduce_op::null{}); } else { @@ -418,8 +358,8 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst *handle_, sg_graph_view, sg_vertex_frontier.bucket(bucket_idx_cur), - sg_src_properties.view(), - sg_dst_properties.view(), + sg_src_prop.view(), + sg_dst_prop.view(), e_op_t{}, cugraph::reduce_op::plus{}); } diff --git a/cpp/tests/prims/property_generator.cuh b/cpp/tests/prims/property_generator.cuh new file mode 100644 index 00000000000..fa779778770 --- /dev/null +++ b/cpp/tests/prims/property_generator.cuh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +namespace detail { + +template +__host__ __device__ auto make_type_casted_tuple_from_scalar(T val, std::index_sequence) +{ + return thrust::make_tuple( + static_cast::type>(val)...); +} + +template +__host__ __device__ auto make_property_value(T val) +{ + property_t ret{}; + if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { + ret = make_type_casted_tuple_from_scalar( + val, std::make_index_sequence::value>{}); + } else { + ret = static_cast(val); + } + return ret; +} + +template +struct property_transform { + int32_t mod{}; + + constexpr __device__ property_t operator()(vertex_t v) const + { + static_assert(cugraph::is_thrust_tuple_of_arithmetic::value || + std::is_arithmetic_v); + cuco::detail::MurmurHash3_32 hash_func{}; + return make_property_value(hash_func(v) % mod); + } +}; + +} // namespace detail + +template +struct generate { + private: + using property_buffer_type = + decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); + + public: + static property_t initial_value(int32_t init) + { + return detail::make_property_value(init); + } + + static auto vertex_property(raft::handle_t const& handle, + rmm::device_uvector const& labels, + int32_t hash_bin_count) + { + auto data = cugraph::allocate_dataframe_buffer(labels.size(), handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + labels.begin(), + labels.end(), + cugraph::get_dataframe_buffer_begin(data), + detail::property_transform{hash_bin_count}); + return data; + } + + static auto vertex_property(raft::handle_t const& handle, + thrust::counting_iterator begin, + thrust::counting_iterator end, + int32_t hash_bin_count) + { + auto length = thrust::distance(begin, end); + auto data = cugraph::allocate_dataframe_buffer(length, handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + begin, + end, + cugraph::get_dataframe_buffer_begin(data), + detail::property_transform{hash_bin_count}); + return data; + } + + template + static auto src_property(raft::handle_t const& handle, + graph_view_type const& graph_view, + property_buffer_type const& property) + { + auto output_property = + cugraph::edge_src_property_t(handle, graph_view); + update_edge_src_property( + handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); + return output_property; + } + + template + static auto dst_property(raft::handle_t const& handle, + graph_view_type const& graph_view, + property_buffer_type const& property) + { + auto output_property = + cugraph::edge_dst_property_t(handle, graph_view); + update_edge_dst_property( + handle, graph_view, cugraph::get_dataframe_buffer_begin(property), output_property); + return output_property; + } +}; + +} // namespace test +} // namespace cugraph From 368740ca1ce51e6a8675b97b4da3e8300dbf2ef2 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 15 Sep 2022 00:10:02 -0700 Subject: [PATCH 04/18] fix file name --- ...utgoine_e.cuh => per_v_random_select_transform_outgoing_e.cuh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/src/prims/{per_v_random_select_transform_outgoine_e.cuh => per_v_random_select_transform_outgoing_e.cuh} (100%) diff --git a/cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh similarity index 100% rename from cpp/src/prims/per_v_random_select_transform_outgoine_e.cuh rename to cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh From 1c24e1274b02d487fd8482e0c612cbccb4e8cb67 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 19 Sep 2022 09:50:29 -0700 Subject: [PATCH 05/18] add to_thrust_tuple utility function --- cpp/include/cugraph/utilities/thrust_tuple_utils.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp b/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp index 74e04af463c..230de14326a 100644 --- a/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp +++ b/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp @@ -184,6 +184,18 @@ auto std_tuple_to_thrust_tuple(TupleType tup) tup, std::make_index_sequence>{}); } +template +auto to_thrust_tuple(T scalar_value) +{ + return thrust::make_tuple(scalar_value); +} + +template +auto to_thrust_tuple(thrust::tuple tuple_value) +{ + return tuple_value; +} + // a temporary function to emulate thrust::tuple_cat (not supported) using std::tuple_cat (should // retire once thrust::tuple is replaced with cuda::std::tuple) template From 193cb0dd4b4c06cd5732886ac9897716fb4e9565 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 19 Sep 2022 09:51:05 -0700 Subject: [PATCH 06/18] fix compile error in vertex_frontier.cuh --- cpp/src/prims/vertex_frontier.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/prims/vertex_frontier.cuh b/cpp/src/prims/vertex_frontier.cuh index 72fec4518cf..1e628893235 100644 --- a/cpp/src/prims/vertex_frontier.cuh +++ b/cpp/src/prims/vertex_frontier.cuh @@ -153,7 +153,8 @@ class key_bucket_t { vertices_ = std::move(merged_vertices); } else { auto cur_size = vertices_.size(); - vertices_.resize(cur_size + thrust::distance(vertex_first, vertex_last)); + vertices_.resize(cur_size + thrust::distance(vertex_first, vertex_last), + handle_ptr_->get_stream()); thrust::copy(handle_ptr_->get_thrust_policy(), vertex_first, vertex_last, From 313dac9582f27b0c890d380bc17762a66acf71e9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 19 Sep 2022 13:19:15 -0700 Subject: [PATCH 07/18] fix compiler warnings --- .../cugraph/edge_partition_device_view.cuh | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 8fae1f93054..e0538a78241 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -211,13 +211,16 @@ class edge_partition_device_view_t local_degrees(this->major_range_size(), stream); if (dcs_nzd_vertices_) { - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(this->major_range_first()), - thrust::make_counting_iterator(this->major_range_last()), - local_degrees.begin(), - detail::local_degree_op_t{ - this->offsets_, major_range_first_, *dcs_nzd_vertices_, *major_hypersparse_first_}); + assert(major_hypersparse_first_); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(this->major_range_first()), + thrust::make_counting_iterator(this->major_range_last()), + local_degrees.begin(), + detail::local_degree_op_t{ + this->offsets_, + major_range_first_, + *dcs_nzd_vertices_, + major_hypersparse_first_.value_or(vertex_t{0})}); } else { thrust::transform( rmm::exec_policy(stream), @@ -235,13 +238,16 @@ class edge_partition_device_view_t local_degrees(majors.size(), stream); if (dcs_nzd_vertices_) { - thrust::transform( - rmm::exec_policy(stream), - majors.begin(), - majors.end(), - local_degrees.begin(), - detail::local_degree_op_t{ - this->offsets_, major_range_first_, *dcs_nzd_vertices_, *major_hypersparse_first_}); + assert(major_hypersparse_first_); + thrust::transform(rmm::exec_policy(stream), + majors.begin(), + majors.end(), + local_degrees.begin(), + detail::local_degree_op_t{ + this->offsets_, + major_range_first_, + dcs_nzd_vertices_.value(), + major_hypersparse_first_.value_or(vertex_t{0})}); } else { thrust::transform( rmm::exec_policy(stream), From f57568a146e9088d8d87fa5d1bb4a0874a7cabb3 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 19 Sep 2022 13:19:36 -0700 Subject: [PATCH 08/18] add test suit --- cpp/tests/CMakeLists.txt | 6 + ...er_v_random_select_transform_outgoing_e.cu | 292 ++++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4179ee026e0..c8357b89c50 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -534,6 +534,12 @@ if(BUILD_CUGRAPH_MG_TESTS) prims/mg_extract_transform_v_frontier_outgoing_e.cu) target_link_libraries(MG_EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E_TEST PRIVATE cuco::cuco) + ########################################################################################### + # - MG PRIMS PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E tests ------------------------------- + ConfigureTestMG(MG_PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E_TEST + prims/mg_per_v_random_select_transform_outgoing_e.cu) + target_link_libraries(MG_PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E_TEST PRIVATE cuco::cuco) + ########################################################################################### # - MG GATHER_UTILS tests ----------------------------------------------------------------- ConfigureTestMG(MG_GATHER_UTILS_TEST sampling/detail/mg_gather_utils.cu) diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu new file mode 100644 index 00000000000..a1b6df9ad7d --- /dev/null +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "property_generator.cuh" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +template +struct e_op_t { + using result_t = decltype(cugraph::thrust_tuple_cat(thrust::tuple{}, + cugraph::to_thrust_tuple(property_t{}), + cugraph::to_thrust_tuple(property_t{}))); + + __device__ result_t operator()(vertex_t src, + vertex_t dst, + property_t src_prop, + property_t dst_prop) const + { + if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { + static_assert(thrust::tuple_size::value == size_t{2}); + return thrust::make_tuple(src, + dst, + thrust::get<0>(src_prop), + thrust::get<1>(src_prop), + thrust::get<0>(dst_prop), + thrust::get<1>(dst_prop)); + } else { + return thrust::make_tuple(src, dst, src_prop, dst_prop); + } + } +}; + +struct Prims_Usecase { + size_t K{0}; + bool with_replacement{false}; + bool test_weighted{false}; + bool check_correctness{true}; +}; + +template +class Tests_MGPerVRandomSelectTransformOutgoingE + : public ::testing::TestWithParam> { + public: + Tests_MGPerVRandomSelectTransformOutgoingE() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Verify the results of per_v_random_select_transform_outgoing_e primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResClock hr_clock{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_clock.start(); + } + + auto [mg_graph, d_mg_renumber_map_labels] = + cugraph::test::construct_graph( + *handle_, input_usecase, prims_usecase.test_weighted, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + // 2. run MG per_v_random_select_transform_outgoing_e primitive + + const int hash_bin_count = 5; + + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *d_mg_renumber_map_labels, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); + + auto mg_vertex_buffer = rmm::device_uvector( + mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); + thrust::sequence(handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_vertex_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_buffer), + mg_graph_view.local_vertex_partition_range_first()); + + constexpr size_t bucket_idx_cur = 0; + constexpr size_t num_buckets = 1; + + cugraph::vertex_frontier_t mg_vertex_frontier(*handle_, + num_buckets); + mg_vertex_frontier.bucket(bucket_idx_cur) + .insert(cugraph::get_dataframe_buffer_begin(mg_vertex_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_buffer)); + + raft::random::RngState rng_state(static_cast(handle_->get_comms().get_rank())); + + using result_t = decltype(cugraph::thrust_tuple_cat(thrust::tuple{}, + cugraph::to_thrust_tuple(property_t{}), + cugraph::to_thrust_tuple(property_t{}))); + + std::optional invalid_value{std::nullopt}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_clock.start(); + } + + auto [sample_offsets, sample_e_op_results] = + cugraph::per_v_random_select_transform_outgoing_e(*handle_, + mg_graph_view, + mg_vertex_frontier.bucket(bucket_idx_cur), + mg_src_prop.view(), + mg_dst_prop.view(), + e_op_t{}, + rng_state, + prims_usecase.K, + prims_usecase.with_replacement, + invalid_value); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG per_v_random_select_transform_outgoing_e took " << elapsed_time * 1e-6 + << " s.\n"; + } + + // 3. validate MG results + + if (prims_usecase.check_correctness) { +#if 0 + cugraph::graph_t sg_graph(*handle_); + std::tie(sg_graph, std::ignore) = + cugraph::test::construct_graph( + *handle_, input_usecase, prims_usecase.test_weighted, false); + auto sg_graph_view = sg_graph.view(); + +// 1. check whether sources coincide with the local vertices in the frontier or not + +// 2. check sample counts + +// 3. check destinations exist in the input graph + +// 4. check source/destination property values + + auto sg_vertex_property_data = generate::vertex_property( + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + hash_bin_count, + *handle_); + auto sg_dst_prop = + generate::dst_property(*handle_, sg_graph_view, sg_vertex_property_data); + auto sg_src_prop = + generate::src_property(*handle_, sg_graph_view, sg_vertex_property_data); + + auto expected_result = count_if_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + sg_dst_prop.view(), + [] __device__(auto src, auto dst, weight_t, auto src_property, auto dst_property) { + return src_property < dst_property; + }); + ASSERT_TRUE(expected_result == result); +#endif + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr + Tests_MGPerVRandomSelectTransformOutgoingE::handle_ = nullptr; + +using Tests_MGPerVRandomSelectTransformOutgoingE_File = + Tests_MGPerVRandomSelectTransformOutgoingE; +using Tests_MGPerVRandomSelectTransformOutgoingE_Rmat = + Tests_MGPerVRandomSelectTransformOutgoingE; + +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_File, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGPerVRandomSelectTransformOutgoingE_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() From 41abaf00747939dfe0a904b55473156fe128b2ae Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 19 Sep 2022 13:19:50 -0700 Subject: [PATCH 09/18] fix compile errors --- ...r_v_random_select_transform_outgoing_e.cuh | 327 ++++++++++-------- 1 file changed, 184 insertions(+), 143 deletions(-) diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 8d22b906ff6..32c863191ee 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -15,6 +15,29 @@ */ #pragma once +#include + +#include +#include + +#include +#ifndef NO_CUGRAPH_OPS +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + namespace cugraph { namespace detail { @@ -29,7 +52,8 @@ struct convert_pair_to_triplet_t { int32_t col_comm_size{}; edge_t invalid_idx{}; - __device__ int32_t operator()(thrust::tuple index_pair) const + __device__ thrust::tuple operator()( + thrust::tuple index_pair) const { auto nbr_idx = thrust::get<0>(index_pair); auto key_idx = thrust::get<1>(index_pair); @@ -83,14 +107,16 @@ struct transform_and_count_local_nbr_indices_t { LocalNbrIdxIterator local_nbr_idx_first{}; OutputValueIterator output_value_first{}; thrust::optional output_count_first{}; - EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgeOp e_op{}; + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input; + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input; + EdgeOp e_op{}; edge_t invalid_idx{}; thrust::optional invalid_value{thrust::nullopt}; __device__ void operator()(size_t key_idx) const { auto key = *(key_first + key_idx); + vertex_t major{}; if constexpr (std::is_same_v) { major = key; } else { @@ -115,8 +141,8 @@ struct transform_and_count_local_nbr_indices_t { } else { thrust::tie(indices, weights, local_degree) = edge_partition.local_edges(major_offset); } - auto start_offset = *(local_nbr_idx_offset_first + key_idx); - auto end_offset = *(local_nbr_idx_offset_first + (key_idx + 1)); + auto start_offset = *(offset_first + key_idx); + auto end_offset = *(offset_first + (key_idx + 1)); size_t num_valid_local_nbr_indices{0}; for (size_t i = start_offset; i < end_offset; ++i) { @@ -137,22 +163,22 @@ struct transform_and_count_local_nbr_indices_t { key_or_src = key; key_or_dst = minor; } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - *(output_first + i) = evaluate_edge_op() - .compute(key_or_src, - key_or_dst, - weights ? (*weights)[local_nbr_idx] : weight_t{1.0}, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - e_op); + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + *(output_value_first + i) = evaluate_edge_op() + .compute(key_or_src, + key_or_dst, + weights ? (*weights)[local_nbr_idx] : weight_t{1.0}, + edge_partition_src_value_input.get(src_offset), + edge_partition_dst_value_input.get(dst_offset), + e_op); ++num_valid_local_nbr_indices; } else if (invalid_value) { - *(output_first + i) = *invalid_value; + *(output_value_first + i) = *invalid_value; } else { assert(output_count_first); } @@ -168,6 +194,7 @@ struct copy_and_fill_sample_e_op_results_t { InputIterator input_first{}; OutputIterator output_first{}; size_t K{}; + typename thrust::iterator_traits::value_type invalid_value; __device__ void operator()(size_t i) const { @@ -210,6 +237,8 @@ per_v_random_select_transform_e(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; using key_t = typename VertexFrontierBucketType::key_type; + using key_buffer_t = + decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); using edge_partition_src_input_device_view_t = std::conditional_t< std::is_same_v, @@ -232,7 +261,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, vertex_t, typename EdgeDstValueInputWrapper::value_iterator>>>; - static_assert(!GraphViewType::is_storage_transposed == incoming); + static_assert(GraphViewType::is_storage_transposed == incoming); static_assert(std::is_same_v(size_t{0}, rmm::cuda_stream_view{}))>( + ? std::make_optional( local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()) : std::nullopt; if constexpr (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); device_allgatherv(col_comm, frontier_key_first, - get_dataframe_buffer_begin(aggregate_local_frontier_keys), + get_dataframe_buffer_begin(*aggregate_local_frontier_keys), local_frontier_sizes, local_frontier_displacements, handle.get_stream()); @@ -307,7 +335,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, // 2. compute degrees auto aggregate_local_frontier_local_degrees = - GrpahViewType::is_multi_gpu + GraphViewType::is_multi_gpu ? std::make_optional>( local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()) : std::nullopt; @@ -318,7 +346,6 @@ per_v_random_select_transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); vertex_t const* edge_partition_frontier_major_first{nullptr}; - vertex_t const* edge_partition_frontier_major_last{nullptr}; auto edge_partition_frontier_key_first = (GraphViewType::is_multi_gpu ? get_dataframe_buffer_begin(*aggregate_local_frontier_keys) @@ -329,12 +356,10 @@ per_v_random_select_transform_e(raft::handle_t const& handle, } else { edge_partition_frontier_major_first = thrust::get<0>(edge_partition_frontier_key_first); } - edge_partition_frontier_major_last = - edge_partition_frontier_major_first + local_frontier_sizes[i]; auto edge_partition_frontier_local_degrees = edge_partition.compute_local_degrees( - raft::device_span(edge_partition_frontier_major_first, - edge_partition_frontier_size), + raft::device_span(edge_partition_frontier_major_first, + local_frontier_sizes[i]), handle.get_stream()); if constexpr (GraphViewType::is_multi_gpu) { @@ -351,21 +376,24 @@ per_v_random_select_transform_e(raft::handle_t const& handle, } auto frontier_gathered_local_degrees = - GrpahViewType::is_multi_gpu + GraphViewType::is_multi_gpu ? std::make_optional>(frontier.size() * K, handle.get_stream()) : std::nullopt; - if constexpr (GrpahViewType::is_multi_gpu) { + if constexpr (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + std::tie(frontier_gathered_local_degrees, std::ignore) = shuffle_values(col_comm, (*aggregate_local_frontier_local_degrees).begin(), local_frontier_sizes, handle.get_stream()); - thrust::tablulate(handle.get_thrust_policy(), - frontier_degrees.begin(), - frontier_degrees.end(), - strided_sum_t{frontier_gathered_local_degrees.data(), - frontier.size(), - static_cast(col_comm_size)}); + thrust::tabulate(handle.get_thrust_policy(), + frontier_degrees.begin(), + frontier_degrees.end(), + strided_sum_t{(*frontier_gathered_local_degrees).data(), + frontier.size(), + static_cast(col_comm_size)}); aggregate_local_frontier_local_degrees = std::nullopt; } @@ -375,13 +403,13 @@ per_v_random_select_transform_e(raft::handle_t const& handle, // FIXME: get_sampling_index is inefficient when degree >> K & with_replacement = false // FIXME: Need to verify that sample_nbr_indices[] are filled with INVALID_IDX when degree == 0 // (with_replacement = true) or degree < K (with_replacement = false) - cugraph_ops::get_sampling_index(sample_nbr_indices.data(), - rng_state, - frontier_degrees.data(), - static_cast(frontier.size()), - static_cats(K), - with_replacement, - handle.get_stream()); + cugraph::ops::gnn::graph::get_sampling_index(sample_nbr_indices.data(), + rng_state, + frontier_degrees.data(), + static_cast(frontier_degrees.size()), + static_cast(K), + with_replacement, + handle.get_stream()); frontier_degrees.resize(0, handle.get_stream()); frontier_degrees.shrink_to_fit(handle.get_stream()); @@ -392,13 +420,14 @@ per_v_random_select_transform_e(raft::handle_t const& handle, // neighbors are distributed in col_comm_size partitions) auto sample_key_indices = GraphViewType::is_multi_gpu - ? std::make_optional>(0, handle.get_stream()) + ? std::make_optional>(0, handle.get_stream()) : std::nullopt; - auto local_frontier_sample_counts = - GrpahViewType::is_multi_gpu ? std::vector(0) : std::vector{frontier.size() * K}; - auto local_frontier_sample_displacements = - GrpahViewType::is_multi_gpu ? std::vector(0) : std::vector{0}; - if constexpr (GrpahViewType::is_multi_gpu) { + auto local_frontier_sample_counts = std::vector{}; + auto local_frontier_sample_displacements = std::vector{}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto col_comm_ranks = rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); auto sample_key_indices = @@ -407,41 +436,43 @@ per_v_random_select_transform_e(raft::handle_t const& handle, thrust::make_tuple(sample_local_nbr_indices.begin(), thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), divider_t{K}))); - thrust::transform(handle.get_thrsut_policy(), - input_pair_first, - input_pair_first + sample_indices.size(), - thrust::make_zip_iterator(thrust::make_tuple( - col_comm_ranks.begin(), sample_indices.begin(), key_indices.begin())), - convert_pair_to_triplet_t{ - raft::device_span(frontier_gathered_local_degrees.data(), - frontier_gathered_local_degrees.size()), - frontier.size(), - K, - col_comm_size, - ops::gnn::graph::INVALID_IDX}); + thrust::transform( + handle.get_thrust_policy(), + input_pair_first, + input_pair_first + sample_local_nbr_indices.size(), + thrust::make_zip_iterator(thrust::make_tuple( + col_comm_ranks.begin(), sample_local_nbr_indices.begin(), sample_key_indices.begin())), + convert_pair_to_triplet_t{ + raft::device_span((*frontier_gathered_local_degrees).data(), + (*frontier_gathered_local_degrees).size()), + frontier.size(), + K, + col_comm_size, + cugraph::ops::gnn::graph::INVALID_ID}); frontier_gathered_local_degrees = std::nullopt; auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - sample_local_nbr_indices.bgin(), col_comm_ranks.begin(), key_indices.begin())); + sample_local_nbr_indices.begin(), col_comm_ranks.begin(), sample_key_indices.begin())); sample_local_nbr_indices.resize( thrust::distance(triplet_first, thrust::remove_if(handle.get_thrust_policy(), triplet_first, triplet_first + sample_local_nbr_indices.size(), - invalid_col_comm_rank{int32_t{-1}})), + invalid_col_comm_rank_t{int32_t{-1}})), handle.get_stream()); col_comm_ranks.resize(sample_local_nbr_indices.size(), handle.get_stream()); sample_key_indices.resize(sample_local_nbr_indices.size(), handle.get_stream()); - auto d_tx_counts = groupby_and_count(col_comm_ranks.begin(), - col_comm_ranks.end(), - thrust::make_zip_iterator(thrust::make_tuple( - sample_local_nbr_indices.begin(), key_indices.begin())), - thrust::identity{}, - col_comm_size, - std::numeric_limits::max(), - handle.get_stream()); + auto d_tx_counts = + groupby_and_count(col_comm_ranks.begin(), + col_comm_ranks.end(), + thrust::make_zip_iterator(thrust::make_tuple( + sample_local_nbr_indices.begin(), sample_key_indices.begin())), + thrust::identity{}, + col_comm_size, + std::numeric_limits::max(), + handle.get_stream()); std::vector h_tx_counts(d_tx_counts.size()); raft::update_host( @@ -449,7 +480,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, handle.sync_stream(); auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(sample_local_nbr_indices.bgin(), key_indices.begin())); + thrust::make_tuple(sample_local_nbr_indices.begin(), sample_key_indices.begin())); auto [rx_value_buffer, rx_counts] = shuffle_values(col_comm, pair_first, h_tx_counts, handle.get_stream()); @@ -457,8 +488,11 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_key_indices = std::move(std::get<1>(rx_value_buffer)); local_frontier_sample_displacements = std::vector(rx_counts.size()); std::exclusive_scan( - rx_counts.begin(), rx_counts.end(), local_frontier_sample_displacements.begin()); + rx_counts.begin(), rx_counts.end(), local_frontier_sample_displacements.begin(), size_t{0}); local_frontier_sample_counts = std::move(rx_counts); + } else { + local_frontier_sample_counts.push_back(frontier.size() * K); + local_frontier_sample_displacements.push_back(size_t{0}); } // 5. transform @@ -467,7 +501,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, (!GraphViewType::is_multi_gpu && !invalid_value) ? std::make_optional>(frontier.size(), handle.get_stream()) : std::nullopt; - auto sample_e_op_results = allocate_dataframe_buffer( + auto sample_e_op_results = allocate_dataframe_buffer( local_frontier_sample_displacements.back() + local_frontier_sample_counts.back(), handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { @@ -483,7 +517,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_local_nbr_indices.begin() + local_frontier_sample_displacements[i]; auto edge_partition_sample_e_op_result_first = - get_dataframe_buffer_beign(sample_e_op_results) + local_frontier_sample_displacements[i]; + get_dataframe_buffer_begin(sample_e_op_results) + local_frontier_sample_displacements[i]; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; @@ -507,14 +541,14 @@ per_v_random_select_transform_e(raft::handle_t const& handle, thrust::count_if(handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(local_frontier_sample_counts[i]), - is_first_in_run_t{sample_key_indices.data() + + is_first_in_run_t{(*sample_key_indices).data() + local_frontier_sample_displacements[i]}); - rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_strema()); + rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_stream()); rmm::device_uvector unique_key_local_nbr_idx_counts(num_unique_key_indices, handle.get_stream()); thrust::reduce_by_key(handle.get_thrust_policy(), - sample_key_indices.begin() + local_frontier_sample_displacements[i], - sample_key_indices.begin() + local_frontier_sample_displacements[i] + + (*sample_key_indices).begin() + local_frontier_sample_displacements[i], + (*sample_key_indices).begin() + local_frontier_sample_displacements[i] + local_frontier_sample_counts[i], thrust::make_constant_iterator(edge_t{1}), unique_key_indices.begin(), @@ -523,48 +557,20 @@ per_v_random_select_transform_e(raft::handle_t const& handle, handle.get_stream()); unique_key_local_nbr_idx_offsets.set_element_to_zero_async(size_t{0}, handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), - unique_local_nbr_idx_counts.begin(), - unique_local_nbr_idx_counts.end(), - unique_local_nbr_idx_offsets.begin() + 1); + unique_key_local_nbr_idx_counts.begin(), + unique_key_local_nbr_idx_counts.end(), + unique_key_local_nbr_idx_offsets.begin() + 1); auto offset_first = unique_key_local_nbr_idx_offsets.begin(); - thrust::for_each( - handle.get_thrust_policy(), - unique_key_indices.begin(), - unique_key_indices.end(), - transform_and_count_local_nbr_indices_t{edge_partition, - edge_partition_frontier_key_first, - offset_first, - edge_partition_sample_local_nbr_index_first, - edge_partition_sample_e_op_result_first, - thrust::nullopt, - edge_partition_src_value_input, - edge_partition_dst_value_input, - e_op, - invalid_idx, - invalid_value}); - } else { - auto offset_first = thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), - multiplier_t{K}); thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(unique_key_indices.size()), + unique_key_indices.begin(), + unique_key_indices.end(), transform_and_count_local_nbr_indices_t< GraphViewType, decltype(edge_partition_frontier_key_first), decltype(offset_first), decltype(edge_partition_sample_local_nbr_index_first), decltype(edge_partition_sample_e_op_result_first), - size_t const*, + size_t*, edge_partition_src_input_device_view_t, edge_partition_dst_input_device_view_t, EdgeOp, @@ -573,13 +579,40 @@ per_v_random_select_transform_e(raft::handle_t const& handle, offset_first, edge_partition_sample_local_nbr_index_first, edge_partition_sample_e_op_result_first, - sample_counts ? thrust::optional((*sample_counts).data()) - : thrust::nullopt, + thrust::nullopt, edge_partition_src_value_input, edge_partition_dst_value_input, e_op, - invalid_idx, - invalid_value}); + cugraph::ops::gnn::graph::INVALID_ID, + invalid_value ? thrust::optional{*invalid_value} : thrust::nullopt}); + } else { + auto offset_first = thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), + multiplier_t{K}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(frontier.size()), + transform_and_count_local_nbr_indices_t< + GraphViewType, + decltype(edge_partition_frontier_key_first), + decltype(offset_first), + decltype(edge_partition_sample_local_nbr_index_first), + decltype(edge_partition_sample_e_op_result_first), + size_t*, + edge_partition_src_input_device_view_t, + edge_partition_dst_input_device_view_t, + EdgeOp, + T>{edge_partition, + edge_partition_frontier_key_first, + offset_first, + edge_partition_sample_local_nbr_index_first, + edge_partition_sample_e_op_result_first, + sample_counts ? thrust::optional((*sample_counts).data()) : thrust::nullopt, + edge_partition_src_value_input, + edge_partition_dst_value_input, + e_op, + cugraph::ops::gnn::graph::INVALID_ID, + invalid_value ? thrust::optional{*invalid_value} : thrust::nullopt}); } } @@ -589,41 +622,49 @@ per_v_random_select_transform_e(raft::handle_t const& handle, : std::make_optional>( frontier.size() + 1, handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(sample_e_op_results.bgin(), sample_key_indices.begin())); - auto [rx_value_buffer, rx_counts] = - shuffle_values(col_comm, pair_first, local_frontier_sample_counts, handle.get_stream()); - sample_e_op_results = std::move(std::get<0>(rx_value_buffer)); - sample_key_indices = std::move(std::get<1>(rx_value_buffer)); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + std::vector rx_counts{}; + std::tie(sample_e_op_results, rx_counts) = + shuffle_values(col_comm, + get_dataframe_buffer_begin(sample_e_op_results), + local_frontier_sample_counts, + handle.get_stream()); + std::tie(sample_key_indices, std::ignore) = shuffle_values( + col_comm, (*sample_key_indices).begin(), local_frontier_sample_counts, handle.get_stream()); // FIXME: better refactor this sort-and-reduce-by-key thrust::sort_by_key(handle.get_thrust_policy(), - sample_key_indices.begin(), - sample_key_indices.end(), - sample_e_op_results.begin()); + (*sample_key_indices).begin(), + (*sample_key_indices).end(), + get_dataframe_buffer_begin(sample_e_op_results)); auto num_unique_key_indices = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(sample_key_indices.size()), - is_first_in_run_t{sample_key_indices.data()}, - rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_strema()); + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator((*sample_key_indices).size()), + is_first_in_run_t{(*sample_key_indices).data()}); + rmm::device_uvector unique_key_indices(num_unique_key_indices, handle.get_stream()); rmm::device_uvector unique_key_sample_counts(num_unique_key_indices, - handle.get_stream()); + handle.get_stream()); thrust::reduce_by_key(handle.get_thrust_policy(), - sample_key_indices.begin(), - sample_key_indices.end(), + (*sample_key_indices).begin(), + (*sample_key_indices).end(), thrust::make_constant_iterator(edge_t{1}), unique_key_indices.begin(), unique_key_sample_counts.begin()); sample_counts = rmm::device_uvector(frontier.size(), handle.get_stream()); thrust::fill( handle.get_thrust_policy(), (*sample_counts).begin(), (*sample_counts).end(), size_t{0}); - thrust::scatter(handle.get_thrust_policy(), unique_key_sample_counts.begin(), unique_key_sample_counts.end(), unique_key_indices.begin(), (*sample_counts).begin()); + thrust::scatter(handle.get_thrust_policy(), + unique_key_sample_counts.begin(), + unique_key_sample_counts.end(), + unique_key_indices.begin(), + (*sample_counts).begin()); if (invalid_value) { rmm::device_uvector sample_displacements((*sample_counts).size(), handle.get_stream()); thrust::exclusive_scan(handle.get_thrust_policy(), - (*sample_counts).beign(), - (*sample_counts).emd(), + (*sample_counts).begin(), + (*sample_counts).end(), sample_displacements.begin()); auto tmp_sample_e_op_results = allocate_dataframe_buffer(frontier.size() * K, handle.get_stream()); @@ -638,10 +679,10 @@ per_v_random_select_transform_e(raft::handle_t const& handle, raft::device_span(sample_displacements.data(), sample_displacements.size()), input_first, output_first, - K}); + K, + *invalid_value}); sample_e_op_results = std::move(tmp_sample_e_op_results); - } - else { + } else { (*sample_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), (*sample_counts).begin(), @@ -738,7 +779,7 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, std::optional invalid_value, bool do_expensive_check = false) { - static_assert(false, "unimplemented."); + CUGRAPH_FAIL("unimplemented."); return std::make_tuple(std::nullopt, allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); @@ -815,7 +856,7 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, std::optional invalid_value, bool do_expensive_check = false) { - return detail::per_v_random_select_transform_e(hanlde, + return detail::per_v_random_select_transform_e(handle, graph_view, frontier, edge_src_value_input, From da83d3ba42db17b1c3213e8e88adc09e5311f759 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 22 Sep 2022 20:17:38 -0700 Subject: [PATCH 10/18] silence spurious may-be-used-uninitialized warnings --- cpp/include/cugraph/edge_partition_device_view.cuh | 10 +++------- cpp/include/cugraph/utilities/misc_utils.cuh | 10 ++++++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index e0538a78241..df6a7ab5ad3 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -165,13 +166,8 @@ class edge_partition_device_view_t view) : detail::edge_partition_device_view_base_t( view.offsets(), view.indices(), view.weights()), - dcs_nzd_vertices_( - view.dcs_nzd_vertices() - ? thrust::optional>{*(view.dcs_nzd_vertices())} - : thrust::nullopt), - major_hypersparse_first_(view.major_hypersparse_first() - ? thrust::optional{*(view.major_hypersparse_first())} - : thrust::nullopt), + dcs_nzd_vertices_(detail::to_thrust_optional(view.dcs_nzd_vertices())), + major_hypersparse_first_(detail::to_thrust_optional(view.major_hypersparse_first())), major_range_first_(view.major_range_first()), major_range_last_(view.major_range_last()), minor_range_first_(view.minor_range_first()), diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 4d96360e2c5..2d34327d71a 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -23,7 +23,9 @@ #include #include #include +#include +#include #include #include @@ -72,6 +74,14 @@ std::tuple, std::vector> compute_offset_aligned_ed return std::make_tuple(h_vertex_offsets, h_edge_offsets); } +template +thrust::optional to_thrust_optional(std::optional val) +{ + thrust::optional ret{thrust::nullopt}; + if (val) { ret = *val; } + return ret; +} + } // namespace detail } // namespace cugraph From d5e415b98a9b0b4ec91571d440c4b3002ce15f97 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 22 Sep 2022 20:22:37 -0700 Subject: [PATCH 11/18] bug fixes --- ...r_v_random_select_transform_outgoing_e.cuh | 115 ++++++----- ...er_v_random_select_transform_outgoing_e.cu | 188 ++++++++++++++---- 2 files changed, 215 insertions(+), 88 deletions(-) diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 32c863191ee..f82eb10d8c7 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -85,6 +86,7 @@ struct invalid_col_comm_rank_t { }; template edge_partition{}; + UniqueKeyIdxIterator unique_key_idx_first{}; KeyIterator key_first{}; OffsetIterator offset_first{}; LocalNbrIdxIterator local_nbr_idx_first{}; @@ -113,9 +116,10 @@ struct transform_and_count_local_nbr_indices_t { edge_t invalid_idx{}; thrust::optional invalid_value{thrust::nullopt}; - __device__ void operator()(size_t key_idx) const + __device__ void operator()(size_t i) const { - auto key = *(key_first + key_idx); + auto key_idx = *(unique_key_idx_first + i); + auto key = *(key_first + key_idx); vertex_t major{}; if constexpr (std::is_same_v) { major = key; @@ -123,6 +127,7 @@ struct transform_and_count_local_nbr_indices_t { major = thrust::get<0>(key); } auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + printf("major=%d major_offste=%d\n", (int)major, (int)major_offset); vertex_t const* indices{nullptr}; thrust::optional weights{thrust::nullopt}; [[maybe_unused]] edge_t local_degree{0}; @@ -141,8 +146,8 @@ struct transform_and_count_local_nbr_indices_t { } else { thrust::tie(indices, weights, local_degree) = edge_partition.local_edges(major_offset); } - auto start_offset = *(offset_first + key_idx); - auto end_offset = *(offset_first + (key_idx + 1)); + auto start_offset = *(offset_first + i); + auto end_offset = *(offset_first + (i + 1)); size_t num_valid_local_nbr_indices{0}; for (size_t i = start_offset; i < end_offset; ++i) { @@ -269,6 +274,9 @@ per_v_random_select_transform_e(raft::handle_t const& handle, EdgeOp>::result_type, T>); + CUGRAPH_EXPECTS(K >= size_t{1}, + "Invalid input argument: invalid K, K should be a positive integer."); + if (do_expensive_check) { // FIXME: better re-factor this check function? vertex_t const* frontier_vertex_first{nullptr}; @@ -377,7 +385,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, auto frontier_gathered_local_degrees = GraphViewType::is_multi_gpu - ? std::make_optional>(frontier.size() * K, handle.get_stream()) + ? std::make_optional>(size_t{0}, handle.get_stream()) : std::nullopt; if constexpr (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); @@ -401,15 +409,15 @@ per_v_random_select_transform_e(raft::handle_t const& handle, rmm::device_uvector sample_nbr_indices(frontier.size() * K, handle.get_stream()); // FIXME: get_sampling_index is inefficient when degree >> K & with_replacement = false - // FIXME: Need to verify that sample_nbr_indices[] are filled with INVALID_IDX when degree == 0 - // (with_replacement = true) or degree < K (with_replacement = false) - cugraph::ops::gnn::graph::get_sampling_index(sample_nbr_indices.data(), - rng_state, - frontier_degrees.data(), - static_cast(frontier_degrees.size()), - static_cast(K), - with_replacement, - handle.get_stream()); + if (frontier_degrees.size() > 0) { + cugraph::ops::gnn::graph::get_sampling_index(sample_nbr_indices.data(), + rng_state, + frontier_degrees.data(), + static_cast(frontier_degrees.size()), + static_cast(K), + with_replacement, + handle.get_stream()); + } frontier_degrees.resize(0, handle.get_stream()); frontier_degrees.shrink_to_fit(handle.get_stream()); @@ -418,20 +426,18 @@ per_v_random_select_transform_e(raft::handle_t const& handle, auto sample_local_nbr_indices = std::move( sample_nbr_indices); // neighbor index within an edge partition (note that each vertex's // neighbors are distributed in col_comm_size partitions) - auto sample_key_indices = - GraphViewType::is_multi_gpu - ? std::make_optional>(0, handle.get_stream()) - : std::nullopt; + std::optional> sample_key_indices{ + std::nullopt}; // relevant only when multi-GPU auto local_frontier_sample_counts = std::vector{}; auto local_frontier_sample_displacements = std::vector{}; if constexpr (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); + sample_key_indices = + rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); auto col_comm_ranks = rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); - auto sample_key_indices = - rmm::device_uvector(sample_local_nbr_indices.size(), handle.get_stream()); auto input_pair_first = thrust::make_zip_iterator( thrust::make_tuple(sample_local_nbr_indices.begin(), thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), @@ -441,7 +447,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, input_pair_first, input_pair_first + sample_local_nbr_indices.size(), thrust::make_zip_iterator(thrust::make_tuple( - col_comm_ranks.begin(), sample_local_nbr_indices.begin(), sample_key_indices.begin())), + col_comm_ranks.begin(), sample_local_nbr_indices.begin(), (*sample_key_indices).begin())), convert_pair_to_triplet_t{ raft::device_span((*frontier_gathered_local_degrees).data(), (*frontier_gathered_local_degrees).size()), @@ -453,7 +459,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, frontier_gathered_local_degrees = std::nullopt; auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - sample_local_nbr_indices.begin(), col_comm_ranks.begin(), sample_key_indices.begin())); + sample_local_nbr_indices.begin(), col_comm_ranks.begin(), (*sample_key_indices).begin())); sample_local_nbr_indices.resize( thrust::distance(triplet_first, thrust::remove_if(handle.get_thrust_policy(), @@ -462,13 +468,13 @@ per_v_random_select_transform_e(raft::handle_t const& handle, invalid_col_comm_rank_t{int32_t{-1}})), handle.get_stream()); col_comm_ranks.resize(sample_local_nbr_indices.size(), handle.get_stream()); - sample_key_indices.resize(sample_local_nbr_indices.size(), handle.get_stream()); + (*sample_key_indices).resize(sample_local_nbr_indices.size(), handle.get_stream()); auto d_tx_counts = groupby_and_count(col_comm_ranks.begin(), col_comm_ranks.end(), thrust::make_zip_iterator(thrust::make_tuple( - sample_local_nbr_indices.begin(), sample_key_indices.begin())), + sample_local_nbr_indices.begin(), (*sample_key_indices).begin())), thrust::identity{}, col_comm_size, std::numeric_limits::max(), @@ -480,7 +486,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, handle.sync_stream(); auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(sample_local_nbr_indices.begin(), sample_key_indices.begin())); + thrust::make_tuple(sample_local_nbr_indices.begin(), (*sample_key_indices).begin())); auto [rx_value_buffer, rx_counts] = shuffle_values(col_comm, pair_first, h_tx_counts, handle.get_stream()); @@ -561,30 +567,33 @@ per_v_random_select_transform_e(raft::handle_t const& handle, unique_key_local_nbr_idx_counts.end(), unique_key_local_nbr_idx_offsets.begin() + 1); auto offset_first = unique_key_local_nbr_idx_offsets.begin(); - thrust::for_each(handle.get_thrust_policy(), - unique_key_indices.begin(), - unique_key_indices.end(), - transform_and_count_local_nbr_indices_t< - GraphViewType, - decltype(edge_partition_frontier_key_first), - decltype(offset_first), - decltype(edge_partition_sample_local_nbr_index_first), - decltype(edge_partition_sample_e_op_result_first), - size_t*, - edge_partition_src_input_device_view_t, - edge_partition_dst_input_device_view_t, - EdgeOp, - T>{edge_partition, - edge_partition_frontier_key_first, - offset_first, - edge_partition_sample_local_nbr_index_first, - edge_partition_sample_e_op_result_first, - thrust::nullopt, - edge_partition_src_value_input, - edge_partition_dst_value_input, - e_op, - cugraph::ops::gnn::graph::INVALID_ID, - invalid_value ? thrust::optional{*invalid_value} : thrust::nullopt}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(unique_key_indices.size()), + transform_and_count_local_nbr_indices_t{edge_partition, + unique_key_indices.begin(), + edge_partition_frontier_key_first, + offset_first, + edge_partition_sample_local_nbr_index_first, + edge_partition_sample_e_op_result_first, + thrust::nullopt, + edge_partition_src_value_input, + edge_partition_dst_value_input, + e_op, + cugraph::ops::gnn::graph::INVALID_ID, + to_thrust_optional(invalid_value)}); } else { auto offset_first = thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), multiplier_t{K}); @@ -594,6 +603,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, thrust::make_counting_iterator(frontier.size()), transform_and_count_local_nbr_indices_t< GraphViewType, + decltype(thrust::make_counting_iterator(size_t{0})), decltype(edge_partition_frontier_key_first), decltype(offset_first), decltype(edge_partition_sample_local_nbr_index_first), @@ -603,6 +613,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, edge_partition_dst_input_device_view_t, EdgeOp, T>{edge_partition, + thrust::make_counting_iterator(size_t{0}), edge_partition_frontier_key_first, offset_first, edge_partition_sample_local_nbr_index_first, @@ -612,7 +623,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, edge_partition_dst_value_input, e_op, cugraph::ops::gnn::graph::INVALID_ID, - invalid_value ? thrust::optional{*invalid_value} : thrust::nullopt}); + to_thrust_optional(invalid_value)}); } } @@ -672,8 +683,8 @@ per_v_random_select_transform_e(raft::handle_t const& handle, auto output_first = get_dataframe_buffer_begin(tmp_sample_e_op_results); thrust::for_each( handle.get_thrust_policy(), - unique_key_indices.begin(), - unique_key_indices.end(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(frontier.size()), copy_and_fill_sample_e_op_results_t{ raft::device_span((*sample_counts).data(), (*sample_counts).size()), raft::device_span(sample_displacements.data(), sample_displacements.size()), diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index a1b6df9ad7d..7fde20e7cac 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -72,6 +73,7 @@ struct e_op_t { struct Prims_Usecase { size_t K{0}; bool with_replacement{false}; + bool use_invalid_value{false}; bool test_weighted{false}; bool check_correctness{true}; }; @@ -154,6 +156,11 @@ class Tests_MGPerVRandomSelectTransformOutgoingE cugraph::to_thrust_tuple(property_t{}))); std::optional invalid_value{std::nullopt}; + if (prims_usecase.use_invalid_value) { + invalid_value = result_t{}; + thrust::get<0>(*invalid_value) = cugraph::invalid_vertex_id::value; + thrust::get<1>(*invalid_value) = cugraph::invalid_vertex_id::value; + } if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -185,41 +192,141 @@ class Tests_MGPerVRandomSelectTransformOutgoingE // 3. validate MG results if (prims_usecase.check_correctness) { -#if 0 - cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore) = + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_allgatherv( + *handle_, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); + auto out_degrees = mg_graph_view.compute_out_degrees(*handle_); + + cugraph::graph_t unrenumbered_graph(*handle_); + std::tie(unrenumbered_graph, std::ignore) = cugraph::test::construct_graph( *handle_, input_usecase, prims_usecase.test_weighted, false); - auto sg_graph_view = sg_graph.view(); - -// 1. check whether sources coincide with the local vertices in the frontier or not - -// 2. check sample counts - -// 3. check destinations exist in the input graph - -// 4. check source/destination property values - - auto sg_vertex_property_data = generate::vertex_property( - thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), - thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), - hash_bin_count, - *handle_); - auto sg_dst_prop = - generate::dst_property(*handle_, sg_graph_view, sg_vertex_property_data); - auto sg_src_prop = - generate::src_property(*handle_, sg_graph_view, sg_vertex_property_data); - - auto expected_result = count_if_e( - *handle_, - sg_graph_view, - sg_src_prop.view(), - sg_dst_prop.view(), - [] __device__(auto src, auto dst, weight_t, auto src_property, auto dst_property) { - return src_property < dst_property; - }); - ASSERT_TRUE(expected_result == result); -#endif + auto unrenumbered_graph_view = unrenumbered_graph.view(); + + rmm::device_uvector unrenumbered_offsets( + unrenumbered_graph_view.number_of_vertices() + vertex_t{1}, handle_->get_stream()); + thrust::copy(handle_->get_thrust_policy(), + unrenumbered_graph_view.local_edge_partition_view().offsets().begin(), + unrenumbered_graph_view.local_edge_partition_view().offsets().end(), + unrenumbered_offsets.begin()); + rmm::device_uvector unrenumbered_indices(unrenumbered_graph_view.number_of_edges(), + handle_->get_stream()); + thrust::copy(handle_->get_thrust_policy(), + unrenumbered_graph_view.local_edge_partition_view().indices().begin(), + unrenumbered_graph_view.local_edge_partition_view().indices().end(), + unrenumbered_indices.begin()); + + auto num_invalids = static_cast(thrust::count_if( + handle_->get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(mg_vertex_frontier.bucket(bucket_idx_cur).size()), + [frontier_vertex_first = mg_vertex_frontier.bucket(bucket_idx_cur).begin(), + v_first = mg_graph_view.local_vertex_partition_range_first(), + sample_offsets = sample_offsets + ? thrust::make_optional((*sample_offsets).data()) + : thrust::nullopt, + sample_e_op_results = cugraph::get_dataframe_buffer_begin(sample_e_op_results), + out_degrees = out_degrees.begin(), + aggregate_renumber_map_labels = d_mg_aggregate_renumber_map_labels.begin(), + unrenumbered_offsets = unrenumbered_offsets.begin(), + unrenumbered_indices = unrenumbered_indices.begin(), + K = prims_usecase.K, + with_replacement = prims_usecase.with_replacement, + invalid_value = + invalid_value ? thrust::make_optional(*invalid_value) : thrust::nullopt, + property_transform = cugraph::test::detail::property_transform{ + hash_bin_count}] __device__(size_t i) { + auto v = *(frontier_vertex_first + i); + + // check sample_offsets + + auto offset_first = sample_offsets ? *(*sample_offsets + i) : K * i; + auto offset_last = sample_offsets ? *(*sample_offsets + (i + 1)) : K * (i + 1); + if (!sample_offsets) { + size_t num_valids{0}; + for (size_t j = offset_first; j < offset_last; ++j) { + auto e_op_result = *(sample_e_op_results + j); + if (e_op_result == *invalid_value) { break; } + ++num_valids; + } + for (size_t j = offset_first + num_valids; j < offset_last; ++j) { + auto e_op_result = *(sample_e_op_results + j); + if (e_op_result != *invalid_value) { return true; } + } + offset_last = offset_first + num_valids; + } + auto count = offset_last - offset_first; + + auto v_offset = v - v_first; + auto out_degree = *(out_degrees + v_offset); + if (with_replacement) { + if ((out_degree > 0 && count != K) || (out_degree == 0 && count != 0)) { return true; } + } else { + if (count != std::min(static_cast(out_degree), K)) { return true; } + } + + // check sample_e_op_results + + for (size_t j = offset_first; j < offset_last; ++j) { + auto e_op_result = *(sample_e_op_results + j); + auto src = thrust::get<0>(e_op_result); + auto dst = thrust::get<1>(e_op_result); + if (src != v) { return true; } + auto unrenumbered_src = *(aggregate_renumber_map_labels + src); + auto unrenumbered_dst = *(aggregate_renumber_map_labels + dst); + auto unrenumbered_dst_first = + unrenumbered_indices + *(unrenumbered_offsets + unrenumbered_src); + auto unrenumbered_dst_last = + unrenumbered_indices + *(unrenumbered_offsets + (unrenumbered_src + vertex_t{1})); + if (!thrust::binary_search(thrust::seq, + unrenumbered_dst_first, + unrenumbered_dst_last, + unrenumbered_dst)) { // assumed neighbor lists are sorted + return true; + } + property_t src_val{}; + property_t dst_val{}; + if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { + src_val = + thrust::make_tuple(thrust::get<2>(e_op_result), thrust::get<3>(e_op_result)); + dst_val = + thrust::make_tuple(thrust::get<4>(e_op_result), thrust::get<5>(e_op_result)); + } else { + src_val = thrust::get<2>(e_op_result); + dst_val = thrust::get<3>(e_op_result); + } + if (src_val != property_transform(unrenumbered_src)) { return true; } + if (dst_val != property_transform(unrenumbered_dst)) { return true; } + + if (!with_replacement) { + auto dst_first = + thrust::get<1>(sample_e_op_results.get_iterator_tuple()) + offset_first; + auto dst_last = + thrust::get<1>(sample_e_op_results.get_iterator_tuple()) + offset_last; + auto dst_count = + thrust::count(thrust::seq, + dst_first, + dst_last, + dst); // this could be inefficient for high-degree vertices, if we + // sort [dst_first, dst_last) we can use binary search but we + // may better not modify the sampling output and allow + // inefficiency as this is just for testing + auto multiplicity = thrust::distance( + thrust::lower_bound( + thrust::seq, unrenumbered_dst_first, unrenumbered_dst_last, unrenumbered_dst), + thrust::upper_bound(thrust::seq, + unrenumbered_dst_first, + unrenumbered_dst_last, + unrenumbered_dst)); // this assumes neighbor lists are sorted + if (dst_count > multiplicity) { return true; } + } + } + + return false; + })); + + num_invalids = cugraph::host_scalar_allreduce( + handle_->get_comms(), num_invalids, raft::comms::op_t::SUM, handle_->get_stream()); + ASSERT_TRUE(num_invalids == 0); } } @@ -269,7 +376,10 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVRandomSelectTransformOutgoingE_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{true}), + ::testing::Values(Prims_Usecase{size_t{4}, false, false, false, true}, + Prims_Usecase{size_t{4}, false, true, false, true}, + Prims_Usecase{size_t{4}, true, false, false, true}, + Prims_Usecase{size_t{4}, true, true, false, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -278,14 +388,20 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( rmat_small_test, Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{4}, false, false, false, true}, + Prims_Usecase{size_t{4}, false, true, false, true}, + Prims_Usecase{size_t{4}, true, false, false, true}, + Prims_Usecase{size_t{4}, true, true, false, true}), ::testing::Values(cugraph::test::Rmat_Usecase( 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P( rmat_large_test, Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{false}), + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{4}, false, false, false, false}, + Prims_Usecase{size_t{4}, false, true, false, false}, + Prims_Usecase{size_t{4}, true, false, false, false}, + Prims_Usecase{size_t{4}, true, true, false, false}), ::testing::Values(cugraph::test::Rmat_Usecase( 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); From 2a7d0c845462da4f59b2d95055aa296635b21d91 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 22 Sep 2022 20:27:19 -0700 Subject: [PATCH 12/18] fix clang-format errors --- cpp/include/cugraph/utilities/device_functors.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 8a75f2a3379..bf43b411757 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -81,7 +81,8 @@ struct strided_sum_t { size_t stride{0}; size_t count{0}; - __device__ T operator()(size_t start_offset) const { + __device__ T operator()(size_t start_offset) const + { T sum{0}; for (size_t j = 0; j < count; ++j) { sum += values[start_offset + stride * j]; From 1fb69174ecb6e29df717c54e9f455efa0299326b Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 23 Sep 2022 09:12:10 -0700 Subject: [PATCH 13/18] guard a cugraph-ops call inside ifdef --- cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index f82eb10d8c7..b874cbe433f 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -410,6 +410,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, rmm::device_uvector sample_nbr_indices(frontier.size() * K, handle.get_stream()); // FIXME: get_sampling_index is inefficient when degree >> K & with_replacement = false if (frontier_degrees.size() > 0) { +#ifndef NO_CUGRAPH_OPS cugraph::ops::gnn::graph::get_sampling_index(sample_nbr_indices.data(), rng_state, frontier_degrees.data(), @@ -417,6 +418,9 @@ per_v_random_select_transform_e(raft::handle_t const& handle, static_cast(K), with_replacement, handle.get_stream()); +#else + CUGRAPH_FAIL("unimplemented."); +#endif } frontier_degrees.resize(0, handle.get_stream()); frontier_degrees.shrink_to_fit(handle.get_stream()); From 1d73867302591f51f94eed09b79e88c3ea389ac0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 23 Sep 2022 18:02:47 -0700 Subject: [PATCH 14/18] first draft implementation --- cpp/src/prims/detail/nbr_intersection.cuh | 145 +++---- ..._v_pair_transform_dst_nbr_intersection.cuh | 357 ++++++++++++++++++ ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 20 +- 3 files changed, 439 insertions(+), 83 deletions(-) create mode 100644 cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 91bb5bfec13..cb1ef32b660 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -473,6 +473,77 @@ struct gatherv_indices_t { } }; +template +size_t count_invalid_vertex_pairs(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexPairIterator vertex_pair_first, + VertexPairIterator vertex_pair_last) +{ + using vertex_t = typename GraphViewType::vertex_type; + + std::vector h_edge_partition_major_range_firsts( + graph_view.number_of_local_edge_partitions()); + std::vector h_edge_partition_major_range_lasts( + h_edge_partition_major_range_firsts.size()); + vertex_t edge_partition_minor_range_first{}; + vertex_t edge_partition_minor_range_last{}; + if constexpr (GraphViewType::is_multi_gpu) { + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); i++) { + if constexpr (GraphViewType::is_storage_transposed) { + h_edge_partition_major_range_firsts[i] = graph_view.local_edge_partition_dst_range_first(i); + h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_dst_range_last(i); + } else { + h_edge_partition_major_range_firsts[i] = graph_view.local_edge_partition_src_range_first(i); + h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_last(i); + } + } + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_minor_range_first = graph_view.local_edge_partition_src_range_first(); + edge_partition_minor_range_last = graph_view.local_edge_partition_src_range_last(); + } else { + edge_partition_minor_range_first = graph_view.local_edge_partition_dst_range_first(); + edge_partition_minor_range_last = graph_view.local_edge_partition_dst_range_last(); + } + } else { + h_edge_partition_major_range_firsts[0] = vertex_t{0}; + h_edge_partition_major_range_lasts[0] = graph_view.number_of_vertices(); + edge_partition_minor_range_first = vertex_t{0}; + edge_partition_minor_range_last = graph_view.number_of_vertices(); + } + rmm::device_uvector d_edge_partition_major_range_firsts( + h_edge_partition_major_range_firsts.size(), handle.get_stream()); + rmm::device_uvector d_edge_partition_major_range_lasts( + h_edge_partition_major_range_lasts.size(), handle.get_stream()); + raft::update_device(d_edge_partition_major_range_firsts.data(), + h_edge_partition_major_range_firsts.data(), + h_edge_partition_major_range_firsts.size(), + handle.get_stream()); + raft::update_device(d_edge_partition_major_range_lasts.data(), + h_edge_partition_major_range_lasts.data(), + h_edge_partition_major_range_lasts.size(), + handle.get_stream()); + + auto num_invalid_pairs = thrust::count_if( + handle.get_thrust_policy(), + vertex_pair_first, + vertex_pair_last, + is_invalid_input_vertex_pair_t{ + graph_view.number_of_vertices(), + raft::device_span(d_edge_partition_major_range_firsts.begin(), + d_edge_partition_major_range_firsts.end()), + raft::device_span(d_edge_partition_major_range_lasts.begin(), + d_edge_partition_major_range_lasts.end()), + edge_partition_minor_range_first, + edge_partition_minor_range_last}); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + num_invalid_pairs = + host_scalar_allreduce(comm, num_invalid_pairs, raft::comms::op_t::SUM, handle.get_stream()); + } + + return num_invalid_pairs; +} + // In multi-GPU, the first element of every vertex pair in [vertex_pair_first, vertex_pair) should // be within the valid edge partition major range assigned to this process and the second element // should be within the valid edge partition minor range assigned to this process. @@ -483,7 +554,7 @@ struct gatherv_indices_t { // one can limit the number of unique vertices (aggregated over column communicator in multi-GPU) to // build neighbor list; we need to bulid neighbor lists for the first element of every input vertex // pair if intersect_dst_nbr[0] == GraphViewType::is_storage_transposed and build neighbor lists for -// the second element of every input vertex pair if single-GPU and intersect_dst_nbr[0] == +// the second element of every input vertex pair if single-GPU and intersect_dst_nbr[1] == // GraphViewType::is_storage_transposed or multi-GPU. For load balancing, // thrust::distance(vertex_pair_first, vertex_pair_last) should be comparable across the global // communicator. If we need to build the neighbor lists, grouping based on applying "vertex ID % @@ -517,79 +588,22 @@ nbr_intersection(raft::handle_t const& handle, if (do_expensive_check) { auto is_sorted = thrust::is_sorted(handle.get_thrust_policy(), vertex_pair_first, vertex_pair_last); - - std::vector h_edge_partition_major_range_firsts( - graph_view.number_of_local_edge_partitions()); - std::vector h_edge_partition_major_range_lasts( - h_edge_partition_major_range_firsts.size()); - vertex_t edge_partition_minor_range_first{}; - vertex_t edge_partition_minor_range_last{}; - if constexpr (GraphViewType::is_multi_gpu) { - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); i++) { - if constexpr (GraphViewType::is_storage_transposed) { - h_edge_partition_major_range_firsts[i] = - graph_view.local_edge_partition_dst_range_first(i); - h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_dst_range_last(i); - } else { - h_edge_partition_major_range_firsts[i] = - graph_view.local_edge_partition_src_range_first(i); - h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_last(i); - } - } - if constexpr (GraphViewType::is_storage_transposed) { - edge_partition_minor_range_first = graph_view.local_edge_partition_src_range_first(); - edge_partition_minor_range_last = graph_view.local_edge_partition_src_range_last(); - } else { - edge_partition_minor_range_first = graph_view.local_edge_partition_dst_range_first(); - edge_partition_minor_range_last = graph_view.local_edge_partition_dst_range_last(); - } - } else { - h_edge_partition_major_range_firsts[0] = vertex_t{0}; - h_edge_partition_major_range_lasts[0] = graph_view.number_of_vertices(); - edge_partition_minor_range_first = vertex_t{0}; - edge_partition_minor_range_last = graph_view.number_of_vertices(); - } - rmm::device_uvector d_edge_partition_major_range_firsts( - h_edge_partition_major_range_firsts.size(), handle.get_stream()); - rmm::device_uvector d_edge_partition_major_range_lasts( - h_edge_partition_major_range_lasts.size(), handle.get_stream()); - raft::update_device(d_edge_partition_major_range_firsts.data(), - h_edge_partition_major_range_firsts.data(), - h_edge_partition_major_range_firsts.size(), - handle.get_stream()); - raft::update_device(d_edge_partition_major_range_lasts.data(), - h_edge_partition_major_range_lasts.data(), - h_edge_partition_major_range_lasts.size(), - handle.get_stream()); - - auto num_invalid_pairs = thrust::count_if( - handle.get_thrust_policy(), - vertex_pair_first, - vertex_pair_last, - is_invalid_input_vertex_pair_t{ - graph_view.number_of_vertices(), - raft::device_span(d_edge_partition_major_range_firsts.begin(), - d_edge_partition_major_range_firsts.end()), - raft::device_span(d_edge_partition_major_range_lasts.begin(), - d_edge_partition_major_range_lasts.end()), - edge_partition_minor_range_first, - edge_partition_minor_range_last}); if constexpr (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); - - is_sorted = static_cast(host_scalar_allreduce( + is_sorted = static_cast(host_scalar_allreduce( comm, static_cast(is_sorted), raft::comms::op_t::MIN, handle.get_stream())); - num_invalid_pairs = - host_scalar_allreduce(comm, num_invalid_pairs, raft::comms::op_t::SUM, handle.get_stream()); } - CUGRAPH_EXPECTS(is_sorted, "Invalid input arguments: input vertex pairs should be sorted."); + + auto num_invalid_pairs = + count_invalid_vertex_pairs(handle, graph_view, vertex_pair_first, vertex_pair_last); CUGRAPH_EXPECTS(num_invalid_pairs == 0, "Invalid input arguments: there are invalid input vertex pairs."); } // 2. Collect neighbor lists for unique second pair elements (for the neighbors within the minor - // range for this GPU) + // range for this GPU); Note that no need to collect for first pair elements as they already + // locally reside. auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); [[maybe_unused]] auto stream_adapter = @@ -634,6 +648,9 @@ nbr_intersection(raft::handle_t const& handle, unique_majors.shrink_to_fit(handle.get_stream()); if (col_comm_size > 1) { + // FIXME: We may refactor this code to improve scalability. We may call multiple gatherv + // calls, perform local sort and unique, and call multiple broadcasts rather than + // performing sort and unique for the entire range in every GPU in col_comm. auto rx_counts = host_scalar_allgather(col_comm, unique_majors.size(), handle.get_stream()); std::vector rx_displacements(rx_counts.size()); diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh new file mode 100644 index 00000000000..a0f2d6f5338 --- /dev/null +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cugraph { + +namespace detail { + +template +struct compute_local_edge_partition_id_t { + VertexPairIterator vertex_pair_first{}; + size_t num_local_edge_partitions{}; + raft::device_span edge_partition_major_range_lasts{}; + + __device__ int operator()(size_t i) const + { + auto major = thurst::get<0>(*(vertex_pair_first + i)); + return static_cast( + thrust::distance(edge_partition_major_range_lasts.begin(), + thrust::upper_bound(thrust::seq, + edge_partition_major_range_lasts.begin(), + edge_partition_major_range_lasts.end(), + major))); + } +}; + +template +struct compute_chunk_id_t { + VertexPairIterator vertex_pair_first{}; + size_t num_chunks{}; + + __device__ int operator()(size_t i) const + { + return static_cast(thrust::get<1>(*(vertex_pair_first + i)) % num_chunks); + } +}; + +template +struct call_intersection_op_t { + edge_partition_device_view_t + edge_partition{}; + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input{}; + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input{}; + IntersectionOp intersection_op{}; + size_t const* nbr_offsets{nullptr}; + typename GraphViewType::vertex_type const* nbr_indices{nullptr}; + VertexPairIndexIterator major_minor_pair_index_first{}; + VertexPairIterator major_minor_pair_first{}; + VertexPairValueOutputIterator major_minor_pair_value_output_first{}; + + __device__ void operator()(size_t i) const + { + auto index = *(major_minor_pair_index_first + i); + auto pair = *(major_minor_pair_first + index); + auto major = thrust::get<0>(pair); + auto minor = thrust::get<1>(pair); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); + auto src = GraphViewType::is_storage_transposed ? minor : major; + auto dst = GraphViewType::is_storage_transposed ? major : minor; + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + auto intersection = raft::device_span( + nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); + *(major_minor_pair_value_output_first + index) = + evaluate_intersection_op() + .compute(src, + dst, + edge_partition_src_value_input.get(src_offset), + edge_partition_dst_value_input.get(dst_offset), + intersection, + intersection_op); + } +}; + +} // namespace detail + +/** + * @brief Iterate over each input vertex pair and apply a functor to the common destination neighbor + * list of the pair. + * + * Iterate over every vertex pair; intersect destination neighbor lists of the two vertices in the + * pair; invoke a user-provided functor, and store the functor output. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexPairIterator Type of the iterator for input vertex pairs. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam IntersectionOp Type of the quinary per intersection operator. + * @tparam VertexPairValueOutputIterator Type of the iterator for vertex pair output property + * variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_pair_first Iterator pointing to the first (inclusive) input vertex pair. + * @param vertex_pair_last Iterator pointing to the last (exclusive) input vertex pair. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param intersection_op quinary operator takes first vertex of the pair, second vertex of the + * pair, property values for the first vertex, property values for the second vertex, and a list of + * vertices in the intersection of the first & second vertices' destination neighbors and returns an + * output value for the input pair. + * @param vertex_pair_value_output_first Iterator pointing to the vertex pair property variables for + * the first vertex pair (inclusive). `vertex_pair_value_output_last` (exclusive) is deduced as @p + * vertex_pair_value_output_first + @p thrust::distance(vertex_pair_first, vertex_pair_last). + * @param A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void per_v_pair_transform_dst_nbr_intersection( + raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexPairIterator vertex_pair_first, + VertexPairIterator vertex_pair_last, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + IntersectionOp intersection_op, + VertexPairValueOutputIterator vertex_pair_value_output_first, + bool do_expensive_check = false) +{ + static_assert(!GraphViewType::is_storage_transposed); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using result_t = typename thrust::iterator_traits::value_type; + + using edge_partition_src_input_device_view_t = std::conditional_t< + std::is_same_v, + detail::edge_partition_endpoint_dummy_property_device_view_t, + std::conditional_t, + detail::edge_partition_endpoint_property_device_view_t< + vertex_t, + typename EdgeSrcValueInputWrapper::value_iterator>>>; + using edge_partition_dst_input_device_view_t = std::conditional_t< + std::is_same_v, + detail::edge_partition_endpoint_dummy_property_device_view_t, + std::conditional_t, + detail::edge_partition_endpoint_property_device_view_t< + vertex_t, + typename EdgeDstValueInputWrapper::value_iterator>>>; + + if (do_expensive_check) { + auto num_invalids = + detail::count_invalid_vertex_pairs(handle, graph_view, vertex_pair_first, vertex_pair_last); + CUGRAPH_EXPECTS(num_invalids == 0, ""); + } + + rmm::device_uvector vertex_pair_indices( + thrust::distance(vertex_pair_first, vertex_pair_last), handle.get_stream()); + thrust::sequence( + handle.get_thrust_policy(), vertex_pair_indices.begin(), vertex_pair_indices.end(), size_t{0}); + + std::vector h_edge_partition_major_range_lasts( + graph_view.number_of_local_edge_partitions); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_first(i); + } + rmm::device_uvector d_edge_partition_major_range_lasts( + h_edge_partition_major_range_lasts.size(), handle.get_stream()); + raft::update_device(d_edge_partition_major_range_lasts.data(), + h_edge_partition_major_range_lasts.data(), + h_edge_partition_major_range_lasts.size(), + std::cout); + auto d_edge_partition_group_sizes = groupby_and_count( + vertex_pair_indices.begin(), + vertex_pair_indices.end(), + detail::compute_local_edge_partition_id_t{ + vertex_pair_first, + graph_view.number_of_local_edge_partitions(), + raft::device_span(d_edge_partition_major_range_lasts.data(), + d_edge_partition_major_range_lasts.size())}, + static_cast(graph_view.number_of_local_edge_partitions()), + std::numeric_limits::max(), + handle.get_stream()); + std::vector h_edge_partition_group_sizes(d_edge_partition_group_sizes.size()); + raft::update_host(h_edge_partition_group_sizes.data(), + d_edge_partition_group_sizes.data(), + d_edge_partition_group_sizes.size(), + handle.get_stream()); + handle.sync_stream(); + std::vector h_edge_partition_group_displacements(h_edge_partition_group_sizes.size()); + std::exclusive_scan(h_edge_partition_group_sizes.begin(), + h_edge_partition_group_sizes.end(), + h_edge_partition_group_displacements.begin()); + + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(i)); + + edge_partition_src_input_device_view_t edge_partition_src_value_input{}; + edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input); + edge_partition_dst_value_input = + edge_partition_dst_input_device_view_t(edge_dst_value_input, i); + } else { + edge_partition_src_value_input = + edge_partition_src_input_device_view_t(edge_src_value_input, i); + edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); + } + + auto edge_partition_vertex_pair_index_first = + vertex_pair_indices.begin() + h_edge_partition_group_displacements[i]; + + // FIXME: Peak memory requirement is also dependent on the average minimum degree of the input + // vertex pairs. We may need a more sophisticated mechanism to set max_chunk_size considering + // vertex degrees. to limit memory footprint ((1 << 15) is a tuning parameter) + auto max_chunk_size = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 15); + auto max_num_chunks = (majors.size() + max_chunk_size - 1) / max_chunk_size; + if constexpr (GraphViewType::is_multi_gpu) { + max_num_chunks = host_scalar_allreduce( + handle.get_comms(), max_num_chunks, raft::comms::op_t::MAX, handle.get_stream()); + } + + std::vector h_chunk_sizes(max_num_chunks); + if (h_chunk_sizes.size() > size_t{1}) { + auto d_chunk_sizes = + groupby_and_count(edge_partition_vertex_pair_index_first, + edge_partition_vertex_pair_index_first + h_edge_partition_group_sizes[i], + detail::compute_chunk_id_t{max_num_chunks}, + static_cast(max_num_chunks), + std::numeric_limits::max(), + handle.get_stream()); + raft::update_host( + h_chunk_sizes.data(), d_chunk_sizes.data(), d_chunk_sizes.size(), handle.get_stream()); + handle.sync_stream(); + } else if (h_chunk_sizes.size() == size_t{1}) { + h_chunk_sizes[0] = majors.size(); + } + + auto chunk_vertex_pair_index_first = edge_partition_vertx_pair_index_first; + for (size_t j = 0; j < h_chunk_sizes.size(); ++j) { + auto this_chunk_size = h_chunk_sizes[j]; + + thrust::sort(handle.get_thrust_policy(), + chunk_vertex_pair_index_first, + chunk_vertex_pair_index_first + this_chunk_size, + indirection_copmare_less_t{ + vertex_pair_first}); // detail::nbr_intersection() requires the input vertex + // pairs to be sorted. + + // FIXME: better restrict detail::nbr_intersection input vetex pairs to a single edge + // partition? This may provide additional performance improvement opportunities??? + auto chunk_vertex_pair_fist = thrust::make_transform_iterator( + chunk_vertex_pair_index_first, indirection_t{vertex_pair_first}); + auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + graph_view, + chunk_vertex_pair_first, + chunk_vertex_pair_first + this_chunk_size, + std::array{true, true}, + do_expensive_check); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t{ + edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); + + chunk_vertex_pair_index_first += this_chunk_size; + } + } +} + +} // namespace cugraph diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index ab33dc7d7b9..5267bf58488 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -275,24 +275,6 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( vertex_value_output_first + graph_view.local_vertex_partition_range_size(), init); - std::optional> - d_vertex_partition_range_lasts_in_edge_partition_minor_range{std::nullopt}; - if constexpr (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - d_vertex_partition_range_lasts_in_edge_partition_minor_range = - rmm::device_uvector(row_comm_size, handle.get_stream()); - auto h_vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - raft::update_device((*d_vertex_partition_range_lasts_in_edge_partition_minor_range).data(), - h_vertex_partition_range_lasts.data() + row_comm_size * col_comm_rank, - row_comm_size, - handle.get_stream()); - } - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( @@ -325,7 +307,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( // FIXME: Peak memory requirement is also dependent on the average minimum degree of the input // vertex pairs. We may need a more sophisticated mechanism to set max_chunk_size considering - // vertex degrees. to limit memory footprint ((1 << 10) is a tuning parameter) + // vertex degrees. to limit memory footprint ((1 << 15) is a tuning parameter) auto max_chunk_size = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 15); auto max_num_chunks = (majors.size() + max_chunk_size - 1) / max_chunk_size; From bbe6fbacad0bd8b08b4569a670b662f5d0782288 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 26 Sep 2022 15:42:32 -0700 Subject: [PATCH 15/18] rename utility functors --- cpp/include/cugraph/graph_functions.hpp | 23 ++++++----- .../weakly_connected_components_impl.cuh | 2 +- cpp/src/detail/graph_utils.cuh | 41 +++++++++++++++---- cpp/src/detail/shuffle_wrappers.cu | 8 ++-- .../create_graph_from_edgelist_impl.cuh | 2 +- cpp/src/structure/renumber_edgelist_impl.cuh | 5 ++- cpp/src/utilities/cython.cu | 6 +-- .../utilities/matrix_market_file_utilities.cu | 2 +- 8 files changed, 57 insertions(+), 32 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index fcf5cb00572..ac14642f2c3 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -51,7 +51,8 @@ struct renumber_meta_t * * This function assumes that vertices are pre-shuffled to their target processes and edges are * pre-shuffled to their target processess and edge partitions using compute_gpu_id_from_vertex_t - * and compute_gpu_id_from_edge_t & compute_partition_id_from_edge_t functors, respectively. + * and compute_gpu_id_from_ext_edge_endpoints_t & compute_partition_id_from_ext_edge_endpoints_t + * functors, respectively. * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. @@ -65,12 +66,12 @@ struct renumber_meta_t * work (vertices should be pre-shuffled). * @param edgelist_srcs Pointers (one pointer per local edge partition assigned to this process) to * edge source vertex IDs. Source IDs are updated in-place ([INOUT] parameter). Applying the - * compute_gpu_id_from_edge_t functor to every (destination ID, source ID) pair (if store_transposed - * = true) or (source ID, destination ID) pair (if store_transposed = false) should return the local - * GPU ID for this function to work (edges should be pre-shuffled). Applying the - * compute_partition_id_from_edge_t to every (destination ID, source ID) pair (if store_transposed = - * true) or (source ID, destination ID) pair (if store_transposed = false) should also return the - * corresponding edge partition ID. The best way to enforce this is to use + * compute_gpu_id_from_ext_edge_endpoints_t functor to every (destination ID, source ID) pair (if + * store_transposed = true) or (source ID, destination ID) pair (if store_transposed = false) should + * return the local GPU ID for this function to work (edges should be pre-shuffled). Applying the + * compute_partition_id_from_ext_edge_endpoints_t to every (destination ID, source ID) pair (if + * store_transposed = true) or (source ID, destination ID) pair (if store_transposed = false) should + * also return the corresponding edge partition ID. The best way to enforce this is to use * shuffle_edgelist_by_gpu_id & groupby_and_count_edgelist_by_local_partition_id. * @param edgelist_dsts Pointers (one pointer per local edge partition assigned to this process) to * edge destination vertex IDs. Destination IDs are updated in-place ([INOUT] parameter). @@ -347,8 +348,8 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param edgelist_srcs Vector of edge source vertex IDs. If multi-GPU, applying the - * compute_gpu_id_from_edge_t to every edge should return the local GPU ID for this function to work - * (edges should be pre-shuffled). + * compute_gpu_id_from_ext_edge_endpoints_t to every edge should return the local GPU ID for this + * function to work (edges should be pre-shuffled). * @param edgelist_dsts Vector of edge destination vertex IDs. * @param edgelist_weights Vector of edge weights. * @param reciprocal Flag indicating whether to keep (if set to `false`) or discard (if set to @@ -500,8 +501,8 @@ extract_induced_subgraphs( * compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function to * work (vertices should be pre-shuffled). * @param edgelist_srcs Vector of edge source vertex IDs. If multi-GPU, applying the - * compute_gpu_id_from_edge_t to every edge should return the local GPU ID for this function to work - * (edges should be pre-shuffled). + * compute_gpu_id_from_ext_edge_endpoints_t to every edge should return the local GPU ID for this + * function to work (edges should be pre-shuffled). * @param edgelist_dsts Vector of edge destination vertex IDs. * @param edgelist_weights Vector of edge weights. * @param edgelist_id_type_pairs Vector of edge ID and type pairs. diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index 6aa14ce5e73..d2a1ef30d67 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -711,7 +711,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle, get_dataframe_buffer_begin(edge_buffer), get_dataframe_buffer_end(edge_buffer), [key_func = - cugraph::detail::compute_gpu_id_from_edge_t{ + cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, diff --git a/cpp/src/detail/graph_utils.cuh b/cpp/src/detail/graph_utils.cuh index 8094de556c1..f7b0490f156 100644 --- a/cpp/src/detail/graph_utils.cuh +++ b/cpp/src/detail/graph_utils.cuh @@ -54,21 +54,19 @@ struct compute_gpu_id_from_ext_vertex_t { template struct compute_gpu_id_from_int_vertex_t { - raft::device_span vertex_partition_range_lasts_span; + raft::device_span vertex_partition_range_lasts{}; __device__ int operator()(vertex_t v) const { - return static_cast( - thrust::distance(vertex_partition_range_lasts_span.begin(), - thrust::upper_bound(thrust::seq, - vertex_partition_range_lasts_span.begin(), - vertex_partition_range_lasts_span.end(), - v))); + return static_cast(thrust::distance( + vertex_partition_range_lasts.begin(), + thrust::upper_bound( + thrust::seq, vertex_partition_range_lasts.begin(), vertex_partition_range_lasts.end(), v))); } }; template -struct compute_gpu_id_from_edge_t { +struct compute_gpu_id_from_ext_edge_endpoints_t { int comm_size{0}; int row_comm_size{0}; int col_comm_size{0}; @@ -83,7 +81,32 @@ struct compute_gpu_id_from_edge_t { }; template -struct compute_partition_id_from_edge_t { +struct compute_gpu_id_from_int_edge_endpoints_t { + raft::device_span vertex_partition_range_lasts{}; + int comm_size{0}; + int row_comm_size{0}; + int col_comm_size{0}; + + __device__ int operator()(vertex_t major, vertex_t minor) const + { + auto major_comm_rank = + static_cast(thrust::distance(vertex_partition_range_lasts.begin(), + thrust::upper_bound(thrust::seq, + vertex_partition_range_lasts.begin(), + vertex_partition_range_lasts.end(), + major))); + auto minor_comm_rank = + static_cast(thrust::distance(vertex_partition_range_lasts.begin(), + thrust::upper_bound(thrust::seq, + vertex_partition_range_lasts.begin(), + vertex_partition_range_lasts.end(), + major))); + return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); + } +}; + +template +struct compute_partition_id_from_ext_edge_endpoints_t { int comm_size{0}; int row_comm_size{0}; int col_comm_size{0}; diff --git a/cpp/src/detail/shuffle_wrappers.cu b/cpp/src/detail/shuffle_wrappers.cu index 93e408feccd..f8271cbfa4a 100644 --- a/cpp/src/detail/shuffle_wrappers.cu +++ b/cpp/src/detail/shuffle_wrappers.cu @@ -84,7 +84,7 @@ shuffle_edgelist_by_gpu_id(raft::handle_t const& handle, edge_first, edge_first + d_edgelist_majors.size(), [key_func = - cugraph::detail::compute_gpu_id_from_edge_t{ + cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, @@ -133,7 +133,7 @@ shuffle_edgelist_by_gpu_id(raft::handle_t const& handle, edge_first, edge_first + d_edgelist_majors.size(), [key_func = - cugraph::detail::compute_gpu_id_from_edge_t{ + cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, @@ -364,7 +364,7 @@ rmm::device_uvector groupby_and_count_edgelist_by_local_partition_id( [comm_size, row_comm_size, partition_id_key_func = - cugraph::detail::compute_partition_id_from_edge_t{ + cugraph::detail::compute_partition_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}, gpu_id_key_func = cugraph::detail::compute_gpu_id_from_ext_vertex_t{ comm_size}] __device__(auto pair) { @@ -412,7 +412,7 @@ rmm::device_uvector groupby_and_count_edgelist_by_local_partition_id( } else { auto local_partition_id_op = [comm_size, - key_func = cugraph::detail::compute_partition_id_from_edge_t{ + key_func = cugraph::detail::compute_partition_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / comm_size; // global partition id to local partition id diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh index dfe2941eb86..6ddef877d52 100644 --- a/cpp/src/structure/create_graph_from_edgelist_impl.cuh +++ b/cpp/src/structure/create_graph_from_edgelist_impl.cuh @@ -115,7 +115,7 @@ void expensive_check_edgelist(raft::handle_t const& handle, edge_first + edgelist_majors.size(), [comm_rank, gpu_id_key_func = - detail::compute_gpu_id_from_edge_t{ + detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto e) { return (gpu_id_key_func(thrust::get<0>(e), thrust::get<1>(e)) != comm_rank); diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index f473bfba292..e45cdef8192 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -510,9 +510,10 @@ void expensive_check_edgelist( col_comm_rank, i, gpu_id_key_func = - detail::compute_gpu_id_from_edge_t{comm_size, row_comm_size, col_comm_size}, + detail::compute_gpu_id_from_ext_edge_endpoints_t{ + comm_size, row_comm_size, col_comm_size}, partition_id_key_func = - detail::compute_partition_id_from_edge_t{ + detail::compute_partition_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto edge) { return (gpu_id_key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank) || (partition_id_key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 727a63c7080..10c6f2f616c 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -840,7 +840,7 @@ std::unique_ptr> call_shuffle( zip_edge, zip_edge + num_edgelist_edges, [key_func = - cugraph::detail::compute_gpu_id_from_edge_t{ + cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm.get_size(), row_comm.get_size(), col_comm.get_size()}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, @@ -856,7 +856,7 @@ std::unique_ptr> call_shuffle( zip_edge, zip_edge + num_edgelist_edges, [key_func = - cugraph::detail::compute_gpu_id_from_edge_t{ + cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm.get_size(), row_comm.get_size(), col_comm.get_size()}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, @@ -865,7 +865,7 @@ std::unique_ptr> call_shuffle( auto local_partition_id_op = [comm_size, - key_func = cugraph::detail::compute_partition_id_from_edge_t{ + key_func = cugraph::detail::compute_partition_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / comm_size; // global partition id to local partition id diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index 5804b0e4b40..616f69eebe7 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -342,7 +342,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, handle.get_stream()); d_vertices.shrink_to_fit(handle.get_stream()); - auto edge_key_func = cugraph::detail::compute_gpu_id_from_edge_t{ + auto edge_key_func = cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t{ comm_size, row_comm_size, col_comm_size}; size_t number_of_local_edges{}; if (d_edgelist_weights) { From 342036455c69c4ebd6f1702547c34b5d5ce46431 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 27 Sep 2022 14:39:01 -0700 Subject: [PATCH 16/18] add a test and fix compile errors --- .../cugraph/utilities/device_functors.cuh | 10 + cpp/src/detail/graph_utils.cuh | 19 +- cpp/src/prims/detail/nbr_intersection.cuh | 1 + ..._v_pair_transform_dst_nbr_intersection.cuh | 265 +++++++++++------- cpp/tests/CMakeLists.txt | 138 ++++----- 5 files changed, 258 insertions(+), 175 deletions(-) diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index bf43b411757..19e14d1d199 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -37,6 +37,16 @@ struct typecast_t { __device__ output_t operator()(input_t val) const { return static_cast(val); } }; +template +struct indirection_t { + Iterator first{}; + + __device__ typename thrust::iterator_traits::value_type operator()(size_t i) const + { + return *(first + i); + } +}; + template struct not_equal_t { T compare{}; diff --git a/cpp/src/detail/graph_utils.cuh b/cpp/src/detail/graph_utils.cuh index f7b0490f156..7c760bb020b 100644 --- a/cpp/src/detail/graph_utils.cuh +++ b/cpp/src/detail/graph_utils.cuh @@ -100,7 +100,24 @@ struct compute_gpu_id_from_int_edge_endpoints_t { thrust::upper_bound(thrust::seq, vertex_partition_range_lasts.begin(), vertex_partition_range_lasts.end(), - major))); + minor))); + return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); + } + + __device__ int operator()(thrust::tuple pair /* major, minor */) const + { + auto major_comm_rank = + static_cast(thrust::distance(vertex_partition_range_lasts.begin(), + thrust::upper_bound(thrust::seq, + vertex_partition_range_lasts.begin(), + vertex_partition_range_lasts.end(), + thrust::get<0>(pair)))); + auto minor_comm_rank = + static_cast(thrust::distance(vertex_partition_range_lasts.begin(), + thrust::upper_bound(thrust::seq, + vertex_partition_range_lasts.begin(), + vertex_partition_range_lasts.end(), + thrust::get<1>(pair)))); return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); } }; diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index cb1ef32b660..b750342ad17 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index a0f2d6f5338..af8b354f94d 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,13 +54,16 @@ namespace detail { template struct compute_local_edge_partition_id_t { + using vertex_t = typename thrust:: + tuple_element<0, typename thrust::iterator_traits::value_type>::type; + VertexPairIterator vertex_pair_first{}; size_t num_local_edge_partitions{}; raft::device_span edge_partition_major_range_lasts{}; __device__ int operator()(size_t i) const { - auto major = thurst::get<0>(*(vertex_pair_first + i)); + auto major = thrust::get<0>(*(vertex_pair_first + i)); return static_cast( thrust::distance(edge_partition_major_range_lasts.begin(), thrust::upper_bound(thrust::seq, @@ -80,9 +84,18 @@ struct compute_chunk_id_t { } }; +template +struct indirection_compare_less_t { + VertexPairIterator vertex_pair_first{}; + + __device__ bool operator()(size_t i, size_t j) const + { + return *(vertex_pair_first + i) < *(vertex_pair_first + j); + } +}; + template edge_partition{}; - EdgePartitionSrcValueInputWrapper edge_partition_src_value_input{}; - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input{}; + thrust::optional> unique_vertices; + VertexValueInputIterator vertex_property_first; IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; typename GraphViewType::vertex_type const* nbr_indices{nullptr}; @@ -104,29 +117,44 @@ struct call_intersection_op_t { __device__ void operator()(size_t i) const { + using property_t = typename thrust::iterator_traits::value_type; + auto index = *(major_minor_pair_index_first + i); auto pair = *(major_minor_pair_first + index); auto major = thrust::get<0>(pair); auto minor = thrust::get<1>(pair); - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); auto src = GraphViewType::is_storage_transposed ? minor : major; auto dst = GraphViewType::is_storage_transposed ? major : minor; - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; auto intersection = raft::device_span( nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); + + property_t src_prop{}; + property_t dst_prop{}; + if (unique_vertices) { + src_prop = *(vertex_property_first + + thrust::distance( + (*unique_vertices).begin(), + thrust::lower_bound( + thrust::seq, (*unique_vertices).begin(), (*unique_vertices).end(), src))); + dst_prop = *(vertex_property_first + + thrust::distance( + (*unique_vertices).begin(), + thrust::lower_bound( + thrust::seq, (*unique_vertices).begin(), (*unique_vertices).end(), dst))); + } else { + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + src_prop = *(vertex_property_first + src_offset); + dst_prop = *(vertex_property_first + src_offset); + } *(major_minor_pair_value_output_first + index) = evaluate_intersection_op() - .compute(src, - dst, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - intersection, - intersection_op); + .compute(src, dst, src_prop, dst_prop, intersection, intersection_op); } }; @@ -141,8 +169,7 @@ struct call_intersection_op_t { * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexPairIterator Type of the iterator for input vertex pairs. - * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. - * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam VertexValueInputWrapper Type of the wrapper for vertex property values. * @tparam IntersectionOp Type of the quinary per intersection operator. * @tparam VertexPairValueOutputIterator Type of the iterator for vertex pair output property * variables. @@ -151,16 +178,8 @@ struct call_intersection_op_t { * @param graph_view Non-owning graph object. * @param vertex_pair_first Iterator pointing to the first (inclusive) input vertex pair. * @param vertex_pair_last Iterator pointing to the last (exclusive) input vertex pair. - * @param edge_src_value_input Wrapper used to access source input property values (for the edge - * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() - * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() - * (if @p e_op does not access source property values). Use update_edge_src_property to fill the - * wrapper. - * @param edge_dst_value_input Wrapper used to access destination input property values (for the - * edge destinations assigned to this process in multi-GPU). Use either - * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or - * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property - * values). Use update_edge_dst_property to fill the wrapper. + * @param vertex_src_value_input Wrapper used to access vertex input property values (for the + * vertices assigned to this process in multi-GPU). * @param intersection_op quinary operator takes first vertex of the pair, second vertex of the * pair, property values for the first vertex, property values for the second vertex, and a list of * vertices in the intersection of the first & second vertices' destination neighbors and returns an @@ -172,8 +191,7 @@ struct call_intersection_op_t { */ template void per_v_pair_transform_dst_nbr_intersection( @@ -181,39 +199,18 @@ void per_v_pair_transform_dst_nbr_intersection( GraphViewType const& graph_view, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, - EdgeSrcValueInputWrapper edge_src_value_input, - EdgeDstValueInputWrapper edge_dst_value_input, + VertexValueInputIterator vertex_value_input_first, IntersectionOp intersection_op, VertexPairValueOutputIterator vertex_pair_value_output_first, bool do_expensive_check = false) { static_assert(!GraphViewType::is_storage_transposed); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - using result_t = typename thrust::iterator_traits::value_type; - - using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, - detail::edge_partition_endpoint_dummy_property_device_view_t, - std::conditional_t, - detail::edge_partition_endpoint_property_device_view_t< - vertex_t, - typename EdgeSrcValueInputWrapper::value_iterator>>>; - using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, - detail::edge_partition_endpoint_dummy_property_device_view_t, - std::conditional_t, - detail::edge_partition_endpoint_property_device_view_t< - vertex_t, - typename EdgeDstValueInputWrapper::value_iterator>>>; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using property_t = typename thrust::iterator_traits::value_type; + using result_t = typename thrust::iterator_traits::value_type; if (do_expensive_check) { auto num_invalids = @@ -221,13 +218,51 @@ void per_v_pair_transform_dst_nbr_intersection( CUGRAPH_EXPECTS(num_invalids == 0, ""); } - rmm::device_uvector vertex_pair_indices( - thrust::distance(vertex_pair_first, vertex_pair_last), handle.get_stream()); + auto num_input_pairs = static_cast(thrust::distance(vertex_pair_first, vertex_pair_last)); + std::optional> unique_vertices{std::nullopt}; + std::optional(size_t{0}, rmm::cuda_stream_view{}))> + property_buffer_for_unique_vertices{std::nullopt}; + if constexpr (GraphViewType::is_multi_gpu) { + unique_vertices = rmm::device_uvector(num_input_pairs * 2, handle.get_stream()); + auto elem0_first = thrust::make_transform_iterator( + vertex_pair_first, + cugraph::thrust_tuple_get::value_type, + 0>{}); + thrust::copy(handle.get_thrust_policy(), + elem0_first, + elem0_first + num_input_pairs, + (*unique_vertices).begin()); + auto elem1_first = thrust::make_transform_iterator( + vertex_pair_first, + cugraph::thrust_tuple_get::value_type, + 1>{}); + thrust::copy(handle.get_thrust_policy(), + elem1_first, + elem1_first + num_input_pairs, + (*unique_vertices).begin() + num_input_pairs); + thrust::sort(handle.get_thrust_policy(), (*unique_vertices).begin(), (*unique_vertices).end()); + (*unique_vertices) + .resize(thrust::distance((*unique_vertices).begin(), + thrust::unique(handle.get_thrust_policy(), + (*unique_vertices).begin(), + (*unique_vertices).end())), + handle.get_stream()); + + property_buffer_for_unique_vertices = + collect_values_for_sorted_unique_vertices(handle.get_comms(), + (*unique_vertices).data(), + static_cast((*unique_vertices).size()), + vertex_value_input_first, + graph_view.vertex_partition_range_lasts(), + handle.get_stream()); + } + + rmm::device_uvector vertex_pair_indices(num_input_pairs, handle.get_stream()); thrust::sequence( handle.get_thrust_policy(), vertex_pair_indices.begin(), vertex_pair_indices.end(), size_t{0}); std::vector h_edge_partition_major_range_lasts( - graph_view.number_of_local_edge_partitions); + graph_view.number_of_local_edge_partitions()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_first(i); } @@ -236,7 +271,7 @@ void per_v_pair_transform_dst_nbr_intersection( raft::update_device(d_edge_partition_major_range_lasts.data(), h_edge_partition_major_range_lasts.data(), h_edge_partition_major_range_lasts.size(), - std::cout); + handle.get_stream()); auto d_edge_partition_group_sizes = groupby_and_count( vertex_pair_indices.begin(), vertex_pair_indices.end(), @@ -257,25 +292,14 @@ void per_v_pair_transform_dst_nbr_intersection( std::vector h_edge_partition_group_displacements(h_edge_partition_group_sizes.size()); std::exclusive_scan(h_edge_partition_group_sizes.begin(), h_edge_partition_group_sizes.end(), - h_edge_partition_group_displacements.begin()); + h_edge_partition_group_displacements.begin(), + size_t{0}); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - edge_partition_src_input_device_view_t edge_partition_src_value_input{}; - edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; - if constexpr (GraphViewType::is_storage_transposed) { - edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input); - edge_partition_dst_value_input = - edge_partition_dst_input_device_view_t(edge_dst_value_input, i); - } else { - edge_partition_src_value_input = - edge_partition_src_input_device_view_t(edge_src_value_input, i); - edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); - } - auto edge_partition_vertex_pair_index_first = vertex_pair_indices.begin() + h_edge_partition_group_displacements[i]; @@ -284,7 +308,7 @@ void per_v_pair_transform_dst_nbr_intersection( // vertex degrees. to limit memory footprint ((1 << 15) is a tuning parameter) auto max_chunk_size = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 15); - auto max_num_chunks = (majors.size() + max_chunk_size - 1) / max_chunk_size; + auto max_num_chunks = (h_edge_partition_group_sizes[i] + max_chunk_size - 1) / max_chunk_size; if constexpr (GraphViewType::is_multi_gpu) { max_num_chunks = host_scalar_allreduce( handle.get_comms(), max_num_chunks, raft::comms::op_t::MAX, handle.get_stream()); @@ -292,35 +316,36 @@ void per_v_pair_transform_dst_nbr_intersection( std::vector h_chunk_sizes(max_num_chunks); if (h_chunk_sizes.size() > size_t{1}) { - auto d_chunk_sizes = - groupby_and_count(edge_partition_vertex_pair_index_first, - edge_partition_vertex_pair_index_first + h_edge_partition_group_sizes[i], - detail::compute_chunk_id_t{max_num_chunks}, - static_cast(max_num_chunks), - std::numeric_limits::max(), - handle.get_stream()); + auto d_chunk_sizes = groupby_and_count( + edge_partition_vertex_pair_index_first, + edge_partition_vertex_pair_index_first + h_edge_partition_group_sizes[i], + detail::compute_chunk_id_t{vertex_pair_first, max_num_chunks}, + static_cast(max_num_chunks), + std::numeric_limits::max(), + handle.get_stream()); raft::update_host( h_chunk_sizes.data(), d_chunk_sizes.data(), d_chunk_sizes.size(), handle.get_stream()); handle.sync_stream(); } else if (h_chunk_sizes.size() == size_t{1}) { - h_chunk_sizes[0] = majors.size(); + h_chunk_sizes[0] = h_edge_partition_group_sizes[i]; } - auto chunk_vertex_pair_index_first = edge_partition_vertx_pair_index_first; + auto chunk_vertex_pair_index_first = edge_partition_vertex_pair_index_first; for (size_t j = 0; j < h_chunk_sizes.size(); ++j) { auto this_chunk_size = h_chunk_sizes[j]; thrust::sort(handle.get_thrust_policy(), chunk_vertex_pair_index_first, chunk_vertex_pair_index_first + this_chunk_size, - indirection_copmare_less_t{ + detail::indirection_compare_less_t{ vertex_pair_first}); // detail::nbr_intersection() requires the input vertex // pairs to be sorted. - // FIXME: better restrict detail::nbr_intersection input vetex pairs to a single edge + // FIXME: better restrict detail::nbr_intersection input vertex pairs to a single edge // partition? This may provide additional performance improvement opportunities??? - auto chunk_vertex_pair_fist = thrust::make_transform_iterator( - chunk_vertex_pair_index_first, indirection_t{vertex_pair_first}); + auto chunk_vertex_pair_first = thrust::make_transform_iterator( + chunk_vertex_pair_index_first, + detail::indirection_t{vertex_pair_first}); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, @@ -329,25 +354,49 @@ void per_v_pair_transform_dst_nbr_intersection( std::array{true, true}, do_expensive_check); - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t{ - edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + if (unique_vertices) { + auto vertex_value_input_for_unique_vertices_first = + get_dataframe_buffer_begin(*property_buffer_for_unique_vertices); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t{ + edge_partition, + thrust::make_optional>((*unique_vertices).data(), + (*unique_vertices).size()), + vertex_value_input_for_unique_vertices_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); + } else { + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t{ + edge_partition, + thrust::optional>{thrust::nullopt}, + vertex_value_input_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); + } chunk_vertex_pair_index_first += this_chunk_size; } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c246025e88b..ce64c796c8c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -428,155 +428,161 @@ if(BUILD_CUGRAPH_MG_TESTS) set(GPU_COUNT "1") endif() - ########################################################################################### - # - MG SYMMETRIZE tests ------------------------------------------------------------------- + ############################################################################################### + # - MG SYMMETRIZE tests ----------------------------------------------------------------------- ConfigureTestMG(MG_SYMMETRIZE_TEST structure/mg_symmetrize_test.cpp) - ########################################################################################### - # - MG Transpose tests -------------------------------------------------------------------- + ############################################################################################### + # - MG Transpose tests ------------------------------------------------------------------------ ConfigureTestMG(MG_TRANSPOSE_TEST structure/mg_transpose_test.cpp) - ########################################################################################### - # - MG Transpose Storage tests ------------------------------------------------------------ + ############################################################################################### + # - MG Transpose Storage tests ---------------------------------------------------------------- ConfigureTestMG(MG_TRANSPOSE_STORAGE_TEST structure/mg_transpose_storage_test.cpp) - ########################################################################################### - # - MG Count self-loops and multi-edges tests --------------------------------------------- + ############################################################################################### + # - MG Count self-loops and multi-edges tests ------------------------------------------------- ConfigureTestMG(MG_COUNT_SELF_LOOPS_AND_MULTI_EDGES_TEST "structure/mg_count_self_loops_and_multi_edges_test.cpp") - ########################################################################################### - # - MG PAGERANK tests --------------------------------------------------------------------- + ############################################################################################### + # - MG PAGERANK tests ------------------------------------------------------------------------- ConfigureTestMG(MG_PAGERANK_TEST link_analysis/mg_pagerank_test.cpp) - ########################################################################################### - # - MG HITS tests ------------------------------------------------------------------------- + ############################################################################################### + # - MG HITS tests ----------------------------------------------------------------------------- ConfigureTestMG(MG_HITS_TEST link_analysis/mg_hits_test.cpp) - ########################################################################################### - # - MG KATZ CENTRALITY tests -------------------------------------------------------------- + ############################################################################################### + # - MG KATZ CENTRALITY tests ------------------------------------------------------------------ ConfigureTestMG(MG_KATZ_CENTRALITY_TEST centrality/mg_katz_centrality_test.cpp) - ########################################################################################### - # - MG EIGENVECTOR CENTRALITY tests -------------------------------------------------------------- + ############################################################################################### + # - MG EIGENVECTOR CENTRALITY tests ----------------------------------------------------------- ConfigureTestMG(MG_EIGENVECTOR_CENTRALITY_TEST centrality/mg_eigenvector_centrality_test.cpp) - ########################################################################################### - # - MG BFS tests -------------------------------------------------------------------------- + ############################################################################################### + # - MG BFS tests ------------------------------------------------------------------------------ ConfigureTestMG(MG_BFS_TEST traversal/mg_bfs_test.cpp) - ########################################################################################### - # - Extract BFS Paths tests --------------------------------------------------------------- + ############################################################################################### + # - Extract BFS Paths tests ------------------------------------------------------------------- ConfigureTestMG(MG_EXTRACT_BFS_PATHS_TEST traversal/mg_extract_bfs_paths_test.cu) - ########################################################################################### - # - MG SSSP tests ------------------------------------------------------------------------- + ############################################################################################### + # - MG SSSP tests ----------------------------------------------------------------------------- ConfigureTestMG(MG_SSSP_TEST traversal/mg_sssp_test.cpp) - ########################################################################################### - # - MG LOUVAIN tests ---------------------------------------------------------------------- + ############################################################################################### + # - MG LOUVAIN tests -------------------------------------------------------------------------- ConfigureTestMG(MG_LOUVAIN_TEST community/mg_louvain_helper.cu community/mg_louvain_test.cpp) - ########################################################################################### - # - MG WEAKLY CONNECTED COMPONENTS tests -------------------------------------------------- + ############################################################################################### + # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST components/mg_weakly_connected_components_test.cpp) - ########################################################################################### - # - MG GRAPH BROADCAST tests -------------------------------------------------------------- + ############################################################################################### + # - MG GRAPH BROADCAST tests ------------------------------------------------------------------ ConfigureTestMG(MG_GRAPH_BROADCAST_TEST bcast/mg_graph_bcast.cpp) - ########################################################################################### - # - MG Core Number tests ------------------------------------------------------------------ + ############################################################################################### + # - MG Core Number tests ---------------------------------------------------------------------- ConfigureTestMG(MG_CORE_NUMBER_TEST cores/mg_core_number_test.cpp) - ########################################################################################### - # - MG TRIANGLE COUNT tests --------------------------------------------------------------- + ############################################################################################### + # - MG TRIANGLE COUNT tests ------------------------------------------------------------------- ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) - ########################################################################################### - # - MG PRIMS COUNT_IF_V tests ------------------------------------------------------------- + ############################################################################################### + # - MG PRIMS COUNT_IF_V tests ----------------------------------------------------------------- ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) target_link_libraries(MG_COUNT_IF_V_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST tests -------------------------- + ############################################################################################### + # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST tests ------------------------------ ConfigureTestMG(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST_TEST prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu) target_link_libraries(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS REDUCE_V tests --------------------------------------------------------------- + ############################################################################################### + # - MG PRIMS REDUCE_V tests ------------------------------------------------------------------- ConfigureTestMG(MG_REDUCE_V_TEST prims/mg_reduce_v.cu) target_link_libraries(MG_REDUCE_V_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS TRANSFORM_REDUCE_V tests ----------------------------------------------------- + ############################################################################################### + # - MG PRIMS TRANSFORM_REDUCE_V tests --------------------------------------------------------- ConfigureTestMG(MG_TRANSFORM_REDUCE_V_TEST prims/mg_transform_reduce_v.cu) target_link_libraries(MG_TRANSFORM_REDUCE_V_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS TRANSFORM_REDUCE_E tests ----------------------------------------------------- + ############################################################################################### + # - MG PRIMS TRANSFORM_REDUCE_E tests --------------------------------------------------------- ConfigureTestMG(MG_TRANSFORM_REDUCE_E_TEST prims/mg_transform_reduce_e.cu) target_link_libraries(MG_TRANSFORM_REDUCE_E_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS COUNT_IF_E tests ------------------------------------------------------------- + ############################################################################################### + # - MG PRIMS COUNT_IF_E tests ----------------------------------------------------------------- ConfigureTestMG(MG_COUNT_IF_E_TEST prims/mg_count_if_e.cu) target_link_libraries(MG_COUNT_IF_E_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E tests ----------------------------- + ############################################################################################### + # - MG PRIMS PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E tests --------------------------------- ConfigureTestMG(MG_PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E_TEST prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu) target_link_libraries(MG_PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS EXTRACT_IF_E tests ----------------------------------------------------------- + ############################################################################################### + # - MG PRIMS EXTRACT_IF_E tests --------------------------------------------------------------- ConfigureTestMG(MG_EXTRACT_IF_E_TEST prims/mg_extract_if_e.cu) target_link_libraries(MG_EXTRACT_IF_E_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests -------------------------------- + ############################################################################################### + # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests ------------------------------------ ConfigureTestMG(MG_EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E_TEST prims/mg_extract_transform_v_frontier_outgoing_e.cu) target_link_libraries(MG_EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG PRIMS PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E tests ------------------------------- + ############################################################################################### + # - MG PRIMS PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E tests ----------------------------------- ConfigureTestMG(MG_PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E_TEST prims/mg_per_v_random_select_transform_outgoing_e.cu) target_link_libraries(MG_PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG GATHER_UTILS tests ----------------------------------------------------------------- + ############################################################################################### + # - MG PRIMS PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION tests ---------------------------------- + ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST + prims/mg_per_v_pair_transform_dst_nbr_intersection.cu) + target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST PRIVATE cuco::cuco) + + ############################################################################################### + # - MG GATHER_UTILS tests --------------------------------------------------------------------- ConfigureTestMG(MG_GATHER_UTILS_TEST sampling/detail/mg_gather_utils.cu) target_link_libraries(MG_GATHER_UTILS_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG GATHER_ONE_HOP tests --------------------------------------------------------------- + ############################################################################################### + # - MG GATHER_ONE_HOP tests ------------------------------------------------------------------- ConfigureTestMG(MG_GATHER_ONE_HOP_TEST sampling/detail/mg_gather_one_hop.cu) target_link_libraries(MG_GATHER_ONE_HOP_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - MG NBR SAMPLING tests ----------------------------------------------------------------- + ############################################################################################### + # - MG NBR SAMPLING tests --------------------------------------------------------------------- ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cu) target_link_libraries(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST PRIVATE cuco::cuco) - ########################################################################################### - # - RANDOM_WALKS tests -------------------------------------------------------------------- -# ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp) + ############################################################################################### + # - RANDOM_WALKS tests ------------------------------------------------------------------------ + # ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp) - ################################################################################################### - # - SIMILARITY tests ------------------------------------------------------------------------------ + ############################################################################################### + # - SIMILARITY tests -------------------------------------------------------------------------- ConfigureTestMG(MG_SIMILARITY_TEST link_prediction/mg_similarity_test.cpp) - ########################################################################################### - # - MG C API tests ------------------------------------------------------------------------ + ############################################################################################### + # - MG C API tests ---------------------------------------------------------------------------- ConfigureCTestMG(MG_CAPI_CREATE_GRAPH_TEST c_api/mg_create_graph_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_PAGERANK_TEST c_api/mg_pagerank_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_BFS_TEST c_api/mg_bfs_test.c c_api/mg_test_utils.cpp) @@ -598,7 +604,7 @@ endif() # - C API tests ----------------------------------------------------------------------------------- ################################################################################################### -# - common C API test utils ----------------------------------------------------------------------------- +# - common C API test utils ----------------------------------------------------------------------- add_library(cugraph_c_testutil STATIC c_api/test_utils.cpp) From 789f15c81ce3ef14d82416a25c9ef0e027ae2fc7 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 27 Sep 2022 14:43:25 -0700 Subject: [PATCH 17/18] test MG prim test --- ...r_v_pair_transform_dst_nbr_intersection.cu | 273 ++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu new file mode 100644 index 00000000000..73a1c9c0ae5 --- /dev/null +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include + +template +struct intersection_op_t { + __device__ thrust::tuple operator()( + vertex_t, + vertex_t, + edge_t v0_prop /* out degree */, + edge_t v1_prop /* out degree */, + raft::device_span intersection) const + { + return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); + } +}; + +struct Prims_Usecase { + size_t num_vertex_pairs{0}; + bool check_correctness{true}; +}; + +template +class Tests_MGPerVPairTransformDstNbrIntersection + : public ::testing::TestWithParam> { + public: + Tests_MGPerVPairTransformDstNbrIntersection() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResClock hr_clock{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + auto const row_comm_size = handle_->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()).get_size(); + auto const col_comm_size = handle_->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()).get_size(); + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_clock.start(); + } + + auto [mg_graph, d_mg_renumber_map_labels] = + cugraph::test::construct_graph( + *handle_, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive + + ASSERT_TRUE( + mg_graph_view.number_of_vertices() > + vertex_t{0}); // the code below to generate vertex pairs is invalid for an empty graph. + + auto mg_vertex_pair_buffer = + cugraph::allocate_dataframe_buffer>( + prims_usecase.num_vertex_pairs / comm_size + + (static_cast(comm_rank) < prims_usecase.num_vertex_pairs % comm_size ? 1 : 0), + handle_->get_stream()); + thrust::tabulate( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), + [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) { + cuco::detail::MurmurHash3_32 + hash_func{}; // use hash_func to generate arbitrary vertex pairs + auto v0 = static_cast(hash_func(i + comm_rank) % num_vertices); + auto v1 = static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); + return thrust::make_tuple(v0, v1); + }); + + auto h_vertex_partition_range_lasts = mg_graph_view.vertex_partition_range_lasts(); + rmm::device_uvector d_vertex_partition_range_lasts(h_vertex_partition_range_lasts.size(), handle_->get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), h_vertex_partition_range_lasts.data(), h_vertex_partition_range_lasts.size(), handle_->get_stream()); + std::tie(mg_vertex_pair_buffer, std::ignore) = cugraph::groupby_gpu_id_and_shuffle_values( + handle_->get_comms(), + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{raft::device_span(d_vertex_partition_range_lasts.data(), d_vertex_partition_range_lasts.size()), comm_size, row_comm_size, col_comm_size}, + handle_->get_stream()); + + auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); + auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_clock.start(); + } + + cugraph::per_v_pair_transform_dst_nbr_intersection( + *handle_, + mg_graph_view, + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), + mg_out_degrees.begin(), + intersection_op_t{}, + cugraph::get_dataframe_buffer_begin(mg_result_buffer)); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG per_v_pair_transform_dst_nbr_intersection took " << elapsed_time * 1e-6 + << " s.\n"; + } + + // 3. validate MG results + + if (prims_usecase.check_correctness) { + cugraph::unrenumber_int_vertices(*handle_, std::get<0>(mg_vertex_pair_buffer).data(), cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), (*d_mg_renumber_map_labels).data(), h_vertex_partition_range_lasts); + cugraph::unrenumber_int_vertices(*handle_, std::get<1>(mg_vertex_pair_buffer).data(), cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), (*d_mg_renumber_map_labels).data(), h_vertex_partition_range_lasts); + + cugraph::graph_t unrenumbered_graph(*handle_); + std::tie(unrenumbered_graph, std::ignore) = + cugraph::test::construct_graph( + *handle_, input_usecase, false, false); + + auto unrenumbered_graph_view = unrenumbered_graph.view(); + + auto sg_result_buffer = cugraph::allocate_dataframe_buffer>(cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); + auto sg_out_degrees = unrenumbered_graph_view.compute_out_degrees(*handle_); + + cugraph::per_v_pair_transform_dst_nbr_intersection( + *handle_, + unrenumbered_graph_view, + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer /* now unrenumbered */), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer /* now unrenumbered */), + sg_out_degrees.begin(), + intersection_op_t{}, + cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + bool valid = thrust::equal(handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_result_buffer), + cugraph::get_dataframe_buffer_end(mg_result_buffer), + cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + valid = static_cast(cugraph::host_scalar_allreduce( + handle_->get_comms(), static_cast(valid), raft::comms::op_t::MIN, handle_->get_stream())); + ASSERT_TRUE(valid); + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr + Tests_MGPerVPairTransformDstNbrIntersection::handle_ = nullptr; + +using Tests_MGPerVPairTransformDstNbrIntersection_File = + Tests_MGPerVPairTransformDstNbrIntersection; +using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = + Tests_MGPerVPairTransformDstNbrIntersection; + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGPerVPairTransformDstNbrIntersection_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1024}, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() From 69f307ce80b9e0011563ff724a138e9aadb3990d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 27 Sep 2022 15:17:18 -0700 Subject: [PATCH 18/18] clang-format --- ...r_v_pair_transform_dst_nbr_intersection.cu | 46 ++++++++++++++----- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 73a1c9c0ae5..457e309a65b 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -30,9 +30,9 @@ #include -#include #include #include +#include #include #include @@ -82,8 +82,10 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto const comm_rank = handle_->get_comms().get_rank(); auto const comm_size = handle_->get_comms().get_size(); - auto const row_comm_size = handle_->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()).get_size(); - auto const col_comm_size = handle_->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()).get_size(); + auto const row_comm_size = + handle_->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()).get_size(); + auto const col_comm_size = + handle_->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()).get_size(); // 1. create MG graph @@ -131,13 +133,22 @@ class Tests_MGPerVPairTransformDstNbrIntersection }); auto h_vertex_partition_range_lasts = mg_graph_view.vertex_partition_range_lasts(); - rmm::device_uvector d_vertex_partition_range_lasts(h_vertex_partition_range_lasts.size(), handle_->get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), h_vertex_partition_range_lasts.data(), h_vertex_partition_range_lasts.size(), handle_->get_stream()); + rmm::device_uvector d_vertex_partition_range_lasts( + h_vertex_partition_range_lasts.size(), handle_->get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + h_vertex_partition_range_lasts.data(), + h_vertex_partition_range_lasts.size(), + handle_->get_stream()); std::tie(mg_vertex_pair_buffer, std::ignore) = cugraph::groupby_gpu_id_and_shuffle_values( handle_->get_comms(), cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{raft::device_span(d_vertex_partition_range_lasts.data(), d_vertex_partition_range_lasts.size()), comm_size, row_comm_size, col_comm_size}, + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + row_comm_size, + col_comm_size}, handle_->get_stream()); auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( @@ -171,8 +182,18 @@ class Tests_MGPerVPairTransformDstNbrIntersection // 3. validate MG results if (prims_usecase.check_correctness) { - cugraph::unrenumber_int_vertices(*handle_, std::get<0>(mg_vertex_pair_buffer).data(), cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), (*d_mg_renumber_map_labels).data(), h_vertex_partition_range_lasts); - cugraph::unrenumber_int_vertices(*handle_, std::get<1>(mg_vertex_pair_buffer).data(), cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), (*d_mg_renumber_map_labels).data(), h_vertex_partition_range_lasts); + cugraph::unrenumber_int_vertices( + *handle_, + std::get<0>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + (*d_mg_renumber_map_labels).data(), + h_vertex_partition_range_lasts); + cugraph::unrenumber_int_vertices( + *handle_, + std::get<1>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + (*d_mg_renumber_map_labels).data(), + h_vertex_partition_range_lasts); cugraph::graph_t unrenumbered_graph(*handle_); std::tie(unrenumbered_graph, std::ignore) = @@ -181,7 +202,8 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto unrenumbered_graph_view = unrenumbered_graph.view(); - auto sg_result_buffer = cugraph::allocate_dataframe_buffer>(cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); + auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); auto sg_out_degrees = unrenumbered_graph_view.compute_out_degrees(*handle_); cugraph::per_v_pair_transform_dst_nbr_intersection( @@ -198,8 +220,10 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::get_dataframe_buffer_end(mg_result_buffer), cugraph::get_dataframe_buffer_begin(sg_result_buffer)); - valid = static_cast(cugraph::host_scalar_allreduce( - handle_->get_comms(), static_cast(valid), raft::comms::op_t::MIN, handle_->get_stream())); + valid = static_cast(cugraph::host_scalar_allreduce(handle_->get_comms(), + static_cast(valid), + raft::comms::op_t::MIN, + handle_->get_stream())); ASSERT_TRUE(valid); } }