From 0f7860e4863930952b40c08c06a817df3bbe7bd0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 30 Jan 2024 14:35:56 -0800 Subject: [PATCH 01/18] update weight_sum_test to match other tests --- cpp/tests/structure/weight_sum_test.cpp | 249 ++++++++++++++++-------- 1 file changed, 168 insertions(+), 81 deletions(-) diff --git a/cpp/tests/structure/weight_sum_test.cpp b/cpp/tests/structure/weight_sum_test.cpp index a61c0d4eeb4..916a5c51ecb 100644 --- a/cpp/tests/structure/weight_sum_test.cpp +++ b/cpp/tests/structure/weight_sum_test.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,7 @@ void weight_sum_reference(edge_t const* offsets, for (vertex_t i = 0; i < num_vertices; ++i) { if (major) { weight_sums[i] = - std::accumulate(weights + offsets[i], weights + offsets[i + 1], weight_t{0.0}); + std::reduce(weights + offsets[i], weights + offsets[i + 1], weight_t{0.0}); } else { for (auto j = offsets[i]; j < offsets[i + 1]; ++j) { auto nbr = indices[j]; @@ -60,19 +61,12 @@ void weight_sum_reference(edge_t const* offsets, } typedef struct WeightSum_Usecase_t { - std::string graph_file_full_path{}; - - WeightSum_Usecase_t(std::string const& graph_file_path) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - }; + bool edge_masking{false}; + bool check_correctness{true}; } WeightSum_Usecase; -class Tests_WeightSum : public ::testing::TestWithParam { +template +class Tests_WeightSum : public ::testing::TestWithParam> { public: Tests_WeightSum() {} @@ -83,96 +77,189 @@ class Tests_WeightSum : public ::testing::TestWithParam { virtual void TearDown() {} template - void run_current_test(WeightSum_Usecase const& configuration) + void run_current_test(WeightSum_Usecase const& weight_sum_usecase, + input_usecase_t const& input_usecase) { + constexpr bool renumber = true; + constexpr bool test_weighted = true; + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, test_weighted, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } - cugraph::graph_t graph(handle); - std::optional< - cugraph::edge_property_t, - weight_t>> - edge_weights{std::nullopt}; - std::tie(graph, edge_weights, std::ignore) = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true, false); auto graph_view = graph.view(); auto edge_weight_view = edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; - auto h_offsets = - cugraph::test::to_host(handle, graph_view.local_edge_partition_view().offsets()); - auto h_indices = - cugraph::test::to_host(handle, graph_view.local_edge_partition_view().indices()); - auto h_weights = cugraph::test::to_host( - handle, - raft::device_span((*edge_weight_view).value_firsts()[0], - (*edge_weight_view).edge_counts()[0])); - - std::vector h_reference_in_weight_sums(graph_view.number_of_vertices()); - std::vector h_reference_out_weight_sums(graph_view.number_of_vertices()); - - weight_sum_reference(h_offsets.data(), - h_indices.data(), - h_weights.data(), - h_reference_in_weight_sums.data(), - graph_view.number_of_vertices(), - store_transposed); - - weight_sum_reference(h_offsets.data(), - h_indices.data(), - h_weights.data(), - h_reference_out_weight_sums.data(), - graph_view.number_of_vertices(), - !store_transposed); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Compute in-weight sums"); + } auto d_in_weight_sums = cugraph::compute_in_weight_sums(handle, graph_view, *edge_weight_view); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Compute out-weight sums"); + } + auto d_out_weight_sums = cugraph::compute_out_weight_sums(handle, graph_view, *edge_weight_view); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - - auto h_cugraph_in_weight_sums = cugraph::test::to_host(handle, d_in_weight_sums); - auto h_cugraph_out_weight_sums = cugraph::test::to_host(handle, d_out_weight_sums); - - auto threshold_ratio = weight_t{1e-4}; - auto threshold_magnitude = std::numeric_limits::min(); - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - ASSERT_TRUE(std::equal(h_reference_in_weight_sums.begin(), - h_reference_in_weight_sums.end(), - h_cugraph_in_weight_sums.begin(), - nearly_equal)) - << "In-weight-sum values do not match with the reference values."; - ASSERT_TRUE(std::equal(h_reference_out_weight_sums.begin(), - h_reference_out_weight_sums.end(), - h_cugraph_out_weight_sums.begin(), - nearly_equal)) - << "Out-weight-sum values do not match with the reference values."; + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (weight_sum_usecase.check_correctness) { + auto [h_offsets, h_indices, h_weights] = cugraph::test::graph_to_host_csr(handle, graph_view, edge_weight_view); + + std::vector h_reference_in_weight_sums(graph_view.number_of_vertices()); + std::vector h_reference_out_weight_sums(graph_view.number_of_vertices()); + + weight_sum_reference(h_offsets.data(), + h_indices.data(), + (*h_weights).data(), + h_reference_in_weight_sums.data(), + graph_view.number_of_vertices(), + store_transposed); + + weight_sum_reference(h_offsets.data(), + h_indices.data(), + (*h_weights).data(), + h_reference_out_weight_sums.data(), + graph_view.number_of_vertices(), + !store_transposed); + + auto h_cugraph_in_weight_sums = cugraph::test::to_host(handle, d_in_weight_sums); + auto h_cugraph_out_weight_sums = cugraph::test::to_host(handle, d_out_weight_sums); + + auto threshold_ratio = weight_t{2.0 * 1e-4}; + auto threshold_magnitude = std::numeric_limits::min(); + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + auto ret = + std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_in_weight_sums.begin(), + h_reference_in_weight_sums.end(), + h_cugraph_in_weight_sums.begin(), + nearly_equal)) + << "In-weight-sum values do not match with the reference values."; + + ASSERT_TRUE(std::equal(h_reference_out_weight_sums.begin(), + h_reference_out_weight_sums.end(), + h_cugraph_out_weight_sums.begin(), + nearly_equal)) + << "Out-weight-sum values do not match with the reference values."; + } } }; -// FIXME: add tests for type combinations +using Tests_WeightSum_File = Tests_WeightSum; +using Tests_WeightSum_Rmat = Tests_WeightSum; + +TEST_P(Tests_WeightSum_File, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_WeightSum_File, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_WeightSum_Rmat, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} -TEST_P(Tests_WeightSum, CheckInt32Int32FloatTransposeFalse) +TEST_P(Tests_WeightSum_Rmat, CheckInt32Int32FloatTransposeTrue) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_WeightSum, CheckInt32Int32FloatTransposeTrue) +TEST_P(Tests_WeightSum_Rmat, CheckInt32Int64FloatTransposeFalse) { - run_current_test(GetParam()); + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); } -INSTANTIATE_TEST_SUITE_P(simple_test, - Tests_WeightSum, - ::testing::Values(WeightSum_Usecase("test/datasets/karate.mtx"), - WeightSum_Usecase("test/datasets/web-Google.mtx"), - WeightSum_Usecase("test/datasets/ljournal-2008.mtx"), - WeightSum_Usecase("test/datasets/webbase-1M.mtx"))); +TEST_P(Tests_WeightSum_Rmat, CheckInt32Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_WeightSum_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_WeightSum_Rmat, CheckInt64Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_WeightSum_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(WeightSum_Usecase{false}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_WeightSum_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values(WeightSum_Usecase{false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_WeightSum_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + ::testing::Values(WeightSum_Usecase{false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From f22e31b37a50fc6b1087e0a0ba56d52f193f9d47 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 30 Jan 2024 14:46:02 -0800 Subject: [PATCH 02/18] enalbe edge masking in compute_in|out_weight_sums --- cpp/src/structure/graph_weight_utils_impl.cuh | 10 ---------- cpp/tests/structure/weight_sum_test.cpp | 6 +++--- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/cpp/src/structure/graph_weight_utils_impl.cuh b/cpp/src/structure/graph_weight_utils_impl.cuh index 1e386792b21..e97266c557a 100644 --- a/cpp/src/structure/graph_weight_utils_impl.cuh +++ b/cpp/src/structure/graph_weight_utils_impl.cuh @@ -89,8 +89,6 @@ rmm::device_uvector compute_in_weight_sums( graph_view_t const& graph_view, edge_property_view_t edge_weight_view) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (store_transposed) { return compute_weight_sums(handle, graph_view, edge_weight_view); } else { @@ -108,8 +106,6 @@ rmm::device_uvector compute_out_weight_sums( graph_view_t const& graph_view, edge_property_view_t edge_weight_view) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (store_transposed) { return compute_weight_sums(handle, graph_view, edge_weight_view); } else { @@ -127,8 +123,6 @@ weight_t compute_max_in_weight_sum( graph_view_t const& graph_view, edge_property_view_t edge_weight_view) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - auto in_weight_sums = compute_in_weight_sums(handle, graph_view, edge_weight_view); auto it = thrust::max_element(handle.get_thrust_policy(), in_weight_sums.begin(), in_weight_sums.end()); @@ -153,8 +147,6 @@ weight_t compute_max_out_weight_sum( graph_view_t const& graph_view, edge_property_view_t edge_weight_view) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - auto out_weight_sums = compute_out_weight_sums(handle, graph_view, edge_weight_view); auto it = thrust::max_element(handle.get_thrust_policy(), out_weight_sums.begin(), out_weight_sums.end()); @@ -179,8 +171,6 @@ weight_t compute_total_edge_weight( graph_view_t const& graph_view, edge_property_view_t edge_weight_view) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - return transform_reduce_e( handle, graph_view, diff --git a/cpp/tests/structure/weight_sum_test.cpp b/cpp/tests/structure/weight_sum_test.cpp index 916a5c51ecb..d53ddeb4e48 100644 --- a/cpp/tests/structure/weight_sum_test.cpp +++ b/cpp/tests/structure/weight_sum_test.cpp @@ -236,7 +236,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_WeightSum_File, ::testing::Combine( // enable correctness checks - ::testing::Values(WeightSum_Usecase{false}), + ::testing::Values(WeightSum_Usecase{false}, WeightSum_Usecase{true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -247,7 +247,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_WeightSum_Rmat, ::testing::Combine( // enable correctness checks - ::testing::Values(WeightSum_Usecase{false}), + ::testing::Values(WeightSum_Usecase{false}, WeightSum_Usecase{true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( @@ -259,7 +259,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_WeightSum_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(WeightSum_Usecase{false, false}), + ::testing::Values(WeightSum_Usecase{false, false}, WeightSum_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From d6848f5412e5ca8a57ebded830e95de751eba3fa Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 30 Jan 2024 15:13:30 -0800 Subject: [PATCH 03/18] test edge masking in mg_per_v_pair_transfomr_dst_nbr_weighted_intersection.cu --- ...transform_dst_nbr_weighted_intersection.cu | 61 ++++++++++--------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 4d05b0c9e65..be92c82776a 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "property_generator.cuh" + #include #include #include @@ -84,6 +86,7 @@ struct intersection_op_t { struct Prims_Usecase { size_t num_vertex_pairs{0}; + bool edge_masking{false}; bool check_correctness{true}; }; @@ -109,6 +112,13 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto const comm_rank = handle_->get_comms().get_rank(); auto const comm_size = handle_->get_comms().get_size(); + constexpr bool store_transposed = false; + + constexpr bool test_weighted = true; + constexpr bool renumber = true; + constexpr bool drop_self_loops = false; + constexpr bool drop_multi_edges = true; + // 1. create MG graph if (cugraph::test::g_perf) { @@ -117,29 +127,10 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.start("MG Construct graph"); } - constexpr bool store_transposed = false; - constexpr bool multi_gpu = true; - - cugraph::graph_t mg_graph(*handle_); - std::optional< - cugraph::edge_property_t, - weight_t>> - mg_edge_weight{std::nullopt}; - - std::optional> mg_renumber_map{std::nullopt}; - - constexpr bool test_weighted = true; - constexpr bool renumber = true; - constexpr bool drop_self_loops = false; - constexpr bool drop_multi_edges = true; - - std::tie(mg_graph, mg_edge_weight, mg_renumber_map) = - cugraph::test::construct_graph( + auto [mg_graph, mg_edge_weight, mg_renumber_map] = + cugraph::test::construct_graph( *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); - auto mg_graph_view = mg_graph.view(); - auto mg_edge_weight_view = (*mg_edge_weight).view(); - if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -147,6 +138,16 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.display_and_clear(std::cout); } + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = (*mg_edge_weight).view(); + + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive ASSERT_TRUE( @@ -355,15 +356,18 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{10}, true}), + ::testing::Values(Prims_Usecase{size_t{10}, false, true}, + Prims_Usecase{size_t{10}, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_MGPerVPairTransformDstNbrIntersection_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1024}, false, true}, + Prims_Usecase{size_t{1024}, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -373,7 +377,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGPerVPairTransformDstNbrIntersection_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), + ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false, false}, + Prims_Usecase{size_t{1024 * 1024}, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 51efd365fdbae0d33eeb57212a7a97acc5e75c9b Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 31 Jan 2024 09:49:53 -0800 Subject: [PATCH 04/18] bug fix --- cpp/src/prims/detail/nbr_intersection.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 8261ec747f9..29062368462 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -213,8 +213,7 @@ struct update_rx_major_local_nbrs_t { auto mask_first = (*edge_partition_e_mask).value_first(); if constexpr (!std::is_same_v) { auto input_first = - thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + - edge_offset; + thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()); copy_if_mask_set(input_first, mask_first, thrust::make_zip_iterator(local_nbrs_for_rx_majors.begin(), From 3b814a37c55038e4a9a922a7d6f7232a4a9254cc Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 1 Feb 2024 09:41:09 -0800 Subject: [PATCH 05/18] add const to functions that can be const --- cpp/src/centrality/betweenness_centrality_impl.cuh | 2 +- cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh | 2 +- cpp/src/structure/induced_subgraph_impl.cuh | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index e496344583c..8826bcd50f2 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -69,7 +69,7 @@ struct extract_edge_e_op_t { vertex_t dst, thrust::tuple src_props, thrust::tuple dst_props, - weight_t edge_centrality) + weight_t edge_centrality) const { return ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) ? thrust::optional>{thrust::make_tuple(src, dst)} diff --git a/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh b/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh index 74267d02b38..d669bd44088 100644 --- a/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh +++ b/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh @@ -42,7 +42,7 @@ struct return_edges_with_properties_e_op { vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, - EdgeProperties edge_properties) + EdgeProperties edge_properties) const { static_assert(std::is_same_v || std::is_same_v>); diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh index 18e1af32a71..f6bb7ee605d 100644 --- a/cpp/src/structure/induced_subgraph_impl.cuh +++ b/cpp/src/structure/induced_subgraph_impl.cuh @@ -71,7 +71,7 @@ struct induced_subgraph_weighted_edge_op { vertex_t dst, property_t sv, property_t dv, - weight_t wgt) + weight_t wgt) const { size_t subgraph = thrust::get<1>(tagged_src); return thrust::binary_search(thrust::seq, @@ -95,7 +95,7 @@ struct induced_subgraph_unweighted_edge_op { vertex_t dst, property_t sv, property_t dv, - thrust::nullopt_t) + thrust::nullopt_t) const { size_t subgraph = thrust::get<1>(tagged_src); return thrust::binary_search(thrust::seq, From 8f48eb917acef954a3bb1c3b8840b130a8d0aecf Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 2 Feb 2024 09:25:48 -0800 Subject: [PATCH 06/18] update detail::extract_transform_v_frontier_e to support edge masking --- .../detail/extract_transform_v_frontier_e.cuh | 574 ++++++++---------- cpp/src/prims/detail/prim_functors.cuh | 70 ++- ...v_transform_reduce_incoming_outgoing_e.cuh | 4 + 3 files changed, 327 insertions(+), 321 deletions(-) diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 2d77d64e1ff..4f53f525673 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -87,15 +88,17 @@ __device__ void push_buffer_element(e_op_result_t e_op_result, } } -template -__global__ void extract_transform_v_frontier_e_hypersparse( +__global__ void extract_transform_v_frontier_e_hypersparse_or_low_degree( edge_partition_device_view_t edge_partition, @@ -104,6 +107,7 @@ __global__ void extract_transform_v_frontier_e_hypersparse( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -160,250 +164,124 @@ __global__ void extract_transform_v_frontier_e_hypersparse( } else { major = thrust::get<0>(key); } - auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); - if (major_hypersparse_idx) { - auto major_idx = major_start_offset + *major_hypersparse_idx; - local_degree = edge_partition.local_degree(major_idx); - warp_key_local_edge_offsets[threadIdx.x] = edge_partition.local_offset(major_idx); - } else { - local_degree = edge_t{0}; - warp_key_local_edge_offsets[threadIdx.x] = edge_t{0}; // dummy - } - } - WarpScan(temp_storage) - .InclusiveSum(local_degree, warp_local_degree_inclusive_sums[threadIdx.x]); - __syncwarp(); - - // process local edges for the keys in [key_first + min_key_idx, key_first + max_key_idx) - - auto num_edges_this_warp = warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + - (max_key_idx - min_key_idx) - 1]; - auto rounded_up_num_edges_this_warp = - ((static_cast(num_edges_this_warp) + (raft::warp_size() - 1)) / raft::warp_size()) * - raft::warp_size(); - - for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{}; - - if (i < static_cast(num_edges_this_warp)) { - auto key_idx_this_warp = static_cast(thrust::distance( - warp_local_degree_inclusive_sums + warp_id * raft::warp_size(), - thrust::upper_bound(thrust::seq, - warp_local_degree_inclusive_sums + warp_id * raft::warp_size(), - warp_local_degree_inclusive_sums + warp_id * raft::warp_size() + - (max_key_idx - min_key_idx), - i))); - auto local_edge_offset = - warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + - static_cast(i - - ((key_idx_this_warp == 0) - ? edge_t{0} - : warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + - key_idx_this_warp - 1])); - auto key = *(key_first + (min_key_idx + key_idx_this_warp)); - vertex_t major{}; - if constexpr (std::is_same_v) { - major = key; - } else { - major = thrust::get<0>(key); - } - auto minor = indices[local_edge_offset]; - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - std::conditional_t - key_or_src{}; // key if major - std::conditional_t - key_or_dst{}; // key if major - if constexpr (GraphViewType::is_storage_transposed) { - key_or_src = minor; - key_or_dst = key; + if constexpr (hypersparse) { + auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); + if (major_hypersparse_idx) { + auto major_idx = major_start_offset + *major_hypersparse_idx; + local_degree = edge_partition.local_degree(major_idx); + warp_key_local_edge_offsets[threadIdx.x] = edge_partition.local_offset(major_idx); } else { - key_or_src = key; - key_or_dst = minor; - } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - e_op_result = e_op(key_or_src, - key_or_dst, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - edge_partition_e_value_input.get(local_edge_offset)); - } - auto ballot_e_op = - __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot_e_op) { - if (lane_id == 0) { - auto increment = __popc(ballot_e_op); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = - static_cast(atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); - } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot_e_op & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); + local_degree = edge_t{0}; + warp_key_local_edge_offsets[threadIdx.x] = edge_t{0}; // dummy } - } - } - idx += gridDim.x * blockDim.x; - } -} - -template -__global__ void extract_transform_v_frontier_e_low_degree( - edge_partition_device_view_t edge_partition, - KeyIterator key_first, - KeyIterator key_last, - EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, - EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - BufferKeyOutputIterator buffer_key_output_first, - BufferValueOutputIterator buffer_value_output_first, - size_t* buffer_idx_ptr, - EdgeOp e_op) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using key_t = typename thrust::iterator_traits::value_type; - using e_op_result_t = - typename edge_op_result_type::type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto const warp_id = threadIdx.x / raft::warp_size(); - auto const lane_id = tid % raft::warp_size(); - auto idx = static_cast(tid); - - __shared__ edge_t - warp_local_degree_inclusive_sums[extract_transform_v_frontier_e_kernel_block_size]; - __shared__ edge_t warp_key_local_edge_offsets[extract_transform_v_frontier_e_kernel_block_size]; - - using WarpScan = cub::WarpScan; - __shared__ typename WarpScan::TempStorage temp_storage; - - __shared__ size_t - buffer_warp_start_indices[extract_transform_v_frontier_e_kernel_block_size / raft::warp_size()]; - - auto indices = edge_partition.indices(); - - vertex_t num_keys = static_cast(thrust::distance(key_first, key_last)); - auto rounded_up_num_keys = - ((static_cast(num_keys) + (raft::warp_size() - 1)) / raft::warp_size()) * - raft::warp_size(); - while (idx < rounded_up_num_keys) { - auto min_key_idx = static_cast(idx - (idx % raft::warp_size())); // inclusive - auto max_key_idx = - static_cast(std::min(static_cast(min_key_idx) + raft::warp_size(), - static_cast(num_keys))); // exclusive - - // update warp_local_degree_inclusive_sums & warp_key_local_edge_offsets - - edge_t local_degree{0}; - if (lane_id < static_cast(max_key_idx - min_key_idx)) { - auto key = *(key_first + idx); - vertex_t major{}; - if constexpr (std::is_same_v) { - major = key; } else { - major = thrust::get<0>(key); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + local_degree = edge_partition.local_degree(major_offset); + warp_key_local_edge_offsets[threadIdx.x] = edge_partition.local_offset(major_offset); } - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - local_degree = edge_partition.local_degree(major_offset); - warp_key_local_edge_offsets[threadIdx.x] = edge_partition.local_offset(major_offset); } WarpScan(temp_storage) .InclusiveSum(local_degree, warp_local_degree_inclusive_sums[threadIdx.x]); __syncwarp(); - // processes local edges for the keys in [key_first + min_key_idx, key_first + max_key_idx) + // all the threads in a warp collectively process local edges for the keys in [key_first + + // min_key_idx, key_first + max_key_idx) auto num_edges_this_warp = warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + (max_key_idx - min_key_idx) - 1]; auto rounded_up_num_edges_this_warp = ((static_cast(num_edges_this_warp) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); - for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{}; - - if (i < static_cast(num_edges_this_warp)) { - auto key_idx_this_warp = static_cast(thrust::distance( - warp_local_degree_inclusive_sums + warp_id * raft::warp_size(), - thrust::upper_bound(thrust::seq, - warp_local_degree_inclusive_sums + warp_id * raft::warp_size(), - warp_local_degree_inclusive_sums + warp_id * raft::warp_size() + - (max_key_idx - min_key_idx), - i))); - auto local_edge_offset = - warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + - static_cast(i - - ((key_idx_this_warp == 0) - ? edge_t{0} - : warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + - key_idx_this_warp - 1])); - auto key = *(key_first + (min_key_idx + key_idx_this_warp)); - vertex_t major{}; - if constexpr (std::is_same_v) { - major = key; - } else { - major = thrust::get<0>(key); + + auto call_e_op = call_e_op_with_key_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op}; + + auto this_warp_inclusive_sum_first = + warp_local_degree_inclusive_sums + warp_id * raft::warp_size(); + auto this_warp_inclusive_sum_last = this_warp_inclusive_sum_first + (max_key_idx - min_key_idx); + + if (edge_partition_e_mask) { + for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + + if (i < static_cast(num_edges_this_warp)) { + auto key_idx_this_warp = static_cast(thrust::distance( + this_warp_inclusive_sum_first, + thrust::upper_bound( + thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); + auto local_edge_offset = + warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + + static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} + : *(this_warp_inclusive_sum_first + + (key_idx_this_warp - 1)))); + if ((*edge_partition_e_mask).get(local_edge_offset)) { + auto key = *(key_first + (min_key_idx + key_idx_this_warp)); + e_op_result = call_e_op(key, local_edge_offset); + } } - auto minor = indices[local_edge_offset]; - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - std::conditional_t - key_or_src{}; // key if major - std::conditional_t - key_or_dst{}; // key if major - if constexpr (GraphViewType::is_storage_transposed) { - key_or_src = minor; - key_or_dst = key; - } else { - key_or_src = key; - key_or_dst = minor; + auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); + if (ballot > 0) { + if (lane_id == 0) { + auto increment = __popc(ballot); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + buffer_warp_start_indices[warp_id] = static_cast( + atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(increment))); + } + __syncwarp(); + if (e_op_result) { + auto buffer_warp_offset = + static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); + push_buffer_element(e_op_result, + buffer_key_output_first, + buffer_value_output_first, + buffer_warp_start_indices[warp_id] + buffer_warp_offset); + } } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - e_op_result = e_op(key_or_src, - key_or_dst, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - edge_partition_e_value_input.get(local_edge_offset)); } - auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot > 0) { - if (lane_id == 0) { - auto increment = __popc(ballot); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = - static_cast(atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); + } else { + for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + + if (i < static_cast(num_edges_this_warp)) { + auto key_idx_this_warp = static_cast(thrust::distance( + this_warp_inclusive_sum_first, + thrust::upper_bound( + thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); + auto local_edge_offset = + warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + + static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} + : *(this_warp_inclusive_sum_first + + (key_idx_this_warp - 1)))); + auto key = *(key_first + (min_key_idx + key_idx_this_warp)); + e_op_result = call_e_op(key, local_edge_offset); } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); + auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); + if (ballot > 0) { + if (lane_id == 0) { + auto increment = __popc(ballot); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + buffer_warp_start_indices[warp_id] = static_cast( + atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(increment))); + } + __syncwarp(); + if (e_op_result) { + auto buffer_warp_offset = + static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); + push_buffer_element(e_op_result, + buffer_key_output_first, + buffer_value_output_first, + buffer_warp_start_indices[warp_id] + buffer_warp_offset); + } } } } @@ -417,6 +295,7 @@ template @@ -429,6 +308,7 @@ __global__ void extract_transform_v_frontier_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -470,47 +350,73 @@ __global__ void extract_transform_v_frontier_e_mid_degree( auto rounded_up_local_out_degree = ((static_cast(local_out_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); - for (size_t i = lane_id; i < rounded_up_local_out_degree; i += raft::warp_size()) { - e_op_result_t e_op_result{}; - if (i < static_cast(local_out_degree)) { - auto minor = indices[i]; - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - std::conditional_t - key_or_src{}; // key if major - std::conditional_t - key_or_dst{}; // key if major - if constexpr (GraphViewType::is_storage_transposed) { - key_or_src = minor; - key_or_dst = key; - } else { - key_or_src = key; - key_or_dst = minor; + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + local_edge_offset}; + + if (edge_partition_e_mask) { + for (size_t i = lane_id; i < rounded_up_local_out_degree; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + if ((i < static_cast(local_out_degree)) && + ((*edge_partition_e_mask).get(local_edge_offset + i))) { + e_op_result = call_e_op(i); } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - e_op_result = e_op(key_or_src, - key_or_dst, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - edge_partition_e_value_input.get(local_edge_offset + i)); - } - auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot > 0) { - if (lane_id == 0) { - auto increment = __popc(ballot); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = - static_cast(atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); + + auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); + if (ballot > 0) { + if (lane_id == 0) { + auto increment = __popc(ballot); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + buffer_warp_start_indices[warp_id] = static_cast( + atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(increment))); + } + __syncwarp(); + if (e_op_result) { + auto buffer_warp_offset = + static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); + push_buffer_element(e_op_result, + buffer_key_output_first, + buffer_value_output_first, + buffer_warp_start_indices[warp_id] + buffer_warp_offset); + } } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); + } + } else { + for (size_t i = lane_id; i < rounded_up_local_out_degree; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } + + auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); + if (ballot > 0) { + if (lane_id == 0) { + auto increment = __popc(ballot); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + buffer_warp_start_indices[warp_id] = static_cast( + atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(increment))); + } + __syncwarp(); + if (e_op_result) { + auto buffer_warp_offset = + static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); + push_buffer_element(e_op_result, + buffer_key_output_first, + buffer_value_output_first, + buffer_warp_start_indices[warp_id] + buffer_warp_offset); + } } } } @@ -524,6 +430,7 @@ template @@ -536,6 +443,7 @@ __global__ void extract_transform_v_frontier_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -576,53 +484,78 @@ __global__ void extract_transform_v_frontier_e_high_degree( (extract_transform_v_frontier_e_kernel_block_size - 1)) / extract_transform_v_frontier_e_kernel_block_size) * extract_transform_v_frontier_e_kernel_block_size; - for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { - e_op_result_t e_op_result{}; - edge_t buffer_block_offset{0}; - - if (i < static_cast(local_out_degree)) { - auto minor = indices[i]; - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - std::conditional_t - key_or_src{}; // key if major - std::conditional_t - key_or_dst{}; // key if major - if constexpr (GraphViewType::is_storage_transposed) { - key_or_src = minor; - key_or_dst = key; - } else { - key_or_src = key; - key_or_dst = minor; + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + local_edge_offset}; + + if (edge_partition_e_mask) { + for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { + e_op_result_t e_op_result{thrust::nullopt}; + edge_t buffer_block_offset{0}; + if ((i < static_cast(local_out_degree)) && + ((*edge_partition_e_mask).get(local_edge_offset + i))) { + e_op_result = call_e_op(i); + } + + BlockScan(temp_storage) + .ExclusiveSum(e_op_result ? edge_t{1} : edge_t{0}, buffer_block_offset); + if (threadIdx.x == (blockDim.x - 1)) { + auto increment = buffer_block_offset + (e_op_result ? edge_t{1} : edge_t{0}); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + buffer_block_start_idx = increment > 0 + ? static_cast(atomicAdd( + reinterpret_cast(buffer_idx_ptr), + static_cast(increment))) + : size_t{0} /* dummy */; + } + __syncthreads(); + if (e_op_result) { + push_buffer_element(e_op_result, + buffer_key_output_first, + buffer_value_output_first, + buffer_block_start_idx + buffer_block_offset); } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - e_op_result = e_op(key_or_src, - key_or_dst, - edge_partition_src_value_input.get(src_offset), - edge_partition_dst_value_input.get(dst_offset), - edge_partition_e_value_input.get(local_edge_offset + i)); - } - BlockScan(temp_storage) - .ExclusiveSum(e_op_result ? edge_t{1} : edge_t{0}, buffer_block_offset); - if (threadIdx.x == (blockDim.x - 1)) { - auto increment = buffer_block_offset + (e_op_result ? edge_t{1} : edge_t{0}); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_block_start_idx = increment > 0 - ? static_cast(atomicAdd( - reinterpret_cast(buffer_idx_ptr), - static_cast(increment))) - : size_t{0} /* dummy */; } - __syncthreads(); - if (e_op_result) { - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_block_start_idx + buffer_block_offset); + } else { + for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { + e_op_result_t e_op_result{thrust::nullopt}; + edge_t buffer_block_offset{0}; + if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } + + BlockScan(temp_storage) + .ExclusiveSum(e_op_result ? edge_t{1} : edge_t{0}, buffer_block_offset); + if (threadIdx.x == (blockDim.x - 1)) { + auto increment = buffer_block_offset + (e_op_result ? edge_t{1} : edge_t{0}); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + buffer_block_start_idx = increment > 0 + ? static_cast(atomicAdd( + reinterpret_cast(buffer_idx_ptr), + static_cast(increment))) + : size_t{0} /* dummy */; + } + __syncthreads(); + if (e_op_result) { + push_buffer_element(e_op_result, + buffer_key_output_first, + buffer_value_output_first, + buffer_block_start_idx + buffer_block_offset); + } } - } - idx += gridDim.x; + idx += gridDim.x; + } } } @@ -757,10 +690,18 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, static_cast(thrust::distance(frontier_key_first, frontier_key_last)))}; } + auto edge_mask_view = graph_view.edge_mask_view(); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; auto edge_partition_frontier_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); @@ -846,8 +787,8 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); h_offsets.push_back(edge_partition_frontier_size); // FIXME: we may further improve performance by 1) concurrently running kernels on different - // segments; 2) individually tuning block sizes for different segments; and 3) adding one more - // segment for very high degree vertices and running segmented reduction + // segments; 2) individually tuning block sizes for different segments; and 3) adding one + // more segment for very high degree vertices and running segmented reduction if (h_offsets[0] > 0) { raft::grid_1d_block_t update_grid(h_offsets[0], extract_transform_v_frontier_e_kernel_block_size, @@ -860,6 +801,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, edge_partition_src_value_input, edge_partition_dst_value_input, edge_partition_e_value_input, + edge_partition_e_mask, get_optional_dataframe_buffer_begin(key_buffer), get_optional_dataframe_buffer_begin(value_buffer), buffer_idx.data(), @@ -877,6 +819,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, edge_partition_src_value_input, edge_partition_dst_value_input, edge_partition_e_value_input, + edge_partition_e_mask, get_optional_dataframe_buffer_begin(key_buffer), get_optional_dataframe_buffer_begin(value_buffer), buffer_idx.data(), @@ -886,7 +829,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(h_offsets[2] - h_offsets[1], extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_low_degree + extract_transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, edge_partition_frontier_key_first + h_offsets[1], @@ -894,6 +837,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, edge_partition_src_value_input, edge_partition_dst_value_input, edge_partition_e_value_input, + edge_partition_e_mask, get_optional_dataframe_buffer_begin(key_buffer), get_optional_dataframe_buffer_begin(value_buffer), buffer_idx.data(), @@ -903,7 +847,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, raft::grid_1d_thread_t update_grid(h_offsets[3] - h_offsets[2], extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_hypersparse + extract_transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, edge_partition_frontier_key_first + h_offsets[2], @@ -911,6 +855,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, edge_partition_src_value_input, edge_partition_dst_value_input, edge_partition_e_value_input, + edge_partition_e_mask, get_optional_dataframe_buffer_begin(key_buffer), get_optional_dataframe_buffer_begin(value_buffer), buffer_idx.data(), @@ -922,7 +867,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, extract_transform_v_frontier_e_kernel_block_size, handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_low_degree + extract_transform_v_frontier_e_hypersparse_or_low_degree <<>>( edge_partition, edge_partition_frontier_key_first, @@ -930,6 +875,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, edge_partition_src_value_input, edge_partition_dst_value_input, edge_partition_e_value_input, + edge_partition_e_mask, get_optional_dataframe_buffer_begin(key_buffer), get_optional_dataframe_buffer_begin(value_buffer), buffer_idx.data(), diff --git a/cpp/src/prims/detail/prim_functors.cuh b/cpp/src/prims/detail/prim_functors.cuh index 2785ba38dfd..13dc0648aef 100644 --- a/cpp/src/prims/detail/prim_functors.cuh +++ b/cpp/src/prims/detail/prim_functors.cuh @@ -22,6 +22,7 @@ namespace cugraph { namespace detail { template key_or_src{}; + std::conditional_t key_or_dst{}; + if constexpr (GraphViewType::is_storage_transposed) { + key_or_src = minor; + key_or_dst = key; + } + else { + key_or_src = key; + key_or_dst = minor; + } auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; - return e_op(src, - dst, + return e_op(key_or_src, + key_or_dst, edge_partition_src_value_input.get(src_offset), edge_partition_dst_value_input.get(dst_offset), edge_partition_e_value_input.get(edge_offset + i)); } }; +template +struct call_e_op_with_key_t { + edge_partition_device_view_t const& edge_partition{}; + EdgePartitionSrcValueInputWrapper const& edge_partition_src_value_input{}; + EdgePartitionDstValueInputWrapper const& edge_partition_dst_value_input{}; + EdgePartitionEdgeValueInputWrapper const& edge_partition_e_value_input{}; + EdgeOp const& e_op{}; + + __device__ auto operator()(key_t key, typename GraphViewType::edge_type i /* index in edge_partition's edge list */) const + { + typename GraphViewType::vertex_type major{}; + if constexpr (std::is_same_v) { + major = key; + } + else { + major = thrust::get<0>(key); + } + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto minor = *(edge_partition.indices() + i); + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); + std::conditional_t key_or_src{}; + std::conditional_t key_or_dst{}; + if constexpr (GraphViewType::is_storage_transposed) { + key_or_src = minor; + key_or_dst = key; + } + else { + key_or_src = key; + key_or_dst = minor; + } + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + return e_op(key_or_src, + key_or_dst, + edge_partition_src_value_input.get(src_offset), + edge_partition_dst_value_input.get(dst_offset), + edge_partition_e_value_input.get(i)); + } +}; + } // namespace detail } // namespace cugraph diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 24b4f0857b1..c519d8aefed 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -190,6 +190,7 @@ __global__ void per_v_transform_reduce_e_hypersparse( edge_partition.local_edges(static_cast(major_idx)); auto call_e_op = call_e_op_t(major_offset)); auto call_e_op = call_e_op_t Date: Wed, 7 Feb 2024 12:12:08 -0800 Subject: [PATCH 07/18] replace uint32_t{0xffffffff} with raft::warp_full_mask() --- cpp/src/prims/transform_e.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh index 93a2d040b60..3f9f29e7a05 100644 --- a/cpp/src/prims/transform_e.cuh +++ b/cpp/src/prims/transform_e.cuh @@ -98,7 +98,7 @@ __global__ void transform_e_packed_bool( ? int{1} : int{0}; } - uint32_t new_val = __ballot_sync(uint32_t{0xffffffff}, predicate); + uint32_t new_val = __ballot_sync(raft::warp_full_mask(), predicate); if (lane_id == 0) { if (edge_mask == packed_bool_full_mask()) { *(edge_partition_e_value_output.value_first() + idx) = new_val; From f1ad089ec7e30e9d5ddd0485912fd785635e34a9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 12:19:44 -0800 Subject: [PATCH 08/18] update/performance tune detail::extract_transform_v_froniter_e with edge masking --- .../detail/extract_transform_v_frontier_e.cuh | 189 ++++++------------ 1 file changed, 58 insertions(+), 131 deletions(-) diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 4f53f525673..0e16eb93058 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -62,13 +62,13 @@ namespace detail { int32_t constexpr extract_transform_v_frontier_e_kernel_block_size = 512; -template -__device__ void push_buffer_element(e_op_result_t e_op_result, - BufferKeyOutputIterator buffer_key_output_first, +template +__device__ void push_buffer_element(BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, - size_t buffer_idx) + size_t buffer_idx, + e_op_result_t e_op_result) { using output_key_t = typename optional_dataframe_buffer_value_type_t::value; @@ -88,6 +88,34 @@ __device__ void push_buffer_element(e_op_result_t e_op_result, } } +template +__device__ void warp_push_buffer_elements( + BufferKeyOutputIterator buffer_key_output_first, + BufferValueOutputIterator buffer_value_output_first, + cuda::atomic_ref& buffer_idx, + int lane_id, + e_op_result_t e_op_result) +{ + auto ballot = __ballot_sync(raft::warp_full_mask(), e_op_result ? uint32_t{1} : uint32_t{0}); + if (ballot > 0) { + size_t warp_buffer_start_idx{}; + if (lane_id == 0) { + auto increment = __popc(ballot); + warp_buffer_start_idx = buffer_idx.fetch_add(increment, cuda::std::memory_order_relaxed); + } + warp_buffer_start_idx = __shfl_sync(raft::warp_full_mask(), warp_buffer_start_idx, int{0}); + if (e_op_result) { + auto buffer_warp_offset = __popc(ballot & ~(raft::warp_full_mask() << lane_id)); + push_buffer_element(buffer_key_output_first, + buffer_value_output_first, + warp_buffer_start_idx + buffer_warp_offset, + e_op_result); + } + } +} + template (tid); + cuda::atomic_ref buffer_idx(*buffer_idx_ptr); + __shared__ edge_t warp_local_degree_inclusive_sums[extract_transform_v_frontier_e_kernel_block_size]; __shared__ edge_t warp_key_local_edge_offsets[extract_transform_v_frontier_e_kernel_block_size]; @@ -138,9 +168,6 @@ __global__ void extract_transform_v_frontier_e_hypersparse_or_low_degree( using WarpScan = cub::WarpScan; __shared__ typename WarpScan::TempStorage temp_storage; - __shared__ size_t - buffer_warp_start_indices[extract_transform_v_frontier_e_kernel_block_size / raft::warp_size()]; - auto indices = edge_partition.indices(); vertex_t num_keys = static_cast(thrust::distance(key_first, key_last)); @@ -227,25 +254,9 @@ __global__ void extract_transform_v_frontier_e_hypersparse_or_low_degree( e_op_result = call_e_op(key, local_edge_offset); } } - auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot > 0) { - if (lane_id == 0) { - auto increment = __popc(ballot); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = static_cast( - atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); - } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); - } - } + + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } else { for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { @@ -264,25 +275,9 @@ __global__ void extract_transform_v_frontier_e_hypersparse_or_low_degree( auto key = *(key_first + (min_key_idx + key_idx_this_warp)); e_op_result = call_e_op(key, local_edge_offset); } - auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot > 0) { - if (lane_id == 0) { - auto increment = __popc(ballot); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = static_cast( - atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); - } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); - } - } + + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } @@ -331,8 +326,8 @@ __global__ void extract_transform_v_frontier_e_mid_degree( auto const lane_id = tid % raft::warp_size(); auto idx = static_cast(tid / raft::warp_size()); - __shared__ size_t - buffer_warp_start_indices[extract_transform_v_frontier_e_kernel_block_size / raft::warp_size()]; + cuda::atomic_ref buffer_idx(*buffer_idx_ptr); + while (idx < static_cast(thrust::distance(key_first, key_last))) { auto key = *(key_first + idx); vertex_t major{}; @@ -374,50 +369,16 @@ __global__ void extract_transform_v_frontier_e_mid_degree( e_op_result = call_e_op(i); } - auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot > 0) { - if (lane_id == 0) { - auto increment = __popc(ballot); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = static_cast( - atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); - } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); - } - } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } else { for (size_t i = lane_id; i < rounded_up_local_out_degree; i += raft::warp_size()) { e_op_result_t e_op_result{thrust::nullopt}; if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } - auto ballot = __ballot_sync(uint32_t{0xffffffff}, e_op_result ? uint32_t{1} : uint32_t{0}); - if (ballot > 0) { - if (lane_id == 0) { - auto increment = __popc(ballot); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_warp_start_indices[warp_id] = static_cast( - atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(increment))); - } - __syncwarp(); - if (e_op_result) { - auto buffer_warp_offset = - static_cast(__popc(ballot & ~(uint32_t{0xffffffff} << lane_id))); - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_warp_start_indices[warp_id] + buffer_warp_offset); - } - } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } @@ -460,11 +421,11 @@ __global__ void extract_transform_v_frontier_e_high_degree( typename EdgePartitionEdgeValueInputWrapper::value_type, EdgeOp>::type; - auto idx = static_cast(blockIdx.x); + auto const warp_id = threadIdx.x / raft::warp_size(); + auto const lane_id = threadIdx.x % raft::warp_size(); + auto idx = static_cast(blockIdx.x); - using BlockScan = cub::BlockScan; - __shared__ typename BlockScan::TempStorage temp_storage; - __shared__ size_t buffer_block_start_idx; + cuda::atomic_ref buffer_idx(*buffer_idx_ptr); while (idx < static_cast(thrust::distance(key_first, key_last))) { auto key = *(key_first + idx); @@ -503,59 +464,25 @@ __global__ void extract_transform_v_frontier_e_high_degree( if (edge_partition_e_mask) { for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { e_op_result_t e_op_result{thrust::nullopt}; - edge_t buffer_block_offset{0}; if ((i < static_cast(local_out_degree)) && ((*edge_partition_e_mask).get(local_edge_offset + i))) { e_op_result = call_e_op(i); } - BlockScan(temp_storage) - .ExclusiveSum(e_op_result ? edge_t{1} : edge_t{0}, buffer_block_offset); - if (threadIdx.x == (blockDim.x - 1)) { - auto increment = buffer_block_offset + (e_op_result ? edge_t{1} : edge_t{0}); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_block_start_idx = increment > 0 - ? static_cast(atomicAdd( - reinterpret_cast(buffer_idx_ptr), - static_cast(increment))) - : size_t{0} /* dummy */; - } - __syncthreads(); - if (e_op_result) { - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_block_start_idx + buffer_block_offset); - } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } else { for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { e_op_result_t e_op_result{thrust::nullopt}; - edge_t buffer_block_offset{0}; if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } - BlockScan(temp_storage) - .ExclusiveSum(e_op_result ? edge_t{1} : edge_t{0}, buffer_block_offset); - if (threadIdx.x == (blockDim.x - 1)) { - auto increment = buffer_block_offset + (e_op_result ? edge_t{1} : edge_t{0}); - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - buffer_block_start_idx = increment > 0 - ? static_cast(atomicAdd( - reinterpret_cast(buffer_idx_ptr), - static_cast(increment))) - : size_t{0} /* dummy */; - } - __syncthreads(); - if (e_op_result) { - push_buffer_element(e_op_result, - buffer_key_output_first, - buffer_value_output_first, - buffer_block_start_idx + buffer_block_offset); - } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } - - idx += gridDim.x; } + + idx += gridDim.x; } } From 3492ee962cf5d1790934517b54c34f0af5e3ecc7 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 12:28:22 -0800 Subject: [PATCH 09/18] mark fill_edge_src|dst_property.cuh as edge-masking ready --- cpp/src/prims/fill_edge_src_dst_property.cuh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/src/prims/fill_edge_src_dst_property.cuh b/cpp/src/prims/fill_edge_src_dst_property.cuh index 86e23a1a04e..0726fbf91a4 100644 --- a/cpp/src/prims/fill_edge_src_dst_property.cuh +++ b/cpp/src/prims/fill_edge_src_dst_property.cuh @@ -123,8 +123,6 @@ void fill_edge_src_property(raft::handle_t const& handle, edge_src_property_t& edge_src_property_output, bool do_expensive_check = false) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { // currently, nothing to do } @@ -161,8 +159,6 @@ void fill_edge_dst_property(raft::handle_t const& handle, edge_dst_property_t& edge_dst_property_output, bool do_expensive_check = false) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { // currently, nothing to do } From 3036bc759964c2983792c7aefaa0217503e028c7 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 15:16:26 -0800 Subject: [PATCH 10/18] add compute_number_of_edges_with_mask & compute_local_degree_with_mask --- .../cugraph/edge_partition_device_view.cuh | 230 ++++++++++++++++++ 1 file changed, 230 insertions(+) diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index d1c2cf3df52..c286c81683e 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -92,6 +93,54 @@ struct local_degree_op_t { } }; +template +struct local_degree_with_mask_op_t { + raft::device_span offsets{}; + std::conditional_t major_range_first{}; + + std::conditional_t, std::byte /* dummy */> + dcs_nzd_vertices{}; + std::conditional_t major_hypersparse_first{}; + + MaskIterator mask_first{}; + + __device__ return_type_t operator()(vertex_t major) const + { + if constexpr (multi_gpu) { + vertex_t idx{}; + if constexpr (use_dcs) { + if (major < major_hypersparse_first) { + idx = major - major_range_first; + return static_cast( + count_set_bits(mask_first, offsets[idx], offsets[idx + 1] - offsets[idx])); + } else { + auto major_hypersparse_idx = + major_hypersparse_idx_from_major_nocheck_impl(dcs_nzd_vertices, major); + if (major_hypersparse_idx) { + idx = (major_hypersparse_first - major_range_first) + *major_hypersparse_idx; + return static_cast( + count_set_bits(mask_first, offsets[idx], offsets[idx + 1] - offsets[idx])); + } else { + return return_type_t{0}; + } + } + } else { + idx = major - major_range_first; + return static_cast( + count_set_bits(mask_first, offsets[idx], offsets[idx + 1] - offsets[idx])); + } + } else { + return static_cast( + count_set_bits(mask_first, offsets[major], offsets[major + 1] - offsets[major])); + } + } +}; + template class edge_partition_device_view_base_t { public: @@ -255,6 +304,122 @@ class edge_partition_device_view_t + size_t compute_number_of_edges_with_mask(MaskIterator mask_first, + MajorIterator major_first, + MajorIterator major_last, + rmm::cuda_stream_view stream) const + { + return dcs_nzd_vertices_ ? thrust::transform_reduce( + rmm::exec_policy(stream), + major_first, + major_last, + detail::local_degree_with_mask_op_t< + vertex_t, + edge_t, + size_t /* no limit on majors.size(), so edge_t can overflow */, + multi_gpu, + true, + MaskIterator>{this->offsets_, + major_range_first_, + *dcs_nzd_vertices_, + *major_hypersparse_first_, + mask_first}, + size_t{0}, + thrust::plus()) + : thrust::transform_reduce( + rmm::exec_policy(stream), + major_first, + major_last, + detail::local_degree_with_mask_op_t< + vertex_t, + edge_t, + size_t /* no limit on majors.size(), so edge_t can overflow */, + multi_gpu, + false, + MaskIterator>{this->offsets_, + major_range_first_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + mask_first}, + size_t{0}, + thrust::plus()); + } + + template + rmm::device_uvector compute_local_degrees_with_mask(MaskIterator mask_first, + rmm::cuda_stream_view stream) const + { + rmm::device_uvector local_degrees(this->major_range_size(), stream); + if (dcs_nzd_vertices_) { + assert(major_hypersparse_first_); + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(this->major_range_first()), + thrust::make_counting_iterator(this->major_range_last()), + local_degrees.begin(), + detail:: + local_degree_with_mask_op_t{ + this->offsets_, + major_range_first_, + *dcs_nzd_vertices_, + major_hypersparse_first_.value_or(vertex_t{0}), + mask_first}); + } else { + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(this->major_range_first()), + thrust::make_counting_iterator(this->major_range_last()), + local_degrees.begin(), + detail:: + local_degree_with_mask_op_t{ + this->offsets_, + major_range_first_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + mask_first}); + } + return local_degrees; + } + + template + rmm::device_uvector compute_local_degrees_with_mask(MaskIterator mask_first, + MajorIterator major_first, + MajorIterator major_last, + rmm::cuda_stream_view stream) const + { + rmm::device_uvector local_degrees(thrust::distance(major_first, major_last), stream); + if (dcs_nzd_vertices_) { + assert(major_hypersparse_first_); + thrust::transform( + rmm::exec_policy(stream), + major_first, + major_last, + local_degrees.begin(), + detail:: + local_degree_with_mask_op_t{ + this->offsets_, + major_range_first_, + dcs_nzd_vertices_.value(), + major_hypersparse_first_.value_or(vertex_t{0}), + mask_first}); + } else { + thrust::transform( + rmm::exec_policy(stream), + major_first, + major_last, + local_degrees.begin(), + detail:: + local_degree_with_mask_op_t{ + this->offsets_, + major_range_first_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + mask_first}); + } + return local_degrees; + } + __host__ __device__ vertex_t major_value_start_offset() const { return major_value_start_offset_; @@ -440,6 +605,71 @@ class edge_partition_device_view_t + size_t compute_number_of_edges_with_mask(MaskIterator mask_first, + MajorIterator major_first, + MajorIterator major_last, + rmm::cuda_stream_view stream) const + { + return thrust::transform_reduce( + rmm::exec_policy(stream), + major_first, + major_last, + detail::local_degree_with_mask_op_t< + vertex_t, + edge_t, + size_t /* no limit on majors.size(), so edge_t can overflow */, + multi_gpu, + false, + MaskIterator>{this->offsets_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + mask_first}, + size_t{0}, + thrust::plus()); + } + + template + rmm::device_uvector compute_local_degrees_with_mask(MaskIterator mask_first, + rmm::cuda_stream_view stream) const + { + rmm::device_uvector local_degrees(this->major_range_size(), stream); + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(this->major_range_first()), + thrust::make_counting_iterator(this->major_range_last()), + local_degrees.begin(), + detail::local_degree_with_mask_op_t{ + this->offsets_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + mask_first}); + return local_degrees; + } + + template + rmm::device_uvector compute_local_degrees_with_mask(MaskIterator mask_first, + MajorIterator major_first, + MajorIterator major_last, + rmm::cuda_stream_view stream) const + { + rmm::device_uvector local_degrees(thrust::distance(major_first, major_last), stream); + thrust::transform( + rmm::exec_policy(stream), + major_first, + major_last, + local_degrees.begin(), + detail::local_degree_with_mask_op_t{ + this->offsets_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + mask_first}); + return local_degrees; + } + __host__ __device__ vertex_t major_value_start_offset() const { return vertex_t{0}; } __host__ __device__ thrust::optional major_hypersparse_first() const noexcept From fc3b380acebb09936dada59ef96bd4f7cd67a5b4 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 15:16:43 -0800 Subject: [PATCH 11/18] add missing include --- cpp/include/cugraph/utilities/mask_utils.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh index ab1403d019b..4eb904f3836 100644 --- a/cpp/include/cugraph/utilities/mask_utils.cuh +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -25,6 +25,7 @@ #include #include #include +#include namespace cugraph { From 217ef47eded505b9c11c6a446d855ebe70e9e0f8 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 15:18:06 -0800 Subject: [PATCH 12/18] update extract_transform_v_frontier_outgoing_e to support edge masking --- .../extract_transform_v_frontier_outgoing_e.cuh | 2 -- .../mg_extract_transform_v_frontier_outgoing_e.cu | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh index 42af8a1164d..fb5f6991073 100644 --- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh +++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh @@ -98,8 +98,6 @@ extract_transform_v_frontier_outgoing_e(raft::handle_t const& handle, static_assert(!std::is_same_v); using payload_t = typename e_op_result_t::value_type; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - auto value_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); std::tie(std::ignore, value_buffer) = detail::extract_transform_v_frontier_e(handle, diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index 4d9435dd344..f18fec529b5 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -115,6 +115,7 @@ struct e_op_t { }; struct Prims_Usecase { + bool edge_masking{false}; bool check_correctness{true}; }; @@ -180,6 +181,13 @@ class Tests_MGExtractTransformVFrontierOutgoingE auto mg_graph_view = mg_graph.view(); + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + // 2. run MG extract_transform_v_frontier_outgoing_e const int hash_bin_count = 5; @@ -458,7 +466,7 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGExtractTransformVFrontierOutgoingE_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{true}), + ::testing::Values(Prims_Usecase{false, true}, Prims_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -466,7 +474,8 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_MGExtractTransformVFrontierOutgoingE_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Combine(::testing::Values(Prims_Usecase{false, true}, + Prims_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); @@ -478,7 +487,7 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGExtractTransformVFrontierOutgoingE_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{false}), + ::testing::Values(Prims_Usecase{false, false}, Prims_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 6a44b550b8f861b5a21f4b6ac3bd9d3de7213547 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 15:35:07 -0800 Subject: [PATCH 13/18] update transform_reduce_v_frontier_outgoing_e_by_dst to support edge masking --- ...rm_reduce_v_frontier_outgoing_e_by_dst.cuh | 41 ++++++++++++++----- ...orm_reduce_v_frontier_outgoing_e_by_dst.cu | 15 +++++-- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh index 18e722d62cc..0315b3808e0 100644 --- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh +++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh @@ -189,8 +189,6 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using key_t = typename VertexFrontierBucketType::key_type; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - size_t ret{0}; vertex_t const* local_frontier_vertex_first{nullptr}; @@ -207,10 +205,19 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, } else { local_frontier_sizes = std::vector{static_cast(frontier.size())}; } + + auto edge_mask_view = graph_view.edge_mask_view(); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); @@ -225,14 +232,30 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, static_cast(i), handle.get_stream()); - ret += edge_partition.compute_number_of_edges(edge_partition_frontier_vertices.begin(), - edge_partition_frontier_vertices.end(), - handle.get_stream()); + if (edge_partition_e_mask) { + ret += + edge_partition.compute_number_of_edges_with_mask((*edge_partition_e_mask).value_first(), + edge_partition_frontier_vertices.begin(), + edge_partition_frontier_vertices.end(), + handle.get_stream()); + } else { + ret += edge_partition.compute_number_of_edges(edge_partition_frontier_vertices.begin(), + edge_partition_frontier_vertices.end(), + handle.get_stream()); + } } else { assert(i == 0); - ret += edge_partition.compute_number_of_edges(local_frontier_vertex_first, - local_frontier_vertex_first + frontier.size(), - handle.get_stream()); + if (edge_partition_e_mask) { + ret += edge_partition.compute_number_of_edges_with_mask( + (*edge_partition_e_mask).value_first(), + local_frontier_vertex_first, + local_frontier_vertex_first + frontier.size(), + handle.get_stream()); + } else { + ret += edge_partition.compute_number_of_edges(local_frontier_vertex_first, + local_frontier_vertex_first + frontier.size(), + handle.get_stream()); + } } } @@ -323,8 +346,6 @@ transform_reduce_v_frontier_outgoing_e_by_dst(raft::handle_t const& handle, using key_t = typename VertexFrontierBucketType::key_type; using payload_t = typename ReduceOp::value_type; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { // currently, nothing to do } diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index d0b97065da7..109a911eb2e 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -88,6 +88,7 @@ struct e_op_t { }; struct Prims_Usecase { + bool edge_masking{false}; bool check_correctness{true}; }; @@ -152,6 +153,13 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst auto mg_graph_view = mg_graph.view(); + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + // 2. run MG transform reduce const int hash_bin_count = 5; @@ -533,7 +541,7 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGTransformReduceVFrontierOutgoingEByDst_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{true}), + ::testing::Values(Prims_Usecase{false, true}, Prims_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -541,7 +549,8 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Combine(::testing::Values(Prims_Usecase{false, true}, + Prims_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); @@ -553,7 +562,7 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGTransformReduceVFrontierOutgoingEByDst_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{false}), + ::testing::Values(Prims_Usecase{false, false}, Prims_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 60fd404c65282a7b56a4c3c31643fc91a5b94de3 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 15:43:33 -0800 Subject: [PATCH 14/18] update extract_transform_e to support edge masking --- cpp/src/prims/extract_transform_e.cuh | 2 -- cpp/tests/prims/mg_extract_transform_e.cu | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh index f135b76d6e3..fcd5e4c1483 100644 --- a/cpp/src/prims/extract_transform_e.cuh +++ b/cpp/src/prims/extract_transform_e.cuh @@ -107,8 +107,6 @@ extract_transform_e(raft::handle_t const& handle, static_assert(!std::is_same_v); using payload_t = typename e_op_result_t::value_type; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - // FIXME: Consider updating detail::extract_transform_v_forntier_e to take std::nullopt to as a // frontier or create a new key bucket type that just stores [vertex_first, vertex_last) for // further optimization. Better revisit this once this becomes a performance bottleneck and after diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index bca6471a5bb..29ff25ea8bd 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -116,6 +116,7 @@ struct e_op_t { }; struct Prims_Usecase { + bool edge_masking{false}; bool check_correctness{true}; }; @@ -180,6 +181,13 @@ class Tests_MGExtractTransformE auto mg_graph_view = mg_graph.view(); + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + // 2. run MG extract_transform_e const int hash_bin_count = 5; @@ -400,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGExtractTransformE_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{true}), + ::testing::Values(Prims_Usecase{false, true}, Prims_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -408,7 +416,8 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_MGExtractTransformE_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{true}), + ::testing::Combine(::testing::Values(Prims_Usecase{false, true}, + Prims_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); @@ -420,7 +429,7 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGExtractTransformE_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{false}), + ::testing::Values(Prims_Usecase{false, false}, Prims_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From d44c868642e8b1fcaed6f6c34a45472b01294bab Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 7 Feb 2024 15:47:52 -0800 Subject: [PATCH 15/18] clang-format --- cpp/src/prims/detail/prim_functors.cuh | 46 ++++++++++++++++---------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/cpp/src/prims/detail/prim_functors.cuh b/cpp/src/prims/detail/prim_functors.cuh index 13dc0648aef..d142aed1051 100644 --- a/cpp/src/prims/detail/prim_functors.cuh +++ b/cpp/src/prims/detail/prim_functors.cuh @@ -37,25 +37,32 @@ struct call_e_op_t { EdgeOp const& e_op{}; key_t key{}; typename GraphViewType::vertex_type major_offset{}; - typename GraphViewType::vertex_type const* indices{nullptr}; // indices = edge_partition.incies() + edge_offset + typename GraphViewType::vertex_type const* indices{ + nullptr}; // indices = edge_partition.incies() + edge_offset typename GraphViewType::edge_type edge_offset{}; - __device__ auto operator()(typename GraphViewType::edge_type i /* index in key's neighbor list */) const + __device__ auto operator()( + typename GraphViewType::edge_type i /* index in key's neighbor list */) const { auto minor = indices[i]; auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - std::conditional_t key_or_src{}; - std::conditional_t key_or_dst{}; + std::conditional_t + key_or_src{}; + std::conditional_t + key_or_dst{}; if constexpr (GraphViewType::is_storage_transposed) { key_or_src = minor; key_or_dst = key; - } - else { + } else { key_or_src = key; key_or_dst = minor; } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; return e_op(key_or_src, key_or_dst, edge_partition_src_value_input.get(src_offset), @@ -79,30 +86,35 @@ struct call_e_op_with_key_t { EdgePartitionEdgeValueInputWrapper const& edge_partition_e_value_input{}; EdgeOp const& e_op{}; - __device__ auto operator()(key_t key, typename GraphViewType::edge_type i /* index in edge_partition's edge list */) const + __device__ auto operator()( + key_t key, typename GraphViewType::edge_type i /* index in edge_partition's edge list */) const { typename GraphViewType::vertex_type major{}; if constexpr (std::is_same_v) { major = key; - } - else { + } else { major = thrust::get<0>(key); } auto major_offset = edge_partition.major_offset_from_major_nocheck(major); auto minor = *(edge_partition.indices() + i); auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - std::conditional_t key_or_src{}; - std::conditional_t key_or_dst{}; + std::conditional_t + key_or_src{}; + std::conditional_t + key_or_dst{}; if constexpr (GraphViewType::is_storage_transposed) { key_or_src = minor; key_or_dst = key; - } - else { + } else { key_or_src = key; key_or_dst = minor; } - auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; - auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; + auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset; + auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset; return e_op(key_or_src, key_or_dst, edge_partition_src_value_input.get(src_offset), From 0b40837f2375377053dd2e4bb2816b28ac41d029 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 9 Feb 2024 14:23:47 -0800 Subject: [PATCH 16/18] copyright year --- cpp/include/cugraph/utilities/mask_utils.cuh | 2 +- cpp/src/centrality/betweenness_centrality_impl.cuh | 2 +- cpp/src/prims/detail/extract_transform_v_frontier_e.cuh | 2 +- cpp/src/prims/extract_transform_e.cuh | 2 +- cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh | 2 +- cpp/src/prims/fill_edge_src_dst_property.cuh | 2 +- cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh | 2 +- cpp/src/structure/graph_weight_utils_impl.cuh | 2 +- cpp/src/structure/induced_subgraph_impl.cuh | 2 +- cpp/tests/prims/mg_extract_transform_e.cu | 2 +- cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu | 2 +- .../mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu | 2 +- .../prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu | 2 +- cpp/tests/structure/weight_sum_test.cpp | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh index 4eb904f3836..5621b1267e9 100644 --- a/cpp/include/cugraph/utilities/mask_utils.cuh +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 8826bcd50f2..08907716412 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 0e16eb93058..608a824c57e 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh index fcd5e4c1483..25e04fff83a 100644 --- a/cpp/src/prims/extract_transform_e.cuh +++ b/cpp/src/prims/extract_transform_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh index fb5f6991073..f3b85da53ea 100644 --- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh +++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/prims/fill_edge_src_dst_property.cuh b/cpp/src/prims/fill_edge_src_dst_property.cuh index 0726fbf91a4..5a7fe6b5044 100644 --- a/cpp/src/prims/fill_edge_src_dst_property.cuh +++ b/cpp/src/prims/fill_edge_src_dst_property.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh b/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh index d669bd44088..cac648079b0 100644 --- a/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh +++ b/cpp/src/sampling/detail/gather_one_hop_edgelist_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/structure/graph_weight_utils_impl.cuh b/cpp/src/structure/graph_weight_utils_impl.cuh index e97266c557a..173b4df207b 100644 --- a/cpp/src/structure/graph_weight_utils_impl.cuh +++ b/cpp/src/structure/graph_weight_utils_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh index f6bb7ee605d..fa4c5e0f6f2 100644 --- a/cpp/src/structure/induced_subgraph_impl.cuh +++ b/cpp/src/structure/induced_subgraph_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 29ff25ea8bd..caa00e13640 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index f18fec529b5..09c2fc0b2cb 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index be92c82776a..3e59bf3bf20 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 109a911eb2e..7d1b2dd9412 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/structure/weight_sum_test.cpp b/cpp/tests/structure/weight_sum_test.cpp index d53ddeb4e48..34e4f6b6147 100644 --- a/cpp/tests/structure/weight_sum_test.cpp +++ b/cpp/tests/structure/weight_sum_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From b71f6560050aeb946cb89b0cbf74ce78482a887c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 9 Feb 2024 14:31:30 -0800 Subject: [PATCH 17/18] clang-format --- cpp/tests/structure/weight_sum_test.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/tests/structure/weight_sum_test.cpp b/cpp/tests/structure/weight_sum_test.cpp index 34e4f6b6147..30de0092a5a 100644 --- a/cpp/tests/structure/weight_sum_test.cpp +++ b/cpp/tests/structure/weight_sum_test.cpp @@ -47,8 +47,7 @@ void weight_sum_reference(edge_t const* offsets, if (!major) { std::fill(weight_sums, weight_sums + num_vertices, weight_t{0.0}); } for (vertex_t i = 0; i < num_vertices; ++i) { if (major) { - weight_sums[i] = - std::reduce(weights + offsets[i], weights + offsets[i + 1], weight_t{0.0}); + weight_sums[i] = std::reduce(weights + offsets[i], weights + offsets[i + 1], weight_t{0.0}); } else { for (auto j = offsets[i]; j < offsets[i + 1]; ++j) { auto nbr = indices[j]; @@ -66,7 +65,8 @@ typedef struct WeightSum_Usecase_t { } WeightSum_Usecase; template -class Tests_WeightSum : public ::testing::TestWithParam> { +class Tests_WeightSum + : public ::testing::TestWithParam> { public: Tests_WeightSum() {} @@ -133,7 +133,8 @@ class Tests_WeightSum : public ::testing::TestWithParam h_reference_in_weight_sums(graph_view.number_of_vertices()); std::vector h_reference_out_weight_sums(graph_view.number_of_vertices()); @@ -158,9 +159,8 @@ class Tests_WeightSum : public ::testing::TestWithParam::min(); auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - auto ret = - std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + auto ret = + std::abs(lhs - rhs) < std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); return std::abs(lhs - rhs) < std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); }; From 595ae9211205f07511f4c8fa323f9d525014d21e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 9 Feb 2024 17:00:40 -0800 Subject: [PATCH 18/18] fix compile error --- cpp/src/prims/transform_reduce_e.cuh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 7acc7461268..109d10a8b1c 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -99,6 +99,7 @@ __global__ void transform_reduce_e_hypersparse( thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); auto call_e_op = call_e_op_t