diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d592b45609c..1e71ddf37e9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -231,6 +231,7 @@ set(CUGRAPH_SOURCES src/components/legacy/connectivity.cu src/centrality/legacy/betweenness_centrality.cu src/generators/generate_rmat_edgelist.cu + src/generators/generate_bipartite_rmat_edgelist.cu src/generators/generator_tools.cu src/generators/simple_generators.cu src/generators/erdos_renyi_generator.cu diff --git a/cpp/include/cugraph/graph_generators.hpp b/cpp/include/cugraph/graph_generators.hpp index fab92259196..4944e0f4917 100644 --- a/cpp/include/cugraph/graph_generators.hpp +++ b/cpp/include/cugraph/graph_generators.hpp @@ -127,6 +127,45 @@ std::tuple, rmm::device_uvector> generat double c = 0.19, bool clip_and_flip = false); +/** + * @brief generate an edge list for a bipartite R-mat graph. + * + * The source vertex IDs will be in the range of [0, 2^src_scale) and the destination vertex IDs + * will be in the range of [0, 2^dst_scale). This function allows multi-edges. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng_state RAFT RNG state, updated with each call + * @param src_scale Scale factor to set the range of source vertex IDs (or the first vertex set) in + * the bipartite graph. Vertex IDs have values in [0, V_src), where V_src = 1 << @p src_scale. + * @param dst_scale Scale factor to set the range of destination vertex IDs (or the second vertex + * set) in the bipartite graph. Vertex IDs have values in [0, V_dst), where V_dst = 1 << @p + * dst_scale. + * @param num_edges Number of edges to generate. + * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org + * for additional details). 
a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_bipartite_rmat_edgelist(raft::handle_t const& handle, + raft::random::RngState& rng_state, + size_t src_scale, + size_t dst_scale, + size_t num_edges, + double a = 0.57, + double b = 0.19, + double c = 0.19); + enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; /** @@ -408,11 +447,30 @@ symmetrize_edgelist_from_triangular( std::optional>&& optional_d_weights_v, bool check_diagonal = false); +/** + * @brief scramble vertex IDs in a graph + * + * Given a vertex list for a graph, scramble the input vertex IDs. + * + * The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Vector of input vertices + * @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN). + * @return rmm::device_uvector object storing scrambled vertex IDs. 
+ */ +template +rmm::device_uvector scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + size_t lgN); + /** * @brief scramble vertex ids in a graph * - * Given an edgelist for a graph, scramble all vertex ids by the given offset. - * This translation is done in place. + * Given an edge list for a graph, scramble the input vertex IDs. * * The scramble code here follows the algorithm in the Graph 500 reference * implementation version 3.0.0. @@ -420,17 +478,18 @@ symmetrize_edgelist_from_triangular( * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param d_src_v Vector of source vertices - * @param d_dst_v Vector of destination vertices - * @param vertex_id_offset Offset to add to each vertex id - * @param seed Used to initialize random number generator + * @param srcs Vector of input source vertices + * @param dsts Vector of input destination vertices + * @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN). + * @return Tuple of two rmm::device_uvector objects storing scrambled source & destination vertex + * IDs, respectively. 
*/ template -void scramble_vertex_ids(raft::handle_t const& handle, - rmm::device_uvector& d_src_v, - rmm::device_uvector& d_dst_v, - vertex_t vertex_id_offset, - uint64_t seed = 0); +std::tuple, rmm::device_uvector> scramble_vertex_ids( + raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts, + size_t lgN); /** * @brief Combine edgelists from multiple sources into a single edgelist diff --git a/cpp/src/generators/generate_bipartite_rmat_edgelist.cu b/cpp/src/generators/generate_bipartite_rmat_edgelist.cu new file mode 100644 index 00000000000..c02e1a7e7fa --- /dev/null +++ b/cpp/src/generators/generate_bipartite_rmat_edgelist.cu @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include + +namespace cugraph { + +template +std::tuple, rmm::device_uvector> +generate_bipartite_rmat_edgelist(raft::handle_t const& handle, + raft::random::RngState& rng_state, + size_t src_scale, + size_t dst_scale, + size_t num_edges, + double a, + double b, + double c) +{ + CUGRAPH_EXPECTS( + (size_t{1} << src_scale) <= static_cast(std::numeric_limits::max()), + "Invalid input argument: src_scale too large for vertex_t."); + CUGRAPH_EXPECTS( + (size_t{1} << dst_scale) <= static_cast(std::numeric_limits::max()), + "Invalid input argument: dst_scale too large for vertex_t."); + CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0), + "Invalid input argument: a, b, c should be non-negative and a + b + c should not " + "be larger than 1.0."); + + // to limit memory footprint (1024 is a tuning parameter) + auto max_edges_to_generate_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * 1024; + rmm::device_uvector rands( + std::min(num_edges, max_edges_to_generate_per_iteration) * (src_scale + dst_scale), + handle.get_stream()); + + rmm::device_uvector srcs(num_edges, handle.get_stream()); + rmm::device_uvector dsts(num_edges, handle.get_stream()); + + size_t num_edges_generated{0}; + while (num_edges_generated < num_edges) { + auto num_edges_to_generate = + std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) + + num_edges_generated; + + detail::uniform_random_fill(handle.get_stream(), + rands.data(), + num_edges_to_generate * (src_scale + dst_scale), + 0.0f, + 1.0f, + rng_state); + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_edges_to_generate), + pair_first, + // if a + b == 0.0, 
a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant + [src_scale, + dst_scale, + rands = rands.data(), + a_plus_b = a + b, + a_plus_c = a + c, + a_norm = (a + b) > 0.0 ? a / (a + b) : 0.0, + c_norm = (1.0 - (a + b)) > 0.0 ? c / (1.0 - (a + b)) : 0.0] __device__(auto i) { + vertex_t src{0}; + vertex_t dst{0}; + size_t rand_offset = i * (src_scale + dst_scale); + for (int level = 0; level < static_cast(std::max(src_scale, dst_scale)); ++level) { + auto dst_threshold = a_plus_c; + if (level < src_scale) { + auto r = rands[rand_offset++]; + auto src_bit_set = r > a_plus_b; + src += + src_bit_set ? static_cast(vertex_t{1} << (src_scale - (level + 1))) : 0; + dst_threshold = src_bit_set ? c_norm : a_norm; + } + if (level < dst_scale) { + auto r = rands[rand_offset++]; + auto dst_bit_set = r > dst_threshold; + dst += + dst_bit_set ? static_cast(vertex_t{1} << (dst_scale - (level + 1))) : 0; + } + } + return thrust::make_tuple(src, dst); + }); + num_edges_generated += num_edges_to_generate; + } + + return std::make_tuple(std::move(srcs), std::move(dsts)); +} + +template std::tuple, rmm::device_uvector> +generate_bipartite_rmat_edgelist(raft::handle_t const& handle, + raft::random::RngState& rng_state, + size_t src_scale, + size_t dst_scale, + size_t num_edges, + double a, + double b, + double c); + +template std::tuple, rmm::device_uvector> +generate_bipartite_rmat_edgelist(raft::handle_t const& handle, + raft::random::RngState& rng_state, + size_t src_scale, + size_t dst_scale, + size_t num_edges, + double a, + double b, + double c); + +} // namespace cugraph diff --git a/cpp/src/generators/generator_tools.cu b/cpp/src/generators/generator_tools.cu index ece07c43efd..1650d3c15c6 100644 --- a/cpp/src/generators/generator_tools.cu +++ b/cpp/src/generators/generator_tools.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,10 +43,10 @@ template rmm::device_uvector append_all(raft::handle_t const& handle, std::vector>&& input) { - size_t size{0}; - // for (size_t i = 0; i < input.size(); ++i) size += input[i].size(); - for (auto& element : input) - size += element.size(); + auto size = std::transform_reduce( + input.begin(), input.end(), size_t{0}, std::plus{}, [](auto const& element) { + return element.size(); + }); rmm::device_uvector output(size, handle.get_stream()); auto output_iter = output.begin(); @@ -56,36 +56,43 @@ rmm::device_uvector append_all(raft::handle_t const& handle, output_iter += element.size(); } - /* -for (size_t i = 0; i < input.size(); ++i) { - raft::copy(output_iter, input[i].begin(), input[i].size(), handle.get_stream()); - output_iter += input[i].size(); -} - */ - return output; } } // namespace detail template -void scramble_vertex_ids(raft::handle_t const& handle, - rmm::device_uvector& d_src_v, - rmm::device_uvector& d_dst_v, - vertex_t vertex_id_offset, - uint64_t seed) +rmm::device_uvector scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + size_t lgN) { - vertex_t scale = 1 + raft::log2(d_src_v.size()); + thrust::transform(handle.get_thrust_policy(), + vertices.begin(), + vertices.end(), + vertices.begin(), + [lgN] __device__(auto v) { return detail::scramble(v, lgN); }); - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin())); + return std::move(vertices); +} + +template +std::tuple, rmm::device_uvector> scramble_vertex_ids( + raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts, + size_t lgN) +{ + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())); thrust::transform(handle.get_thrust_policy(), pair_first, - pair_first + d_src_v.size(), + pair_first + 
srcs.size(), pair_first, - [scale] __device__(auto pair) { - return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale), - detail::scramble(thrust::get<1>(pair), scale)); + [lgN] __device__(auto pair) { + return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), lgN), + detail::scramble(thrust::get<1>(pair), lgN)); }); + + return std::make_tuple(std::move(srcs), std::move(dsts)); } template @@ -250,17 +257,25 @@ symmetrize_edgelist_from_triangular( optional_d_weights_v ? std::move(optional_d_weights_v) : std::nullopt); } -template void scramble_vertex_ids(raft::handle_t const& handle, - rmm::device_uvector& d_src_v, - rmm::device_uvector& d_dst_v, - int32_t vertex_id_offset, - uint64_t seed); - -template void scramble_vertex_ids(raft::handle_t const& handle, - rmm::device_uvector& d_src_v, - rmm::device_uvector& d_dst_v, - int64_t vertex_id_offset, - uint64_t seed); +template rmm::device_uvector scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + size_t lgN); + +template rmm::device_uvector scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + size_t lgN); + +template std::tuple, rmm::device_uvector> scramble_vertex_ids( + raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts, + size_t lgN); + +template std::tuple, rmm::device_uvector> scramble_vertex_ids( + raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts, + size_t lgN); template std::tuple, rmm::device_uvector, diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 72ea93ffcf1..36e231ad570 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -47,8 +47,8 @@ std::unique_ptr call_generate_rmat_edgelist(raft::handle_t co handle, scale, num_edges, a, b, c, seed, clip_and_flip); if (scramble_vertex_ids) { - cugraph::scramble_vertex_ids( - handle, std::get<0>(src_dst_tuple), std::get<1>(src_dst_tuple), vertex_t{0}, 
seed); + src_dst_tuple = cugraph::scramble_vertex_ids( + handle, std::move(std::get<0>(src_dst_tuple)), std::move(std::get<1>(src_dst_tuple)), scale); } graph_generator_t gg_vals{ @@ -82,11 +82,15 @@ call_generate_rmat_edgelists(raft::handle_t const& handle, clip_and_flip); if (scramble_vertex_ids) { - std::for_each( - src_dst_vec_tuple.begin(), src_dst_vec_tuple.end(), [&handle, seed](auto& src_dst_tuple) { - cugraph::scramble_vertex_ids( - handle, std::get<0>(src_dst_tuple), std::get<1>(src_dst_tuple), vertex_t{0}, seed); - }); + std::for_each(src_dst_vec_tuple.begin(), + src_dst_vec_tuple.end(), + [&handle, max_scale, seed](auto& src_dst_tuple) { + src_dst_tuple = + cugraph::scramble_vertex_ids(handle, + std::move(std::get<0>(src_dst_tuple)), + std::move(std::get<1>(src_dst_tuple)), + max_scale); + }); } std::vector, std::unique_ptr>> diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2fd9f06447f..a784451402b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -256,6 +256,10 @@ ConfigureTest(STREAM_TEST structure/streams.cu) # - R-mat graph generation tests ------------------------------------------------------------------ ConfigureTest(GENERATE_RMAT_TEST generators/generate_rmat_test.cpp) +################################################################################################### +# - Bipartite R-mat graph generation tests -------------------------------------------------------- +ConfigureTest(GENERATE_BIPARTITE_RMAT_TEST generators/generate_bipartite_rmat_test.cpp) + ################################################################################################### # - Graph mask tests ----------------------------------------------------------------------------------- ConfigureTest(GRAPH_MASK_TEST structure/graph_mask_test.cpp) diff --git a/cpp/tests/generators/generate_bipartite_rmat_test.cpp b/cpp/tests/generators/generate_bipartite_rmat_test.cpp new file mode 100644 index 00000000000..b97c0a7483c --- /dev/null 
+++ b/cpp/tests/generators/generate_bipartite_rmat_test.cpp @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +// this function assumes that vertex IDs are not scrambled. +// it partitions the edges in place around the mid-points of the source & destination vertex ID +// ranges, checks the edge fractions against a, b, c (or a + b, a + c if only one range can be +// split), and recurses into the sub-ranges that can be split further. +template +void validate_bipartite_rmat_distribution( + std::tuple* edges, + size_t num_edges, + vertex_t src_first, + vertex_t src_last, + vertex_t dst_first, + vertex_t dst_last, + double a, + double b, + double c, + size_t min_edges /* stop recursion if # edges < min_edges */, + double error_tolerance /* (computed a|b|c - input a|b|c) should be smaller than error_tolerance*/) +{ + // we cannot expect the ratios of the edges in the four quadrants of the graph adjacency matrix to + // converge close to a, b, c, d if num_edges is not large enough. 
+ if (num_edges < min_edges) { return; } + + auto src_threshold = (src_first + src_last) / 2; + auto dst_threshold = (dst_first + dst_last) / 2; + + if (src_last - src_first >= 2) { + auto a_plus_b_last = std::partition(edges, edges + num_edges, [src_threshold](auto edge) { + return std::get<0>(edge) < src_threshold; + }); + if (dst_last - dst_first >= 2) { + auto a_last = std::partition(edges, a_plus_b_last, [dst_threshold](auto edge) { + return std::get<1>(edge) < dst_threshold; + }); + auto c_last = std::partition(a_plus_b_last, edges + num_edges, [dst_threshold](auto edge) { + return std::get<1>(edge) < dst_threshold; + }); + + ASSERT_TRUE(std::abs((double)std::distance(edges, a_last) / num_edges - a) < error_tolerance) + << "# edges=" << num_edges + << " computed a=" << (double)std::distance(edges, a_last) / num_edges << " iput a=" << a + << " error tolerance=" << error_tolerance << "."; + ASSERT_TRUE(std::abs((double)std::distance(a_last, a_plus_b_last) / num_edges - b) < + error_tolerance) + << "# edges=" << num_edges + << " computed b=" << (double)std::distance(a_last, a_plus_b_last) / num_edges + << " iput b=" << b << " error tolerance=" << error_tolerance << "."; + ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - c) < + error_tolerance) + << "# edges=" << num_edges + << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges + << " iput c=" << c << " error tolerance=" << error_tolerance << "."; + + if ((src_threshold - src_first) * (dst_threshold - dst_first) >= 2) { + validate_bipartite_rmat_distribution(edges, + std::distance(edges, a_last), + src_first, + src_threshold, + dst_first, + dst_threshold, + a, + b, + c, + min_edges, + error_tolerance); + } + if ((src_threshold - src_first) * (dst_last - dst_threshold) >= 2) { + validate_bipartite_rmat_distribution(a_last, + std::distance(a_last, a_plus_b_last), + src_first, + src_threshold, + dst_threshold, + dst_last, + a, + b, + c, + min_edges, + 
error_tolerance); + } + if ((src_last - src_threshold) * (dst_threshold - dst_first) >= 2) { + validate_bipartite_rmat_distribution(a_plus_b_last, + std::distance(a_plus_b_last, c_last), + src_threshold, + src_last, + dst_first, + dst_threshold, + a, + b, + c, + min_edges, + error_tolerance); + } + if ((src_last - src_threshold) * (dst_last - dst_threshold) >= 2) { + validate_bipartite_rmat_distribution(c_last, + std::distance(c_last, edges + num_edges), + src_threshold, + src_last, + dst_threshold, + dst_last, + a, + b, + c, + min_edges, + error_tolerance); + } + } else { + ASSERT_TRUE(std::abs((double)std::distance(edges, a_plus_b_last) / num_edges - (a + b)) < + error_tolerance) + << "# edges=" << num_edges + << " computed a+b=" << (double)std::distance(edges, a_plus_b_last) / num_edges + << " iput a+b=" << (a + b) << " error tolerance=" << error_tolerance << "."; + if (src_threshold - src_first >= 2) { + validate_bipartite_rmat_distribution(edges, + std::distance(edges, a_plus_b_last), + src_first, + src_threshold, + dst_first, + dst_last, + a, + b, + c, + min_edges, + error_tolerance); + } + if (src_last - src_threshold >= 2) { + validate_bipartite_rmat_distribution(a_plus_b_last, + std::distance(a_plus_b_last, edges + num_edges), + src_threshold, + src_last, + dst_first, + dst_last, + a, + b, + c, + min_edges, + error_tolerance); + } + } + } else if (dst_last - dst_first >= 2) { + auto a_plus_c_last = std::partition(edges, edges + num_edges, [dst_threshold](auto edge) { + return std::get<1>(edge) < dst_threshold; + }); + ASSERT_TRUE(std::abs((double)std::distance(edges, a_plus_c_last) / num_edges - (a + c)) < + error_tolerance) + << "# edges=" << num_edges + << " computed a+c=" << (double)std::distance(edges, a_plus_c_last) / num_edges + << " iput a+c=" << (a + c) << " error tolerance=" << error_tolerance << "."; + if (dst_threshold - dst_first >= 2) { + validate_bipartite_rmat_distribution(edges, + std::distance(edges, a_plus_c_last), + src_first, + src_last, + 
dst_first, + dst_threshold, + a, + b, + c, + min_edges, + error_tolerance); + } + if (dst_last - dst_threshold >= 2) { + validate_bipartite_rmat_distribution(a_plus_c_last, + std::distance(a_plus_c_last, edges + num_edges), + src_first, + src_last, + dst_threshold, + dst_last, + a, + b, + c, + min_edges, + error_tolerance); + } + } + + return; +} + +struct GenerateBipartiteRmat_Usecase { + size_t src_scale{0}; + size_t dst_scale{0}; + size_t src_edge_factor{0}; // # edges = 2^src_scale * src_edge_factor + double a{0.0}; + double b{0.0}; + double c{0.0}; + + GenerateBipartiteRmat_Usecase( + size_t src_scale, size_t dst_scale, size_t src_edge_factor, double a, double b, double c) + : src_scale(src_scale), + dst_scale(dst_scale), + src_edge_factor(src_edge_factor), + a(a), + b(b), + c(c){}; +}; + +class Tests_GenerateBipartiteRmat : public ::testing::TestWithParam { + public: + Tests_GenerateBipartiteRmat() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(GenerateBipartiteRmat_Usecase const& configuration) + { + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + auto num_src_vertices = static_cast(size_t{1} << configuration.src_scale); + auto num_dst_vertices = static_cast(size_t{1} << configuration.dst_scale); + + std::vector no_scramble_out_degrees(num_src_vertices, 0); + std::vector no_scramble_in_degrees(num_dst_vertices, 0); + std::vector scramble_out_degrees(num_src_vertices, 0); + std::vector scramble_in_degrees(num_dst_vertices, 0); + for (size_t scramble = 0; scramble < 2; ++scramble) { + raft::random::RngState rng_state(0); + + rmm::device_uvector d_srcs(0, handle.get_stream()); + rmm::device_uvector d_dsts(0, handle.get_stream()); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Generate edge list"); + } + + std::tie(d_srcs, d_dsts) = 
cugraph::generate_bipartite_rmat_edgelist( + handle, + rng_state, + configuration.src_scale, + configuration.dst_scale, + (size_t{1} << configuration.src_scale) * configuration.src_edge_factor, + configuration.a, + configuration.b, + configuration.c); + + if (scramble == 1) { + d_srcs = cugraph::scramble_vertex_ids(handle, std::move(d_srcs), configuration.src_scale); + d_dsts = cugraph::scramble_vertex_ids(handle, std::move(d_dsts), configuration.dst_scale); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto h_cugraph_srcs = cugraph::test::to_host(handle, d_srcs); + auto h_cugraph_dsts = cugraph::test::to_host(handle, d_dsts); + + ASSERT_TRUE((h_cugraph_srcs.size() == + (size_t{1} << configuration.src_scale) * configuration.src_edge_factor) && + (h_cugraph_dsts.size() == + (size_t{1} << configuration.src_scale) * configuration.src_edge_factor)) + << "Returned an invalid number of bipartite R-mat graph edges."; + ASSERT_TRUE(std::count_if(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [num_src_vertices](auto v) { + return !cugraph::is_valid_vertex(num_src_vertices, v); + }) == 0) + << "Returned bipartite R-mat graph edges have invalid source vertex IDs."; + ASSERT_TRUE(std::count_if(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [num_dst_vertices](auto v) { + return !cugraph::is_valid_vertex(num_dst_vertices, v); + }) == 0) + << "Returned bipartite R-mat graph edges have invalid destination vertex IDs."; + + if (!scramble) { + std::vector> h_cugraph_edges(h_cugraph_srcs.size()); + for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) { + h_cugraph_edges[i] = std::make_tuple(h_cugraph_srcs[i], h_cugraph_dsts[i]); + } + + validate_bipartite_rmat_distribution(h_cugraph_edges.data(), + h_cugraph_edges.size(), + vertex_t{0}, + num_src_vertices, + vertex_t{0}, + num_dst_vertices, + configuration.a, + configuration.b, + 
configuration.c, + size_t{100000}, + 0.01); + } + + if (scramble) { + std::for_each(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [&scramble_out_degrees](auto src) { scramble_out_degrees[src]++; }); + std::for_each(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [&scramble_in_degrees](auto dst) { scramble_in_degrees[dst]++; }); + std::sort(scramble_out_degrees.begin(), scramble_out_degrees.end()); + std::sort(scramble_in_degrees.begin(), scramble_in_degrees.end()); + } else { + std::for_each(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [&no_scramble_out_degrees](auto src) { no_scramble_out_degrees[src]++; }); + std::for_each(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [&no_scramble_in_degrees](auto dst) { no_scramble_in_degrees[dst]++; }); + std::sort(no_scramble_out_degrees.begin(), no_scramble_out_degrees.end()); + std::sort(no_scramble_in_degrees.begin(), no_scramble_in_degrees.end()); + } + } + + // this relies on the fact that the edge generator is deterministic. + // ideally, we should test that the two graphs are isomorphic, but this is NP hard; instead, we + // just check out-degree & in-degree distributions + ASSERT_TRUE(std::equal(no_scramble_out_degrees.begin(), + no_scramble_out_degrees.end(), + scramble_out_degrees.begin())); + ASSERT_TRUE(std::equal( + no_scramble_in_degrees.begin(), no_scramble_in_degrees.end(), scramble_in_degrees.begin())); + } +}; + +TEST_P(Tests_GenerateBipartiteRmat, CheckInt32) { run_current_test(GetParam()); } +TEST_P(Tests_GenerateBipartiteRmat, CheckInt64) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_SUITE_P( + simple_test, + Tests_GenerateBipartiteRmat, + ::testing::Values(GenerateBipartiteRmat_Usecase(20, 10, 16, 0.57, 0.19, 0.19), + GenerateBipartiteRmat_Usecase(10, 20, 16, 0.57, 0.19, 0.19), + GenerateBipartiteRmat_Usecase(20, 10, 16, 0.45, 0.22, 0.22), + GenerateBipartiteRmat_Usecase(10, 20, 16, 0.45, 0.22, 0.22))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git 
a/cpp/tests/generators/generate_rmat_test.cpp b/cpp/tests/generators/generate_rmat_test.cpp index c4150b9732d..bdf79fd5962 100644 --- a/cpp/tests/generators/generate_rmat_test.cpp +++ b/cpp/tests/generators/generate_rmat_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -88,59 +89,67 @@ void validate_rmat_distribution( << " iput c=" << c << " error tolerance=" << error_tolerance << "."; } - validate_rmat_distribution(edges, - std::distance(edges, a_last), - src_first, - src_threshold, - dst_first, - dst_threshold, - a, - b, - c, - clip_and_flip, - min_edges, - error_tolerance); - validate_rmat_distribution(a_last, - std::distance(a_last, a_plus_b_last), - src_first, - (src_first + src_last) / 2, - dst_threshold, - dst_last, - a, - b, - c, - clip_and_flip, - min_edges, - error_tolerance); - validate_rmat_distribution(a_plus_b_last, - std::distance(a_plus_b_last, c_last), - src_threshold, - src_last, - dst_first, - dst_threshold, - a, - b, - c, - clip_and_flip, - min_edges, - error_tolerance); - validate_rmat_distribution(c_last, - std::distance(c_last, edges + num_edges), - src_threshold, - src_last, - dst_threshold, - dst_last, - a, - b, - c, - clip_and_flip, - min_edges, - error_tolerance); + if ((src_threshold - src_first) * (dst_threshold - dst_first) >= 2) { + validate_rmat_distribution(edges, + std::distance(edges, a_last), + src_first, + src_threshold, + dst_first, + dst_threshold, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + } + if ((src_threshold - src_first) * (dst_last - dst_threshold) >= 2) { + validate_rmat_distribution(a_last, + std::distance(a_last, a_plus_b_last), + src_first, + src_threshold, + dst_threshold, + dst_last, + a, + b, + c, + clip_and_flip, + 
min_edges, + error_tolerance); + } + if ((src_last - src_threshold) * (dst_threshold - dst_first) >= 2) { + validate_rmat_distribution(a_plus_b_last, + std::distance(a_plus_b_last, c_last), + src_threshold, + src_last, + dst_first, + dst_threshold, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + } + if ((src_last - src_threshold) * (dst_last - dst_threshold) >= 2) { + validate_rmat_distribution(c_last, + std::distance(c_last, edges + num_edges), + src_threshold, + src_last, + dst_threshold, + dst_last, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + } return; } -typedef struct GenerateRmat_Usecase_t { +struct GenerateRmat_Usecase { size_t scale{0}; size_t edge_factor{0}; double a{0.0}; @@ -148,10 +157,10 @@ typedef struct GenerateRmat_Usecase_t { double c{0.0}; bool clip_and_flip{false}; - GenerateRmat_Usecase_t( + GenerateRmat_Usecase( size_t scale, size_t edge_factor, double a, double b, double c, bool clip_and_flip) : scale(scale), edge_factor(edge_factor), a(a), b(b), c(c), clip_and_flip(clip_and_flip){}; -} GenerateRmat_Usecase; +}; class Tests_GenerateRmat : public ::testing::TestWithParam { public: @@ -167,6 +176,7 @@ class Tests_GenerateRmat : public ::testing::TestWithParam void run_current_test(GenerateRmat_Usecase const& configuration) { raft::handle_t handle{}; + HighResTimer hr_timer{}; auto num_vertices = static_cast(size_t{1} << configuration.scale); std::vector no_scramble_out_degrees(num_vertices, 0); @@ -174,23 +184,36 @@ class Tests_GenerateRmat : public ::testing::TestWithParam std::vector scramble_out_degrees(num_vertices, 0); std::vector scramble_in_degrees(num_vertices, 0); for (size_t scramble = 0; scramble < 2; ++scramble) { + raft::random::RngState rng_state(0); + rmm::device_uvector d_srcs(0, handle.get_stream()); rmm::device_uvector d_dsts(0, handle.get_stream()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (cugraph::test::g_perf) { + 
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Generate edge list"); + } std::tie(d_srcs, d_dsts) = cugraph::generate_rmat_edgelist( handle, + rng_state, configuration.scale, (size_t{1} << configuration.scale) * configuration.edge_factor, configuration.a, configuration.b, configuration.c, - uint64_t{0}, configuration.clip_and_flip); - // static_cast(scramble)); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (scramble == 1) { + std::tie(d_srcs, d_dsts) = cugraph::scramble_vertex_ids( + handle, std::move(d_srcs), std::move(d_dsts), configuration.scale); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } auto h_cugraph_srcs = cugraph::test::to_host(handle, d_srcs); auto h_cugraph_dsts = cugraph::test::to_host(handle, d_dsts); @@ -199,17 +222,17 @@ class Tests_GenerateRmat : public ::testing::TestWithParam (h_cugraph_srcs.size() == (size_t{1} << configuration.scale) * configuration.edge_factor) && (h_cugraph_dsts.size() == (size_t{1} << configuration.scale) * configuration.edge_factor)) << "Returned an invalid number of R-mat graph edges."; - ASSERT_TRUE( - std::count_if(h_cugraph_srcs.begin(), - h_cugraph_srcs.end(), - [num_vertices = static_cast(size_t{1} << configuration.scale)]( - auto v) { return !cugraph::is_valid_vertex(num_vertices, v); }) == 0) + ASSERT_TRUE(std::count_if(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [num_vertices](auto v) { + return !cugraph::is_valid_vertex(num_vertices, v); + }) == 0) << "Returned R-mat graph edges have invalid source vertex IDs."; - ASSERT_TRUE( - std::count_if(h_cugraph_dsts.begin(), - h_cugraph_dsts.end(), - [num_vertices = static_cast(size_t{1} << configuration.scale)]( - auto v) { return !cugraph::is_valid_vertex(num_vertices, v); }) == 0) + ASSERT_TRUE(std::count_if(h_cugraph_dsts.begin(), 
+ h_cugraph_dsts.end(), + [num_vertices](auto v) { + return !cugraph::is_valid_vertex(num_vertices, v); + }) == 0) << "Returned R-mat graph edges have invalid destination vertex IDs."; if (!scramble) { @@ -260,7 +283,7 @@ class Tests_GenerateRmat : public ::testing::TestWithParam } // this relies on the fact that the edge generator is deterministic. - // ideally, we should test that the two graphs are isomorphic, but this is NP hard; insted, we + // ideally, we should test that the two graphs are isomorphic, but this is NP hard; instead, we // just check out-degree & in-degree distributions ASSERT_TRUE(std::equal(no_scramble_out_degrees.begin(), no_scramble_out_degrees.end(), @@ -270,9 +293,8 @@ class Tests_GenerateRmat : public ::testing::TestWithParam } }; -// FIXME: add tests for type combinations - TEST_P(Tests_GenerateRmat, CheckInt32) { run_current_test(GetParam()); } +TEST_P(Tests_GenerateRmat, CheckInt64) { run_current_test(GetParam()); } INSTANTIATE_TEST_SUITE_P(simple_test, Tests_GenerateRmat, @@ -280,7 +302,8 @@ INSTANTIATE_TEST_SUITE_P(simple_test, GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); -typedef struct GenerateRmats_Usecase_t { + +struct GenerateRmats_Usecase { size_t n_edgelists{0}; size_t min_scale{0}; size_t max_scale{0}; @@ -288,19 +311,20 @@ typedef struct GenerateRmats_Usecase_t { cugraph::generator_distribution_t component_distribution; cugraph::generator_distribution_t edge_distribution; - GenerateRmats_Usecase_t(size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::generator_distribution_t component_distribution, - cugraph::generator_distribution_t edge_distribution) + GenerateRmats_Usecase(size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t component_distribution, + cugraph::generator_distribution_t 
edge_distribution) : n_edgelists(n_edgelists), min_scale(min_scale), max_scale(max_scale), component_distribution(component_distribution), edge_distribution(edge_distribution), edge_factor(edge_factor){}; -} GenerateRmats_Usecase; +}; + class Tests_GenerateRmats : public ::testing::TestWithParam { public: Tests_GenerateRmats() {} @@ -315,19 +339,30 @@ class Tests_GenerateRmats : public ::testing::TestWithParam(handle, + rng_state, configuration.n_edgelists, configuration.min_scale, configuration.max_scale, configuration.edge_factor, configuration.component_distribution, - configuration.edge_distribution, - uint64_t{0}); + configuration.edge_distribution); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement ASSERT_EQ(configuration.n_edgelists, outputs.size()); for (auto i = outputs.begin(); i != outputs.end(); ++i) { ASSERT_EQ(std::get<0>(*i).size(), std::get<1>(*i).size()); @@ -336,7 +371,9 @@ class Tests_GenerateRmats : public ::testing::TestWithParam(GetParam()); } +TEST_P(Tests_GenerateRmats, CheckInt64) { run_current_test(GetParam()); } INSTANTIATE_TEST_SUITE_P( simple_test, diff --git a/cpp/tests/generators/generators_test.cpp b/cpp/tests/generators/generators_test.cpp index 74794833b46..b5dbf54b265 100644 --- a/cpp/tests/generators/generators_test.cpp +++ b/cpp/tests/generators/generators_test.cpp @@ -590,7 +590,7 @@ TEST_F(GeneratorsTest, ScrambleTest) using vertex_t = int32_t; using edge_t = int32_t; - edge_t num_vertices{30}; + vertex_t num_vertices{30}; edge_t num_edges{100}; raft::handle_t handle; @@ -614,7 +614,9 @@ TEST_F(GeneratorsTest, ScrambleTest) raft::update_device(d_src_v.data(), input_src_v.data(), input_src_v.size(), handle.get_stream()); raft::update_device(d_dst_v.data(), input_dst_v.data(), input_dst_v.size(), handle.get_stream()); 
- cugraph::scramble_vertex_ids(handle, d_src_v, d_dst_v, 5, 0); + auto lgN = static_cast(std::ceil(std::log2(num_vertices))); + std::tie(d_src_v, d_dst_v) = + cugraph::scramble_vertex_ids(handle, std::move(d_src_v), std::move(d_dst_v), lgN); auto output_src_v = cugraph::test::to_host(handle, d_src_v); auto output_dst_v = cugraph::test::to_host(handle, d_dst_v); diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index c854e8eee3e..16c9d3ed145 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -269,6 +269,10 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { b_, c_, undirected_ ? true : false); + if (scramble_vertex_ids_) { + std::tie(tmp_src_v, tmp_dst_v) = + cugraph::scramble_vertex_ids(handle, std::move(tmp_src_v), std::move(tmp_dst_v), scale_); + } std::optional> tmp_weights_v{std::nullopt}; if (weight_partitions) { @@ -347,6 +351,9 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { partition_vertex_firsts[i]); v_offset += partition_vertex_lasts[i] - partition_vertex_firsts[i]; } + if (scramble_vertex_ids_) { + vertex_v = cugraph::scramble_vertex_ids(handle, std::move(vertex_v), scale_); + } translate(handle, vertex_v);