From cdc563fce77947c655fe6ceecd6fb68d7b88bfd5 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Wed, 3 Aug 2022 20:36:44 -0500 Subject: [PATCH] Update PageRank to leverage pylibcugraph (#2467) This PR 1. Extends the capabilities of the `PageRank CAPI` to the `python cuGraph API` 2. Update or Add support for the parameters `personalization` `precomputed_vertex_out_weight` and `nstart` for both the SG and MG implementation closes #2455 closes #2430 Authors: - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/2467 --- .../cugraph/detail/shuffle_wrappers.hpp | 19 + cpp/include/cugraph_c/centrality_algorithms.h | 57 ++- cpp/src/c_api/pagerank.cpp | 222 +++++++++-- cpp/src/detail/shuffle_wrappers.cu | 66 +++- cpp/tests/c_api/mg_bfs_test.c | 34 +- cpp/tests/c_api/mg_pagerank_test.c | 150 +++++++- cpp/tests/c_api/pagerank_test.c | 170 ++++++++- cpp/tests/layout/trust_worthiness.h | 3 +- python/cugraph/CMakeLists.txt | 2 - .../cugraph/dask/link_analysis/CMakeLists.txt | 25 -- .../dask/link_analysis/mg_pagerank.pxd | 34 -- .../link_analysis/mg_pagerank_wrapper.pyx | 144 ------- .../cugraph/dask/link_analysis/pagerank.py | 356 +++++++++++------- .../algorithms/link_analysis/pagerank_alg.py | 14 +- .../cugraph/link_analysis/CMakeLists.txt | 25 -- .../cugraph/link_analysis/pagerank.pxd | 36 -- .../cugraph/cugraph/link_analysis/pagerank.py | 162 ++++++-- .../link_analysis/pagerank_wrapper.pyx | 142 ------- .../graph_implementation/simpleGraph.py | 10 +- .../cugraph/tests/mg/test_mg_pagerank.py | 77 +++- python/cugraph/cugraph/tests/test_pagerank.py | 80 +++- python/cugraph/cugraph/tests/test_paths.py | 10 +- .../cugraph/cugraph/utilities/nx_factory.py | 5 +- 
.../pylibcugraph/pylibcugraph/CMakeLists.txt | 1 + python/pylibcugraph/pylibcugraph/__init__.py | 2 + .../_cugraph_c/centrality_algorithms.pxd | 10 +- python/pylibcugraph/pylibcugraph/pagerank.pyx | 80 ++-- .../pylibcugraph/personalized_pagerank.pyx | 252 +++++++++++++ .../pylibcugraph/tests/test_graph_sg.py | 2 +- .../pylibcugraph/tests/test_pagerank.py | 9 +- python/pylibcugraph/pylibcugraph/utils.pxd | 3 + python/pylibcugraph/pylibcugraph/utils.pyx | 50 ++- 32 files changed, 1534 insertions(+), 718 deletions(-) delete mode 100644 python/cugraph/cugraph/dask/link_analysis/CMakeLists.txt delete mode 100644 python/cugraph/cugraph/dask/link_analysis/mg_pagerank.pxd delete mode 100644 python/cugraph/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx delete mode 100644 python/cugraph/cugraph/link_analysis/CMakeLists.txt delete mode 100644 python/cugraph/cugraph/link_analysis/pagerank.pxd delete mode 100644 python/cugraph/cugraph/link_analysis/pagerank_wrapper.pyx create mode 100644 python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp index e0ba45d95ce..bbd14552c47 100644 --- a/cpp/include/cugraph/detail/shuffle_wrappers.hpp +++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp @@ -62,6 +62,25 @@ template rmm::device_uvector shuffle_ext_vertices_by_gpu_id( raft::handle_t const& handle, rmm::device_uvector&& d_vertices); +/** + * @brief Shuffle vertex/value tuples using the external vertex key function which returns the + * target GPU ID. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam value_t Type of values. + * + * @param[in] handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and handles to various CUDA libraries) to run graph algorithms. + * @param[in] d_vertices Vertex IDs to shuffle + * @param[in] d_values Values to shuffle + * + * @return tuple containing device vector of shuffled vertices and device vector of shuffled values + */ +template +std::tuple, rmm::device_uvector> +shuffle_ext_vertices_and_values_by_gpu_id(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values); + /** * @brief Shuffle vertices using the internal vertex key function which returns the target GPU ID. * diff --git a/cpp/include/cugraph_c/centrality_algorithms.h b/cpp/include/cugraph_c/centrality_algorithms.h index ed21fcdce52..e197ac4b403 100644 --- a/cpp/include/cugraph_c/centrality_algorithms.h +++ b/cpp/include/cugraph_c/centrality_algorithms.h @@ -67,19 +67,31 @@ void cugraph_centrality_result_free(cugraph_centrality_result_t* result); * * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph + * @param [in] precomputed_vertex_out_weight_vertices + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. * @param [in] precomputed_vertex_out_weight_sums - * Optionally send in precomputed sume of vertex out weights + * Optionally send in precomputed sum of vertex out weights * (a performance optimization). Set to NULL if * no value is passed. + * @param [in] initial_guess_vertices + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. If NULL, initial PageRank + * values are set to 1.0 divided by the number of vertices in + * the graph. + * @param [in] initial_guess_values + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). Set to NULL if + * no value is passed. If NULL, initial PageRank values are set + * to 1.0 divided by the number of vertices in the graph.
* @param [in] alpha PageRank damping factor. * @param [in] epsilon Error tolerance to check convergence. Convergence is assumed * if the sum of the differences in PageRank values between two * consecutive iterations is less than the number of vertices * in the graph multiplied by @p epsilon. * @param [in] max_iterations Maximum number of PageRank iterations. - * @param [in] has_initial_guess If set to `true`, values in the PageRank output array (pointed by - * @p pageranks) is used as initial PageRank values. If false, initial PageRank values are set - * to 1.0 divided by the number of vertices in the graph. * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to * `true`). * @param [out] result Opaque pointer to pagerank results @@ -90,11 +102,13 @@ void cugraph_centrality_result_free(cugraph_centrality_result_t* result); cugraph_error_code_t cugraph_pagerank( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error); @@ -104,14 +118,27 @@ cugraph_error_code_t cugraph_pagerank( * * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph + * @param [in] precomputed_vertex_out_weight_vertices + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. 
* @param [in] precomputed_vertex_out_weight_sums - * Optionally send in precomputed sume of vertex out weights + * Optionally send in precomputed sum of vertex out weights * (a performance optimization). Set to NULL if * no value is passed. - * FIXME: Make this just [in], copy it if I need to temporarily modify internally - * @param [in/out] personalization_vertices Pointer to an array storing personalization vertex - * identifiers (compute personalized PageRank). Array might be modified if renumbering is enabled - * for the graph + * @param [in] initial_guess_vertices + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. If NULL, initial PageRank + * values are set to 1.0 divided by the number of vertices in + * the graph. + * @param [in] initial_guess_values + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). Set to NULL if + * no value is passed. If NULL, initial PageRank values are set + * to 1.0 divided by the number of vertices in the graph. + * @param [in] personalization_vertices Pointer to an array storing personalization vertex + * identifiers (compute personalized PageRank). * @param [in] personalization_values Pointer to an array storing personalization values for the * vertices in the personalization set. * @param [in] alpha PageRank damping factor. @@ -120,9 +147,6 @@ cugraph_error_code_t cugraph_pagerank( * consecutive iterations is less than the number of vertices * in the graph multiplied by @p epsilon. * @param [in] max_iterations Maximum number of PageRank iterations. - * @param [in] has_initial_guess If set to `true`, values in the PageRank output array (pointed by - * @p pageranks) is used as initial PageRank values. If false, initial PageRank values are set - * to 1.0 divided by the number of vertices in the graph. 
* @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to * `true`). * @param [out] result Opaque pointer to pagerank results @@ -133,14 +157,15 @@ cugraph_error_code_t cugraph_pagerank( cugraph_error_code_t cugraph_personalized_pagerank( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, - // FIXME: Make this const, copy it if I need to temporarily modify internally - cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, const cugraph_type_erased_device_array_view_t* personalization_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error); diff --git a/cpp/src/c_api/pagerank.cpp b/cpp/src/c_api/pagerank.cpp index 7621109d9d9..99fcca706b1 100644 --- a/cpp/src/c_api/pagerank.cpp +++ b/cpp/src/c_api/pagerank.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -33,36 +34,50 @@ namespace { struct pagerank_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; cugraph::c_api::cugraph_graph_t* graph_{}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* + precomputed_vertex_out_weight_vertices_{}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* precomputed_vertex_out_weight_sums_{}; - cugraph::c_api::cugraph_type_erased_device_array_view_t* personalization_vertices_{}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* initial_guess_vertices_{}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* 
initial_guess_values_{}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* personalization_vertices_{}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* personalization_values_{}; double alpha_{}; double epsilon_{}; size_t max_iterations_{}; - bool has_initial_guess_{}; bool do_expensive_check_{}; cugraph::c_api::cugraph_centrality_result_t* result_{}; pagerank_functor( cugraph_resource_handle_t const* handle, cugraph_graph_t* graph, + cugraph_type_erased_device_array_view_t const* precomputed_vertex_out_weight_vertices, cugraph_type_erased_device_array_view_t const* precomputed_vertex_out_weight_sums, - cugraph_type_erased_device_array_view_t* personalization_vertices, + cugraph_type_erased_device_array_view_t const* initial_guess_vertices, + cugraph_type_erased_device_array_view_t const* initial_guess_values, + cugraph_type_erased_device_array_view_t const* personalization_vertices, cugraph_type_erased_device_array_view_t const* personalization_values, double alpha, double epsilon, size_t max_iterations, - bool has_initial_guess, bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), graph_(reinterpret_cast(graph)), + precomputed_vertex_out_weight_vertices_( + reinterpret_cast( + precomputed_vertex_out_weight_vertices)), precomputed_vertex_out_weight_sums_( reinterpret_cast( precomputed_vertex_out_weight_sums)), + initial_guess_vertices_( + reinterpret_cast( + initial_guess_vertices)), + initial_guess_values_( + reinterpret_cast( + initial_guess_values)), personalization_vertices_( - reinterpret_cast( + reinterpret_cast( personalization_vertices)), personalization_values_( reinterpret_cast( @@ -70,7 +85,6 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { alpha_(alpha), epsilon_(epsilon), max_iterations_(max_iterations), - has_initial_guess_(has_initial_guess), do_expensive_check_(do_expensive_check) { } @@ -104,40 +118,113 @@ struct pagerank_functor : public 
cugraph::c_api::abstract_functor { rmm::device_uvector pageranks(graph_view.local_vertex_partition_range_size(), handle_.get_stream()); + rmm::device_uvector personalization_vertices(0, handle_.get_stream()); + rmm::device_uvector personalization_values(0, handle_.get_stream()); + if (personalization_vertices_ != nullptr) { + personalization_vertices.resize(personalization_vertices_->size_, handle_.get_stream()); + personalization_values.resize(personalization_values_->size_, handle_.get_stream()); + + raft::copy(personalization_vertices.data(), + personalization_vertices_->as_type(), + personalization_vertices_->size_, + handle_.get_stream()); + raft::copy(personalization_values.data(), + personalization_values_->as_type(), + personalization_values_->size_, + handle_.get_stream()); + + if constexpr (multi_gpu) { + std::tie(personalization_vertices, personalization_values) = + cugraph::detail::shuffle_ext_vertices_and_values_by_gpu_id( + handle_, std::move(personalization_vertices), std::move(personalization_values)); + } // // Need to renumber personalization_vertices // - cugraph::renumber_ext_vertices( + cugraph::renumber_local_ext_vertices( handle_, - personalization_vertices_->as_type(), - personalization_vertices_->size_, + personalization_vertices.data(), + personalization_vertices.size(), number_map->data(), graph_view.local_vertex_partition_range_first(), graph_view.local_vertex_partition_range_last(), do_expensive_check_); } + rmm::device_uvector precomputed_vertex_out_weight_sums(0, handle_.get_stream()); + if (precomputed_vertex_out_weight_sums_ != nullptr) { + rmm::device_uvector precomputed_vertex_out_weight_vertices( + precomputed_vertex_out_weight_vertices_->size_, handle_.get_stream()); + precomputed_vertex_out_weight_sums.resize(precomputed_vertex_out_weight_sums_->size_, + handle_.get_stream()); + + raft::copy(precomputed_vertex_out_weight_vertices.data(), + precomputed_vertex_out_weight_vertices_->as_type(), + 
precomputed_vertex_out_weight_vertices_->size_, + handle_.get_stream()); + raft::copy(precomputed_vertex_out_weight_sums.data(), + precomputed_vertex_out_weight_sums_->as_type(), + precomputed_vertex_out_weight_sums_->size_, + handle_.get_stream()); + + precomputed_vertex_out_weight_sums = cugraph::detail:: + collect_local_vertex_values_from_ext_vertex_value_pairs( + handle_, + std::move(precomputed_vertex_out_weight_vertices), + std::move(precomputed_vertex_out_weight_sums), + *number_map, + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + weight_t{0}, + do_expensive_check_); + } + + if (initial_guess_values_ != nullptr) { + rmm::device_uvector initial_guess_vertices(initial_guess_vertices_->size_, + handle_.get_stream()); + rmm::device_uvector initial_guess_values(initial_guess_values_->size_, + handle_.get_stream()); + + raft::copy(initial_guess_vertices.data(), + initial_guess_vertices_->as_type(), + initial_guess_vertices.size(), + handle_.get_stream()); + + raft::copy(initial_guess_values.data(), + initial_guess_values_->as_type(), + initial_guess_values.size(), + handle_.get_stream()); + + pageranks = cugraph::detail:: + collect_local_vertex_values_from_ext_vertex_value_pairs( + handle_, + std::move(initial_guess_vertices), + std::move(initial_guess_values), + *number_map, + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + weight_t{0}, + do_expensive_check_); + } + cugraph::pagerank( handle_, graph_view, precomputed_vertex_out_weight_sums_ - ? std::make_optional(precomputed_vertex_out_weight_sums_->as_type()) - : std::nullopt, - personalization_vertices_ - ? std::make_optional(personalization_vertices_->as_type()) - : std::nullopt, - personalization_values_ - ? std::make_optional(personalization_values_->as_type()) + ? std::make_optional(precomputed_vertex_out_weight_sums.data()) : std::nullopt, + personalization_vertices_ ? 
std::make_optional(personalization_vertices.data()) + : std::nullopt, + personalization_values_ ? std::make_optional(personalization_values.data()) : std::nullopt, personalization_vertices_ - ? std::make_optional(static_cast(personalization_vertices_->size_)) + ? std::make_optional(static_cast(personalization_vertices.size())) : std::nullopt, pageranks.data(), static_cast(alpha_), static_cast(epsilon_), max_iterations_, - has_initial_guess_, + initial_guess_values_ != nullptr, do_expensive_check_); rmm::device_uvector vertex_ids(graph_view.local_vertex_partition_range_size(), @@ -156,24 +243,60 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { extern "C" cugraph_error_code_t cugraph_pagerank( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error) { + if (precomputed_vertex_out_weight_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_sums) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_sums must match", + *error); + } + if (initial_guess_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + initial_guess_vertices) + ->type_, + 
CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + initial_guess_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_values must match", + *error); + } pagerank_functor functor(handle, graph, + precomputed_vertex_out_weight_vertices, precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, nullptr, nullptr, alpha, epsilon, max_iterations, - has_initial_guess, do_expensive_check); return cugraph::c_api::run_algorithm(graph, functor, result, error); @@ -182,26 +305,79 @@ extern "C" cugraph_error_code_t cugraph_pagerank( extern "C" cugraph_error_code_t cugraph_personalized_pagerank( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, - cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, const cugraph_type_erased_device_array_view_t* personalization_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error) { + if (precomputed_vertex_out_weight_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_sums) + ->type_, + CUGRAPH_INVALID_INPUT, + 
"vertex type of graph and precomputed_vertex_out_weight_sums must match", + *error); + } + if (initial_guess_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + initial_guess_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + initial_guess_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_values must match", + *error); + } + if (personalization_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + personalization_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and personalization_vector must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + personalization_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and personalization_vector must match", + *error); + } + pagerank_functor functor(handle, graph, + precomputed_vertex_out_weight_vertices, precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, personalization_vertices, personalization_values, alpha, epsilon, max_iterations, - has_initial_guess, do_expensive_check); return cugraph::c_api::run_algorithm(graph, functor, result, error); diff --git a/cpp/src/detail/shuffle_wrappers.cu b/cpp/src/detail/shuffle_wrappers.cu index c36e95d268f..af1008bbeca 100644 --- a/cpp/src/detail/shuffle_wrappers.cu +++ b/cpp/src/detail/shuffle_wrappers.cu @@ -220,6 +220,24 @@ rmm::device_uvector shuffle_vertices_by_gpu_id_impl( return d_rx_vertices; } +template +std::tuple, rmm::device_uvector> +shuffle_vertices_and_values_by_gpu_id_impl(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values, + func_t func) +{ + std::tie(d_vertices, d_values, std::ignore) = 
cugraph::groupby_gpu_id_and_shuffle_kv_pairs( + handle.get_comms(), + d_vertices.begin(), + d_vertices.end(), + d_values.begin(), + [key_func = func] __device__(auto val) { return key_func(val); }, + handle.get_stream()); + + return std::make_tuple(std::move(d_vertices), std::move(d_values)); +} + template rmm::device_uvector shuffle_ext_vertices_by_gpu_id( raft::handle_t const& handle, rmm::device_uvector&& d_vertices) @@ -256,22 +274,56 @@ rmm::device_uvector shuffle_int_vertices_by_gpu_id( return return_value; } -template rmm::device_uvector shuffle_ext_vertices_by_gpu_id( - raft::handle_t const& handle, rmm::device_uvector&& d_vertices); - -template rmm::device_uvector shuffle_ext_vertices_by_gpu_id( - raft::handle_t const& handle, rmm::device_uvector&& d_vertices); - template rmm::device_uvector shuffle_int_vertices_by_gpu_id( raft::handle_t const& handle, rmm::device_uvector&& d_vertices, std::vector const& vertex_partition_range_lasts); - template rmm::device_uvector shuffle_int_vertices_by_gpu_id( raft::handle_t const& handle, rmm::device_uvector&& d_vertices, std::vector const& vertex_partition_range_lasts); +template rmm::device_uvector shuffle_ext_vertices_by_gpu_id( + raft::handle_t const& handle, rmm::device_uvector&& d_vertices); + +template rmm::device_uvector shuffle_ext_vertices_by_gpu_id( + raft::handle_t const& handle, rmm::device_uvector&& d_vertices); + +template +std::tuple, rmm::device_uvector> +shuffle_ext_vertices_and_values_by_gpu_id(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values) +{ + auto const comm_size = handle.get_comms().get_size(); + + return shuffle_vertices_and_values_by_gpu_id_impl( + handle, + std::move(d_vertices), + std::move(d_values), + cugraph::detail::compute_gpu_id_from_ext_vertex_t{comm_size}); +} + +template std::tuple, rmm::device_uvector> +shuffle_ext_vertices_and_values_by_gpu_id(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + 
rmm::device_uvector&& d_values); + +template std::tuple, rmm::device_uvector> +shuffle_ext_vertices_and_values_by_gpu_id(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values); + +template std::tuple, rmm::device_uvector> +shuffle_ext_vertices_and_values_by_gpu_id(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values); + +template std::tuple, rmm::device_uvector> +shuffle_ext_vertices_and_values_by_gpu_id(raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values); + template rmm::device_uvector groupby_and_count_edgelist_by_local_partition_id( raft::handle_t const& handle, diff --git a/cpp/tests/c_api/mg_bfs_test.c b/cpp/tests/c_api/mg_bfs_test.c index 0fc1fc431b5..ae1146c6a49 100644 --- a/cpp/tests/c_api/mg_bfs_test.c +++ b/cpp/tests/c_api/mg_bfs_test.c @@ -25,8 +25,7 @@ typedef int32_t vertex_t; typedef int32_t edge_t; typedef float weight_t; -int generic_bfs_test( - const cugraph_resource_handle_t* p_handle, +int generic_bfs_test(const cugraph_resource_handle_t* p_handle, vertex_t* h_src, vertex_t* h_dst, weight_t* h_wgt, @@ -37,15 +36,16 @@ int generic_bfs_test( size_t num_edges, size_t num_seeds, size_t depth_limit, - bool_t store_transposed) { + bool_t store_transposed) +{ int test_ret_value = 0; cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_graph_t* p_graph = NULL; - cugraph_paths_result_t* paths_result = NULL; - cugraph_type_erased_device_array_t* p_sources = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_paths_result_t* paths_result = NULL; + cugraph_type_erased_device_array_t* p_sources = NULL; cugraph_type_erased_device_array_view_t* p_source_view = NULL; ret_code = @@ -63,16 +63,9 @@ int generic_bfs_test( TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_mg_test_graph failed."); - ret_code = cugraph_bfs(p_handle, - p_graph, - p_source_view, - FALSE, - 10000000, - 
TRUE, - TRUE, - &paths_result, - &ret_error); - + ret_code = cugraph_bfs( + p_handle, p_graph, p_source_view, FALSE, 10000000, TRUE, TRUE, &paths_result, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_bfs failed."); @@ -80,9 +73,9 @@ int generic_bfs_test( cugraph_type_erased_device_array_view_t* distances; cugraph_type_erased_device_array_view_t* predecessors; - vertices = cugraph_paths_result_get_vertices(paths_result); + vertices = cugraph_paths_result_get_vertices(paths_result); predecessors = cugraph_paths_result_get_predecessors(paths_result); - distances = cugraph_paths_result_get_distances(paths_result); + distances = cugraph_paths_result_get_distances(paths_result); vertex_t h_vertices[num_vertices]; vertex_t h_predecessors[num_vertices]; @@ -106,8 +99,7 @@ int generic_bfs_test( TEST_ASSERT(test_ret_value, expected_distances[h_vertices[i]] == h_distances[i], "bfs distances don't match"); - - + TEST_ASSERT(test_ret_value, expected_predecessors[h_vertices[i]] == h_predecessors[i], "bfs predecessors don't match"); @@ -168,7 +160,7 @@ int main(int argc, char** argv) void* raft_handle = create_raft_handle(prows); handle = cugraph_create_resource_handle(raft_handle); - int result = 0; + int result = 0; result |= RUN_MG_TEST(test_bfs, handle); cugraph_free_resource_handle(handle); diff --git a/cpp/tests/c_api/mg_pagerank_test.c b/cpp/tests/c_api/mg_pagerank_test.c index 7c557d7bed8..8ac0c3070f5 100644 --- a/cpp/tests/c_api/mg_pagerank_test.c +++ b/cpp/tests/c_api/mg_pagerank_test.c @@ -42,7 +42,7 @@ int generic_pagerank_test(const cugraph_resource_handle_t* handle, cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_graph_t* p_graph = NULL; + cugraph_graph_t* p_graph = NULL; cugraph_centrality_result_t* p_result = NULL; ret_code = create_mg_test_graph( @@ -51,7 +51,7 @@ int generic_pagerank_test(const 
cugraph_resource_handle_t* handle, TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_mg_test_graph failed."); ret_code = cugraph_pagerank( - handle, p_graph, NULL, alpha, epsilon, max_iterations, FALSE, FALSE, &p_result, &ret_error); + handle, p_graph, NULL, NULL, NULL, NULL, alpha, epsilon, max_iterations, FALSE, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); // NOTE: Because we get back vertex ids and pageranks, we can simply compare @@ -90,6 +90,118 @@ int generic_pagerank_test(const cugraph_resource_handle_t* handle, return test_ret_value; } +int generic_personalized_pagerank_test(const cugraph_resource_handle_t *handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + vertex_t* h_personalization_vertices, + weight_t* h_personalization_values, + size_t num_vertices, + size_t num_edges, + size_t num_personalization_vertices, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* personalization_vertices = NULL; + cugraph_type_erased_device_array_t* personalization_values = NULL; + cugraph_type_erased_device_array_view_t* personalization_vertices_view = NULL; + cugraph_type_erased_device_array_view_t* personalization_values_view = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + if (cugraph_resource_handle_get_rank(handle) != 0) { + 
num_personalization_vertices = 0; + } + + ret_code = cugraph_type_erased_device_array_create( + handle, num_personalization_vertices, vertex_tid, &personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices create failed."); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_personalization_vertices, weight_tid, &personalization_values, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values create failed."); + + personalization_vertices_view = cugraph_type_erased_device_array_view(personalization_vertices); + personalization_values_view = cugraph_type_erased_device_array_view(personalization_values); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, personalization_vertices_view, (byte_t*)h_personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, personalization_values_view, (byte_t*)h_personalization_values, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values copy_from_host failed."); + + ret_code = cugraph_personalized_pagerank(handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + personalization_vertices_view, + personalization_values_view, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + size_t num_local_vertices = 
cugraph_type_erased_device_array_view_size(vertices); + + vertex_t h_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_mg_graph_free(p_graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + + int test_pagerank(const cugraph_resource_handle_t* handle) { size_t num_edges = 8; @@ -204,6 +316,39 @@ int test_pagerank_4_with_transpose(const cugraph_resource_handle_t* handle) max_iterations); } +int test_personalized_pagerank(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 3; + size_t num_vertices = 4; + + vertex_t h_src[] = {0, 1, 2}; + vertex_t h_dst[] = {1, 2, 3}; + weight_t h_wgt[] = {1.f, 1.f, 1.f}; + weight_t h_result[] = {0.0559233f, 0.159381f, 0.303244f, 0.481451f}; + + vertex_t h_personalized_vertices[] = {0, 1, 2, 3}; + weight_t h_personalized_values[] = {0.1, 0.2, 0.3, 0.4}; + + double alpha = 0.85; + double epsilon = 1.0e-6; + size_t max_iterations = 500; + + return generic_personalized_pagerank_test(handle, + h_src, + h_dst, + h_wgt, + h_result, + h_personalized_vertices, + h_personalized_values, + num_vertices, + num_edges, + num_vertices, + FALSE, + alpha, + epsilon, + max_iterations); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -246,6 +391,7 @@ int main(int 
argc, char** argv) result |= RUN_MG_TEST(test_pagerank_with_transpose, handle); result |= RUN_MG_TEST(test_pagerank_4, handle); result |= RUN_MG_TEST(test_pagerank_4_with_transpose, handle); + result |= RUN_MG_TEST(test_personalized_pagerank, handle); cugraph_free_resource_handle(handle); } diff --git a/cpp/tests/c_api/pagerank_test.c b/cpp/tests/c_api/pagerank_test.c index b985fb428e6..048750da06c 100644 --- a/cpp/tests/c_api/pagerank_test.c +++ b/cpp/tests/c_api/pagerank_test.c @@ -41,8 +41,8 @@ int generic_pagerank_test(vertex_t* h_src, cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_resource_handle_t* p_handle = NULL; - cugraph_graph_t* p_graph = NULL; + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; cugraph_centrality_result_t* p_result = NULL; p_handle = cugraph_create_resource_handle(NULL); @@ -52,10 +52,20 @@ int generic_pagerank_test(vertex_t* h_src, p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - - ret_code = cugraph_pagerank( - p_handle, p_graph, NULL, alpha, epsilon, max_iterations, FALSE, FALSE, &p_result, &ret_error); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_pagerank(p_handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); cugraph_type_erased_device_array_view_t* vertices; @@ -89,6 +99,115 @@ int generic_pagerank_test(vertex_t* h_src, return test_ret_value; } +int generic_personalized_pagerank_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + vertex_t* h_personalization_vertices, + weight_t* 
h_personalization_values, + size_t num_vertices, + size_t num_edges, + size_t num_personalization_vertices, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* personalization_vertices = NULL; + cugraph_type_erased_device_array_t* personalization_values = NULL; + cugraph_type_erased_device_array_view_t* personalization_vertices_view = NULL; + cugraph_type_erased_device_array_view_t* personalization_values_view = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_type_erased_device_array_create( + p_handle, num_personalization_vertices, vertex_tid, &personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices create failed."); + + ret_code = cugraph_type_erased_device_array_create( + p_handle, num_personalization_vertices, weight_tid, &personalization_values, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values create failed."); + + personalization_vertices_view = cugraph_type_erased_device_array_view(personalization_vertices); + personalization_values_view = cugraph_type_erased_device_array_view(personalization_values); + + ret_code = 
cugraph_type_erased_device_array_view_copy_from_host( + p_handle, personalization_vertices_view, (byte_t*)h_personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, personalization_values_view, (byte_t*)h_personalization_values, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values copy_from_host failed."); + + ret_code = cugraph_personalized_pagerank(p_handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + personalization_vertices_view, + personalization_values_view, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + vertex_t h_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_sg_graph_free(p_graph); + 
cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_pagerank() { size_t num_edges = 8; @@ -137,7 +256,8 @@ int test_pagerank_4() vertex_t h_src[] = {0, 1, 2}; vertex_t h_dst[] = {1, 2, 3}; weight_t h_wgt[] = {1.f, 1.f, 1.f}; - weight_t h_result[] = {0.11615584790706635f, 0.21488840878009796f, 0.29881080985069275f, 0.37014490365982056f}; + weight_t h_result[] = { + 0.11615584790706635f, 0.21488840878009796f, 0.29881080985069275f, 0.37014490365982056f}; double alpha = 0.85; double epsilon = 1.0e-6; @@ -155,7 +275,8 @@ int test_pagerank_4_with_transpose() vertex_t h_src[] = {0, 1, 2}; vertex_t h_dst[] = {1, 2, 3}; weight_t h_wgt[] = {1.f, 1.f, 1.f}; - weight_t h_result[] = {0.11615584790706635f, 0.21488840878009796f, 0.29881080985069275f, 0.37014490365982056f}; + weight_t h_result[] = { + 0.11615584790706635f, 0.21488840878009796f, 0.29881080985069275f, 0.37014490365982056f}; double alpha = 0.85; double epsilon = 1.0e-6; @@ -165,6 +286,38 @@ int test_pagerank_4_with_transpose() h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, alpha, epsilon, max_iterations); } +int test_personalized_pagerank() +{ + size_t num_edges = 3; + size_t num_vertices = 4; + + vertex_t h_src[] = {0, 1, 2}; + vertex_t h_dst[] = {1, 2, 3}; + weight_t h_wgt[] = {1.f, 1.f, 1.f}; + weight_t h_result[] = {0.0559233f, 0.159381f, 0.303244f, 0.481451f}; + + vertex_t h_personalized_vertices[] = {0, 1, 2, 3}; + weight_t h_personalized_values[] = {0.1, 0.2, 0.3, 0.4}; + + double alpha = 0.85; + double epsilon = 1.0e-6; + size_t max_iterations = 500; + + return generic_personalized_pagerank_test(h_src, + h_dst, + h_wgt, + h_result, + h_personalized_vertices, + h_personalized_values, + num_vertices, + num_edges, + num_vertices, + FALSE, + alpha, + epsilon, + max_iterations); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -174,5 +327,6 @@ int main(int argc, 
char** argv) result |= RUN_TEST(test_pagerank_with_transpose); result |= RUN_TEST(test_pagerank_4); result |= RUN_TEST(test_pagerank_4_with_transpose); + result |= RUN_TEST(test_personalized_pagerank); return result; } diff --git a/cpp/tests/layout/trust_worthiness.h b/cpp/tests/layout/trust_worthiness.h index 5a112ea3c6b..3e6b018d6c5 100644 --- a/cpp/tests/layout/trust_worthiness.h +++ b/cpp/tests/layout/trust_worthiness.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,6 @@ double euclidian_dist(const std::vector& x, const std::vector& y) std::vector> pairwise_distances(const std::vector>& X) { std::vector> distance_matrix(X.size(), std::vector(X[0].size())); -#pragma omp parallel for for (size_t i = 0; i < X.size(); ++i) { for (size_t j = 0; j < i; ++j) { const float val = euclidian_dist(X[i], X[j]); diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index ee929ca6e27..540b8d6d2af 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -70,13 +70,11 @@ add_subdirectory(cugraph/dask/centrality) add_subdirectory(cugraph/dask/comms) add_subdirectory(cugraph/dask/community) add_subdirectory(cugraph/dask/components) -add_subdirectory(cugraph/dask/link_analysis) add_subdirectory(cugraph/dask/structure) add_subdirectory(cugraph/generators) add_subdirectory(cugraph/internals) add_subdirectory(cugraph/layout) add_subdirectory(cugraph/linear_assignment) -add_subdirectory(cugraph/link_analysis) add_subdirectory(cugraph/link_prediction) add_subdirectory(cugraph/sampling) add_subdirectory(cugraph/structure) diff --git a/python/cugraph/cugraph/dask/link_analysis/CMakeLists.txt b/python/cugraph/cugraph/dask/link_analysis/CMakeLists.txt deleted file mode 100644 index b204a6b6927..00000000000 --- 
a/python/cugraph/cugraph/dask/link_analysis/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources mg_pagerank_wrapper.pyx) -set(linked_libraries cugraph::cugraph) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX link_analysis_ -) - -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../library") -endforeach() diff --git a/python/cugraph/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/cugraph/dask/link_analysis/mg_pagerank.pxd deleted file mode 100644 index 4b47f43dd87..00000000000 --- a/python/cugraph/cugraph/dask/link_analysis/mg_pagerank.pxd +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright (c) 2020-2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from cugraph.structure.graph_utilities cimport * -from libcpp cimport bool - - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - - cdef void call_pagerank[vertex_t, weight_t]( - const handle_t &handle, - const graph_container_t &g, - vertex_t *identifiers, - weight_t *pagerank, - vertex_t size, - vertex_t *personalization_subset, - weight_t *personalization_values, - double alpha, - double tolerance, - long long max_iter, - bool has_guess) except + diff --git a/python/cugraph/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx deleted file mode 100644 index 3ae81de95ff..00000000000 --- a/python/cugraph/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ /dev/null @@ -1,144 +0,0 @@ -# -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from cugraph.structure.utils_wrapper import * -from cugraph.dask.link_analysis cimport mg_pagerank as c_pagerank -import cudf -from cugraph.structure.graph_utilities cimport * -import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper -from libc.stdint cimport uintptr_t -from cython.operator cimport dereference as deref -import numpy as np - - -def mg_pagerank(input_df, - src_col_name, - dst_col_name, - num_global_verts, - num_global_edges, - vertex_partition_offsets, - rank, - handle, - segment_offsets, - alpha=0.85, - max_iter=100, - tol=1.0e-5, - personalization=None, - nstart=None): - """ - Call pagerank - """ - cdef size_t handle_size_t = handle.getHandle() - handle_ = handle_size_t - - src = input_df[src_col_name] - dst = input_df[dst_col_name] - vertex_t = src.dtype - if num_global_edges > (2**31 - 1): - edge_t = np.dtype("int64") - else: - edge_t = vertex_t - if "value" in input_df.columns: - weights = input_df['value'] - weight_t = weights.dtype - is_weighted = True - raise NotImplementedError # FIXME: c_edge_weights is always set to NULL - else: - weights = None - weight_t = np.dtype("float32") - is_weighted = False - - # FIXME: Offsets and indices are currently hardcoded to int, but this may - # not be acceptable in the future. 
- numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, - np.dtype("int64") : numberTypeEnum.int64Type, - np.dtype("float32") : numberTypeEnum.floatType, - np.dtype("double") : numberTypeEnum.doubleType} - - # FIXME: needs to be edge_t type not int - cdef int num_local_edges = len(src) - - cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] - cdef uintptr_t c_edge_weights = NULL - if weights is not None: - c_edge_weights = weights.__cuda_array_interface__['data'][0] - - # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C - vertex_partition_offsets_host = vertex_partition_offsets.values_host - cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] - - cdef vector[int] v_segment_offsets_32 - cdef vector[long] v_segment_offsets_64 - cdef uintptr_t c_segment_offsets - if (vertex_t == np.dtype("int32")): - v_segment_offsets_32 = segment_offsets - c_segment_offsets = v_segment_offsets_32.data() - else: - v_segment_offsets_64 = segment_offsets - c_segment_offsets = v_segment_offsets_64.data() - - cdef graph_container_t graph_container - - populate_graph_container(graph_container, - handle_[0], - c_src_vertices, c_dst_vertices, c_edge_weights, - c_vertex_partition_offsets, - c_segment_offsets, - len(segment_offsets) - 1, - ((numberTypeMap[vertex_t])), - ((numberTypeMap[edge_t])), - ((numberTypeMap[weight_t])), - num_local_edges, - num_global_verts, num_global_edges, - is_weighted, - False, - True, True) - - df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t) - df['pagerank'] = cudf.Series(np.zeros(len(df['vertex']), dtype=weight_t)) - - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; - cdef uintptr_t c_pagerank_val = 
df['pagerank'].__cuda_array_interface__['data'][0]; - - cdef uintptr_t c_pers_vtx = NULL - cdef uintptr_t c_pers_val = NULL - cdef int sz = 0 - - if personalization is not None: - sz = personalization['vertex'].shape[0] - personalization['vertex'] = personalization['vertex'].astype(vertex_t) - personalization['values'] = personalization['values'].astype(weight_t) - c_pers_vtx = personalization['vertex'].__cuda_array_interface__['data'][0] - c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] - - if vertex_t == np.int32: - if (df['pagerank'].dtype == np.float32): - c_pagerank.call_pagerank[int, float](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) - else: - c_pagerank.call_pagerank[int, double](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) - else: - if (df['pagerank'].dtype == np.float32): - c_pagerank.call_pagerank[long, float](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) - else: - c_pagerank.call_pagerank[long, double](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) - - return df diff --git a/python/cugraph/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/cugraph/dask/link_analysis/pagerank.py index 04ee580a34f..b60e2d81bd4 100644 --- a/python/cugraph/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/dask/link_analysis/pagerank.py @@ -13,65 +13,133 @@ # limitations under the License. 
# -from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import (get_distributed_data, - get_vertex_partition_offsets) -from cugraph.dask.link_analysis import mg_pagerank_wrapper as mg_pagerank +from dask.distributed import wait import cugraph.dask.comms.comms as Comms import dask_cudf -from dask.dataframe.shuffle import rearrange_by_column - - -def call_pagerank(sID, - data, - src_col_name, - dst_col_name, - num_verts, - num_edges, - vertex_partition_offsets, - aggregate_segment_offsets, - alpha, - max_iter, - tol, - personalization, - nstart): - wid = Comms.get_worker_id(sID) - handle = Comms.get_handle(sID) - local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) - segment_offsets = \ - aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] - return mg_pagerank.mg_pagerank(data[0], - src_col_name, - dst_col_name, - num_verts, - num_edges, - vertex_partition_offsets, - wid, - handle, - segment_offsets, - alpha, - max_iter, - tol, - personalization, - nstart) +import cudf +import numpy as np +import warnings +from cugraph.dask.common.input_utils import get_distributed_data +from pylibcugraph import (ResourceHandle, + pagerank as pylibcugraph_pagerank, + personalized_pagerank as pylibcugraph_p_pagerank + ) -def pagerank(input_graph, - alpha=0.85, - personalization=None, - max_iter=100, - tol=1.0e-5, - nstart=None): +def convert_to_cudf(cp_arrays): + """ + Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper + """ + cupy_vertices, cupy_pagerank = cp_arrays + df = cudf.DataFrame() + df["vertex"] = cupy_vertices + df["pagerank"] = cupy_pagerank + + return df + + +# FIXME: Move this function to the utility module so that it can be +# shared by other algos +def ensure_valid_dtype(input_graph, input_df, input_df_name): + if input_graph.properties.weighted is False: + edge_attr_dtype = np.float64 + else: + edge_attr_dtype = input_graph.input_df["value"].dtype + + input_df_dtype = 
input_df["values"].dtype + if input_df_dtype != edge_attr_dtype: + warning_msg = (f"PageRank requires '{input_df_name}' values " + "to match the graph's 'edge_attr' type. " + f"edge_attr type is: {edge_attr_dtype} and got " + f"'{input_df_name}' values of type: " + f"{input_df_dtype}.") + warnings.warn(warning_msg, UserWarning) + input_df = input_df.astype( + {"values": edge_attr_dtype}) + + return input_df + + +def renumber_vertices(input_graph, input_df): + input_df = input_graph.add_internal_vertex_id( + input_df, "vertex", "vertex").compute() + + return input_df + + +def _call_plc_pagerank(sID, + mg_graph_x, + pre_vtx_o_wgt_vertices, + pre_vtx_o_wgt_sums, + initial_guess_vertices, + initial_guess_values, + alpha, + epsilon, + max_iterations, + do_expensive_check): + + return pylibcugraph_pagerank( + resource_handle=ResourceHandle( + Comms.get_handle(sID).getHandle() + ), + graph=mg_graph_x, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=epsilon, + max_iterations=max_iterations, + do_expensive_check=do_expensive_check + ) + + +def _call_plc_personalized_pagerank(sID, + mg_graph_x, + pre_vtx_o_wgt_vertices, + pre_vtx_o_wgt_sums, + data_personalization, + initial_guess_vertices, + initial_guess_values, + alpha, + epsilon, + max_iterations, + do_expensive_check): + personalization_vertices = data_personalization["vertex"] + personalization_values = data_personalization["values"] + return pylibcugraph_p_pagerank( + resource_handle=ResourceHandle( + Comms.get_handle(sID).getHandle() + ), + graph=mg_graph_x, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + personalization_vertices=personalization_vertices, + personalization_values=personalization_values, + initial_guess_vertices=initial_guess_vertices, + 
initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=epsilon, + max_iterations=max_iterations, + do_expensive_check=do_expensive_check + ) + + +def pagerank(input_graph, + alpha=0.85, personalization=None, + precomputed_vertex_out_weight=None, + max_iter=100, tol=1.0e-5, nstart=None): """ Find the PageRank values for each vertex in a graph using multiple GPUs. cuGraph computes an approximation of the Pagerank using the power method. The input graph must contain edge list as dask-cudf dataframe with one partition per GPU. + All edges will have an edge_attr value of 1.0 if not provided. Parameters ---------- - input_graph : cugraph.DiGraph + input_graph : cugraph.Graph cuGraph graph descriptor, should contain the connectivity information as dask cudf edge list dataframe(edge weights are not used for this algorithm). @@ -84,19 +152,29 @@ def pagerank(input_graph, personalization : cudf.Dataframe, optional (default=None) GPU Dataframe containing the personalization information. - Currently not supported. - + (a performance optimization) personalization['vertex'] : cudf.Series Subset of vertices of graph for personalization personalization['values'] : cudf.Series Personalization values for vertices + precomputed_vertex_out_weight : cudf.Dataframe, optional (default=None) + GPU Dataframe containing the precomputed vertex out weight + (a performance optimization) + information. + precomputed_vertex_out_weight['vertex'] : cudf.Series + Subset of vertices of graph for precomputed_vertex_out_weight + precomputed_vertex_out_weight['sums'] : cudf.Series + Corresponding precomputed sum of outgoing vertices weight + max_iter : int, optional (default=100) - The maximum number of iterations before an answer is returned. + The maximum number of iterations before an answer is returned. This can + be used to limit the execution time and do an early exit before the + solver reaches the convergence tolerance. 
If this value is lower or equal to 0 cuGraph will use the default - value, which is 30. + value, which is 100. - tol : float, optional (default=1.0e-5) + tol : float, optional (default=1e-05) Set the tolerance the approximation, this parameter should be a small magnitude value. The lower the tolerance the better the approximation. If this value is @@ -105,8 +183,13 @@ def pagerank(input_graph, numerical roundoff. Usually values between 0.01 and 0.00001 are acceptable. - nstart : not supported - initial guess for pagerank + nstart : cudf.Dataframe, optional (default=None) + GPU Dataframe containing the initial guess for pagerank. + (a performance optimization) + nstart['vertex'] : cudf.Series + Subset of vertices of graph for initial guess for pagerank values + nstart['values'] : cudf.Series + Pagerank values for vertices Returns ------- @@ -114,6 +197,16 @@ def pagerank(input_graph, GPU data frame containing two dask_cudf.Series of size V: the vertex identifiers and the corresponding PageRank values. + NOTE: if the input cugraph.Graph was created using the renumber=False + option of any of the from_*_edgelist() methods, pagerank assumes that + the vertices in the edgelist are contiguous and start from 0. + If the actual set of vertices in the edgelist is not + contiguous (has gaps) or does not start from zero, pagerank will assume + the "missing" vertices are isolated vertices in the graph, and will + compute and return pagerank values for each. 
If this is not the desired + behavior, ensure the input cugraph.Graph is created from the + from_*_edgelist() functions with the renumber=True option (the default) + ddf['vertex'] : dask_cudf.Series Contains the vertex identifiers ddf['pagerank'] : dask_cudf.Series @@ -136,94 +229,95 @@ def pagerank(input_graph, >>> pr = dcg.pagerank(dg) """ - nstart = None - client = default_client() + # Initialize dask client + client = input_graph._client - input_graph.compute_renumber_edge_list(transposed=True) + initial_guess_vertices = None + initial_guess_values = None + precomputed_vertex_out_weight_vertices = None + precomputed_vertex_out_weight_sums = None - ddf = input_graph.edgelist.edgelist_df - vertex_partition_offsets = get_vertex_partition_offsets(input_graph) - num_verts = vertex_partition_offsets.iloc[-1] - num_edges = len(ddf) - data = get_distributed_data(ddf) + do_expensive_check = False - src_col_name = input_graph.renumber_map.renumbered_src_col_name - dst_col_name = input_graph.renumber_map.renumbered_dst_col_name + # FIXME: Distribute the 'precomputed_vertex_out_weight' + # across GPUs for performance optimization + if precomputed_vertex_out_weight is not None: + if input_graph.renumbered is True: + precomputed_vertex_out_weight = renumber_vertices( + input_graph, precomputed_vertex_out_weight) + precomputed_vertex_out_weight_vertices = \ + precomputed_vertex_out_weight["vertex"] + precomputed_vertex_out_weight_sums = \ + precomputed_vertex_out_weight["sums"] + + # FIXME: Distribute the 'nstart' across GPUs for performance optimization + if nstart is not None: + if input_graph.renumbered is True: + nstart = renumber_vertices(input_graph, nstart) + nstart = ensure_valid_dtype( + input_graph, nstart, "nstart") + initial_guess_vertices = nstart["vertex"] + initial_guess_values = nstart["values"] if personalization is not None: if input_graph.renumbered is True: - personalization = input_graph.add_internal_vertex_id( - personalization, "vertex", "vertex" - ) + 
personalization = renumber_vertices(input_graph, personalization) + personalization = ensure_valid_dtype( + input_graph, personalization, "personalization") - # Function to assign partition id to personalization dataframe - def _set_partitions_pre(s, divisions): - partitions = divisions.searchsorted(s, side="right") - 1 - partitions[ - divisions.tail(1).searchsorted(s, side="right").astype("bool") - ] = (len(divisions) - 2) - return partitions - - # Assign partition id column as per vertex_partition_offsets - df = personalization - by = ['vertex'] - meta = df._meta._constructor_sliced([0]) - divisions = vertex_partition_offsets - partitions = df[by].map_partitions( - _set_partitions_pre, divisions=divisions, meta=meta - ) - - df2 = df.assign(_partitions=partitions) - - # Shuffle personalization values according to the partition id - df3 = rearrange_by_column( - df2, - "_partitions", - max_branch=None, - npartitions=len(divisions) - 1, - shuffle="tasks", - ignore_index=False, - ).drop(columns=["_partitions"]) - - p_data = get_distributed_data(df3) - - result = [client.submit(call_pagerank, - Comms.get_session_id(), - wf[1], - src_col_name, - dst_col_name, - num_verts, - num_edges, - vertex_partition_offsets, - input_graph.aggregate_segment_offsets, - alpha, - max_iter, - tol, - p_data.worker_to_parts[wf[0]][0], - nstart, - workers=[wf[0]]) - for idx, wf in enumerate(data.worker_to_parts.items())] + personalization_ddf = dask_cudf.from_cudf( + personalization, npartitions=len(Comms.get_workers())) + + data_prsztn = get_distributed_data(personalization_ddf) + + result = [ + client.submit( + _call_plc_personalized_pagerank, + Comms.get_session_id(), + input_graph._plc_graph[w], + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + data_personalization[0], + initial_guess_vertices, + initial_guess_values, + alpha, + tol, + max_iter, + do_expensive_check, + workers=[w], + ) + for w, data_personalization in data_prsztn.worker_to_parts.items() + 
] else: - result = [client.submit(call_pagerank, - Comms.get_session_id(), - wf[1], - src_col_name, - dst_col_name, - num_verts, - num_edges, - vertex_partition_offsets, - input_graph.aggregate_segment_offsets, - alpha, - max_iter, - tol, - personalization, - nstart, - workers=[wf[0]]) - for idx, wf in enumerate(data.worker_to_parts.items())] + result = [ + client.submit( + _call_plc_pagerank, + Comms.get_session_id(), + input_graph._plc_graph[w], + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, + alpha, + tol, + max_iter, + do_expensive_check, + workers=[w], + ) + for w in Comms.get_workers() + ] + wait(result) - ddf = dask_cudf.from_delayed(result) + + cudf_result = [client.submit(convert_to_cudf, + cp_arrays) + for cp_arrays in result] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result) if input_graph.renumbered: - return input_graph.unrenumber(ddf, 'vertex') + ddf = input_graph.unrenumber(ddf, "vertex") return ddf diff --git a/python/cugraph/cugraph/experimental/compat/nx/algorithms/link_analysis/pagerank_alg.py b/python/cugraph/cugraph/experimental/compat/nx/algorithms/link_analysis/pagerank_alg.py index 4ffe01aadce..c046a1bfd0b 100644 --- a/python/cugraph/cugraph/experimental/compat/nx/algorithms/link_analysis/pagerank_alg.py +++ b/python/cugraph/cugraph/experimental/compat/nx/algorithms/link_analysis/pagerank_alg.py @@ -115,10 +115,10 @@ def pagerank( local_nstart = create_cudf_from_dict(nstart) return cugraph.pagerank( G, - alpha, - local_pers, - max_iter, - tol, - local_nstart, - weight, - dangling) + alpha=alpha, + personalization=local_pers, + max_iter=max_iter, + tol=tol, + nstart=local_nstart, + weight=weight, + dangling=dangling) diff --git a/python/cugraph/cugraph/link_analysis/CMakeLists.txt b/python/cugraph/cugraph/link_analysis/CMakeLists.txt deleted file mode 100644 index 30dbe239ea9..00000000000 --- 
a/python/cugraph/cugraph/link_analysis/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources pagerank_wrapper.pyx) -set(linked_libraries cugraph::cugraph) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX link_analysis_ -) - -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") -endforeach() diff --git a/python/cugraph/cugraph/link_analysis/pagerank.pxd b/python/cugraph/cugraph/link_analysis/pagerank.pxd deleted file mode 100644 index ed8f763b3ca..00000000000 --- a/python/cugraph/cugraph/link_analysis/pagerank.pxd +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from cugraph.structure.graph_utilities cimport * -from libcpp cimport bool - - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - - cdef void call_pagerank[VT,WT]( - const handle_t &handle, - const graph_container_t &g, - VT *identifiers, - WT *pagerank, - VT size, - VT *personalization_subset, - WT *personalization_values, - double alpha, - double tolerance, - long long max_iter, - bool has_guess) except + diff --git a/python/cugraph/cugraph/link_analysis/pagerank.py b/python/cugraph/cugraph/link_analysis/pagerank.py index ecb0ba6ea74..c0eb3a02ddb 100644 --- a/python/cugraph/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/link_analysis/pagerank.py @@ -11,17 +11,57 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cudf - -from cugraph.link_analysis import pagerank_wrapper from cugraph.utilities import (ensure_cugraph_obj_for_nx, df_score_to_dictionary, ) +import cudf +import numpy as np +import warnings + +from pylibcugraph import (pagerank as pylibcugraph_pagerank, + personalized_pagerank as pylibcugraph_p_pagerank, + ResourceHandle + ) + + +def renumber_vertices(input_graph, input_df): + if len(input_graph.renumber_map.implementation.col_names) > 1: + cols = input_df.columns[:-1].to_list() + else: + cols = 'vertex' + input_df = input_graph.add_internal_vertex_id( + input_df, "vertex", cols + ) + + return input_df + + +# FIXME: Move this function to the utility module so that it can be +# shared by other algos +def ensure_valid_dtype(input_graph, input_df, input_df_name): + if input_graph.edgelist.weights is False: + edge_attr_dtype = np.float32 + else: + edge_attr_dtype = input_graph.edgelist.edgelist_df["weights"].dtype + + input_df_dtype = input_df["values"].dtype + if input_df_dtype != edge_attr_dtype: + warning_msg = (f"PageRank requires '{input_df_name}' values " + "to match the graph's 'edge_attr' type. " + f"edge_attr type is: {edge_attr_dtype} and got " + f"'{input_df_name}' values of type: " + f"{input_df_dtype}.") + warnings.warn(warning_msg, UserWarning) + input_df = input_df.astype( + {"values": edge_attr_dtype}) + + return input_df def pagerank( - G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None, - weight=None, dangling=None + G, alpha=0.85, personalization=None, + precomputed_vertex_out_weight=None, + max_iter=100, tol=1.0e-5, nstart=None, weight=None, dangling=None ): """ Find the PageRank score for every vertex in a graph. cuGraph computes an @@ -30,8 +70,7 @@ def pagerank( increases when the tolerance descreases and/or alpha increases toward the limiting value of 1. The user is free to use default values or to provide inputs for the initial guess, tolerance and maximum number of iterations. - - Parameters + Parameters. 
All edges will have an edge_attr value of 1.0 if not provided. ---------- G : cugraph.Graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information @@ -46,12 +85,20 @@ def pagerank( personalization : cudf.Dataframe, optional (default=None) GPU Dataframe containing the personalization information. - + (a performance optimization) personalization['vertex'] : cudf.Series Subset of vertices of graph for personalization personalization['values'] : cudf.Series Personalization values for vertices + precomputed_vertex_out_weight : cudf.Dataframe, optional (default=None) + GPU Dataframe containing the precomputed vertex out weight + information(a performance optimization). + precomputed_vertex_out_weight['vertex'] : cudf.Series + Subset of vertices of graph for precomputed_vertex_out_weight + precomputed_vertex_out_weight['sums'] : cudf.Series + Corresponding precomputed sum of outgoing vertices weight + max_iter : int, optional (default=100) The maximum number of iterations before an answer is returned. This can be used to limit the execution time and do an early exit before the @@ -70,7 +117,7 @@ def pagerank( nstart : cudf.Dataframe, optional (default=None) GPU Dataframe containing the initial guess for pagerank. - + (a performance optimization). nstart['vertex'] : cudf.Series Subset of vertices of graph for initial guess for pagerank values nstart['values'] : cudf.Series @@ -90,21 +137,51 @@ def pagerank( GPU data frame containing two cudf.Series of size V: the vertex identifiers and the corresponding PageRank values. + NOTE: if the input cugraph.Graph was created using the renumber=False + option of any of the from_*_edgelist() methods, pagerank assumes that + the vertices in the edgelist are contiguous and start from 0. 
+ If the actual set of vertices in the edgelist is not + contiguous (has gaps) or does not start from zero, pagerank will assume + the "missing" vertices are isolated vertices in the graph, and will + compute and return pagerank values for each. If this is not the desired + behavior, ensure the input cugraph.Graph is created from the + from_*_edgelist() functions with the renumber=True option (the default) + df['vertex'] : cudf.Series Contains the vertex identifiers df['pagerank'] : cudf.Series Contains the PageRank score - - Examples -------- >>> from cugraph.experimental.datasets import karate >>> G = karate.get_graph(fetch=True) >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, tol = 1.0e-05) - """ + initial_guess_vertices = None + initial_guess_values = None + pre_vtx_o_wgt_vertices = None + pre_vtx_o_wgt_sums = None + G, isNx = ensure_cugraph_obj_for_nx(G, weight) + do_expensive_check = False + + if nstart is not None: + if G.renumbered is True: + nstart = renumber_vertices(G, nstart) + nstart = ensure_valid_dtype( + G, nstart, "nstart") + initial_guess_vertices = nstart["vertex"] + initial_guess_values = nstart["values"] + + if precomputed_vertex_out_weight is not None: + if G.renumbered is True: + precomputed_vertex_out_weight = renumber_vertices( + G, precomputed_vertex_out_weight) + pre_vtx_o_wgt_vertices = \ + precomputed_vertex_out_weight["vertex"] + pre_vtx_o_wgt_sums = \ + precomputed_vertex_out_weight["sums"] if personalization is not None: if not isinstance(personalization, cudf.DataFrame): @@ -113,32 +190,49 @@ def pagerank( "currently not supported" ) if G.renumbered is True: - if len(G.renumber_map.implementation.col_names) > 1: - cols = personalization.columns[:-1].to_list() - else: - cols = 'vertex' - personalization = G.add_internal_vertex_id( - personalization, "vertex", cols - ) - - if nstart is not None: - if G.renumbered is True: - if len(G.renumber_map.implementation.col_names) > 1: - cols = nstart.columns[:-1].to_list() - else: - 
cols = 'vertex' - nstart = G.add_internal_vertex_id( - nstart, "vertex", cols + personalization = renumber_vertices( + G, personalization) + + personalization = ensure_valid_dtype( + G, personalization, "personalization") + + vertex, pagerank_values = \ + pylibcugraph_p_pagerank( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + personalization_vertices=personalization["vertex"], + personalization_values=personalization["values"], + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=tol, + max_iterations=max_iter, + do_expensive_check=do_expensive_check) + else: + vertex, pagerank_values = \ + pylibcugraph_pagerank( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=tol, + max_iterations=max_iter, + do_expensive_check=do_expensive_check ) - df = pagerank_wrapper.pagerank( - G, alpha, personalization, max_iter, tol, nstart - ) + df = cudf.DataFrame() + df["vertex"] = vertex + df["pagerank"] = pagerank_values if G.renumbered: df = G.unrenumber(df, "vertex") if isNx is True: - return df_score_to_dictionary(df, 'pagerank') - else: - return df + df = df_score_to_dictionary(df, 'pagerank') + + return df diff --git a/python/cugraph/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/cugraph/link_analysis/pagerank_wrapper.pyx deleted file mode 100644 index d94a61b4016..00000000000 --- a/python/cugraph/cugraph/link_analysis/pagerank_wrapper.pyx +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from cugraph.link_analysis.pagerank cimport call_pagerank -from cugraph.structure.graph_utilities cimport * -from libcpp cimport bool -from libc.stdint cimport uintptr_t -from cugraph.structure import graph_primtypes_wrapper -import cudf -import numpy as np - - -def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None): - """ - Call pagerank - """ - - cdef unique_ptr[handle_t] handle_ptr - handle_ptr.reset(new handle_t()) - handle_ = handle_ptr.get(); - - [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32, np.int64]) - weights = None - if input_graph.edgelist.weights: - [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) - - num_verts = input_graph.number_of_vertices() - num_edges = input_graph.number_of_edges(directed_edges=True) - # FIXME: needs to be edge_t type not int - cdef int num_local_edges = len(src) - cdef uintptr_t c_edge_weights = NULL - if weights is not None: - c_edge_weights = weights.__cuda_array_interface__['data'][0] - weight_t = weights.dtype - is_weighted = True - else: - weight_t = np.dtype("float32") - is_weighted = False - - df = cudf.DataFrame() - df['vertex'] = 
cudf.Series(np.arange(num_verts, dtype=src.dtype)) - df['pagerank'] = cudf.Series(np.zeros(num_verts, dtype=weight_t)) - - cdef bool has_guess = 0 - if nstart is not None: - if len(nstart) != num_verts: - raise ValueError('nstart must have initial guess for all vertices') - df['pagerank'][nstart['vertex']] = nstart['values'] - has_guess = 1 - - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; - cdef uintptr_t c_pagerank_val = df['pagerank'].__cuda_array_interface__['data'][0]; - - cdef uintptr_t c_pers_vtx = NULL - cdef uintptr_t c_pers_val = NULL - cdef int sz = 0 - - cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] - - personalization_id_series = None - - is_symmetric = not input_graph.is_directed() - - # FIXME: Offsets and indices are currently hardcoded to int, but this may - # not be acceptable in the future. - numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, - np.dtype("int64") : numberTypeEnum.int64Type, - np.dtype("float32") : numberTypeEnum.floatType, - np.dtype("double") : numberTypeEnum.doubleType} - - if personalization is not None: - sz = personalization['vertex'].shape[0] - personalization['vertex'] = personalization['vertex'].astype(src.dtype) - personalization['values'] = personalization['values'].astype(df['pagerank'].dtype) - c_pers_vtx = personalization['vertex'].__cuda_array_interface__['data'][0] - c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] - - cdef graph_container_t graph_container - populate_graph_container(graph_container, - handle_[0], - c_src_vertices, c_dst_vertices, c_edge_weights, - NULL, - NULL, - 0, - ((numberTypeMap[src.dtype])), - ((numberTypeMap[src.dtype])), - ((numberTypeMap[weight_t])), - num_local_edges, - num_verts, num_edges, - is_weighted, - is_symmetric, - True, - False) - - if (df['pagerank'].dtype == np.float32): - if (df['vertex'].dtype == np.int32): 
- call_pagerank[int, float](handle_[0], graph_container, - c_identifier, - c_pagerank_val, sz, - c_pers_vtx, c_pers_val, - alpha, tol, - max_iter, has_guess) - else: - call_pagerank[long, float](handle_[0], graph_container, - c_identifier, - c_pagerank_val, sz, - c_pers_vtx, c_pers_val, - alpha, tol, - max_iter, has_guess) - - else: - if (df['vertex'].dtype == np.int32): - call_pagerank[int, double](handle_[0], graph_container, - c_identifier, - c_pagerank_val, sz, - c_pers_vtx, c_pers_val, - alpha, tol, - max_iter, has_guess) - else: - call_pagerank[long, double](handle_[0], graph_container, - c_identifier, - c_pagerank_val, sz, - c_pers_vtx, c_pers_val, - alpha, tol, - max_iter, has_guess) - return df diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 32ee82a30f6..889d7c3218e 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -201,7 +201,8 @@ def __from_edgelist( self._make_plc_graph( value_col=value_col, - store_transposed=store_transposed + store_transposed=store_transposed, + renumber=renumber ) def to_pandas_edgelist(self, source='src', destination='dst', @@ -763,7 +764,10 @@ def _degree(self, vertex_subset, direction=Direction.ALL): return df - def _make_plc_graph(self, value_col=None, store_transposed=False): + def _make_plc_graph(self, + value_col=None, + store_transposed=False, + renumber=True): if value_col is None: value_col = cudf.Series( cupy.ones(len(self.edgelist.edgelist_df), dtype='float32') @@ -788,7 +792,7 @@ def _make_plc_graph(self, value_col=None, store_transposed=False): dst_array=self.edgelist.edgelist_df['dst'], weight_array=value_col, store_transposed=store_transposed, - renumber=False, + renumber=renumber, do_expensive_check=False ) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_pagerank.py 
b/python/cugraph/cugraph/tests/mg/test_mg_pagerank.py index 7ed651679fa..9bc835a2320 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_pagerank.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_pagerank.py @@ -16,6 +16,7 @@ import gc import cugraph import dask_cudf +from cugraph.testing import utils import cudf # from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH @@ -47,8 +48,20 @@ def personalize(vertices, personalization_perc): return cu_personalization, personalization +# ============================================================================= +# Parameters +# ============================================================================= PERSONALIZATION_PERC = [0, 10, 50] IS_DIRECTED = [True, False] +HAS_GUESS = [0, 1] +HAS_PRECOMPUTED = [0, 1] + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() # @pytest.mark.skipif( @@ -56,8 +69,10 @@ def personalize(vertices, personalization_perc): # ) @pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_pagerank(dask_client, personalization_perc, directed): - gc.collect() +@pytest.mark.parametrize("has_precomputed_vertex_out_weight", HAS_PRECOMPUTED) +@pytest.mark.parametrize("has_guess", HAS_GUESS) +def test_dask_pagerank(dask_client, personalization_perc, directed, + has_precomputed_vertex_out_weight, has_guess): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() @@ -80,21 +95,41 @@ def test_dask_pagerank(dask_client, personalization_perc, directed): ) g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst") + g.from_cudf_edgelist(df, "src", "dst", "value") dg = cugraph.Graph(directed=directed) - 
dg.from_dask_cudf_edgelist(ddf, "src", "dst") + dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") personalization = None + pre_vtx_o_wgt = None + nstart = None + max_iter = 100 + has_precomputed_vertex_out_weight if personalization_perc != 0: personalization, p = personalize( g.nodes(), personalization_perc ) + if has_precomputed_vertex_out_weight == 1: + df = df[["src", "value"]] + pre_vtx_o_wgt = df.groupby( + ['src'], as_index=False).sum().rename( + columns={"src": "vertex", "value": "sums"}) + + if has_guess == 1: + nstart = cugraph.pagerank( + g, personalization=personalization, tol=1e-6).rename( + columns={"pagerank": "values"}) + max_iter = 20 expected_pr = cugraph.pagerank( - g, personalization=personalization, tol=1e-6 + g, personalization=personalization, + precomputed_vertex_out_weight=pre_vtx_o_wgt, + max_iter=max_iter, tol=1e-6, nstart=nstart ) - result_pr = dcg.pagerank(dg, personalization=personalization, tol=1e-6) + result_pr = dcg.pagerank( + dg, personalization=personalization, + precomputed_vertex_out_weight=pre_vtx_o_wgt, + max_iter=max_iter, tol=1e-6, nstart=nstart) result_pr = result_pr.compute() err = 0 @@ -114,3 +149,33 @@ def test_dask_pagerank(dask_client, personalization_perc, directed): if diff > tol * 1.1: err = err + 1 assert err == 0 + + +def test_pagerank_invalid_personalization_dtype(dask_client): + input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH / + "karate.csv").as_posix() + + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=True) + dg.from_dask_cudf_edgelist( + ddf, source='src', destination='dst', + edge_attr="value", renumber=True) + + personalization_vec = cudf.DataFrame() + personalization_vec['vertex'] = [17, 26] + personalization_vec['values'] = [0.5, 0.75] + warning_msg = ("PageRank requires 'personalization' values to 
match the " + "graph's 'edge_attr' type. edge_attr type is: " + "float32 and got 'personalization' values " + "of type: float64.") + + with pytest.warns(UserWarning, match=warning_msg): + dcg.pagerank(dg, personalization=personalization_vec) diff --git a/python/cugraph/cugraph/tests/test_pagerank.py b/python/cugraph/cugraph/tests/test_pagerank.py index b9329fc09bd..f862ccf858d 100644 --- a/python/cugraph/cugraph/tests/test_pagerank.py +++ b/python/cugraph/cugraph/tests/test_pagerank.py @@ -47,7 +47,8 @@ def cudify(d): return cuD -def cugraph_call(G, max_iter, tol, alpha, personalization, nstart): +def cugraph_call(G, max_iter, tol, alpha, personalization, + nstart, pre_vtx_o_wgt): # cugraph Pagerank Call t1 = time.time() df = cugraph.pagerank( @@ -56,6 +57,7 @@ def cugraph_call(G, max_iter, tol, alpha, personalization, nstart): max_iter=max_iter, tol=tol, personalization=personalization, + precomputed_vertex_out_weight=pre_vtx_o_wgt, nstart=nstart, ) t2 = time.time() - t1 @@ -130,11 +132,22 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): return pr, personalization +# ============================================================================= +# Parameters +# ============================================================================= MAX_ITERATIONS = [500] TOLERANCE = [1.0e-06] ALPHA = [0.85] PERSONALIZATION_PERC = [0, 10, 50] HAS_GUESS = [0, 1] +HAS_PRECOMPUTED = [0, 1] + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() # FIXME: the default set of datasets includes an asymmetric directed graph @@ -145,16 +158,18 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # https://github.com/rapidsai/cugraph/issues/533 # + @pytest.mark.parametrize("graph_file", utils.DATASETS) 
@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @pytest.mark.parametrize("alpha", ALPHA) @pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) @pytest.mark.parametrize("has_guess", HAS_GUESS) +@pytest.mark.parametrize("has_precomputed_vertex_out_weight", HAS_PRECOMPUTED) def test_pagerank( - graph_file, max_iter, tol, alpha, personalization_perc, has_guess + graph_file, max_iter, tol, alpha, personalization_perc, has_guess, + has_precomputed_vertex_out_weight ): - gc.collect() # NetworkX PageRank M = utils.read_csv_for_nx(graph_file) @@ -169,6 +184,7 @@ def test_pagerank( ) cu_nstart = None + pre_vtx_o_wgt = None if has_guess == 1: cu_nstart = cudify(networkx_pr) max_iter = 20 @@ -176,10 +192,20 @@ def test_pagerank( # cuGraph PageRank cu_M = utils.read_csv_file(graph_file) - G = cugraph.DiGraph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + G = cugraph.Graph(directed=True) + G.from_cudf_edgelist( + cu_M, source="0", destination="1", edge_attr="2", + legacy_renum_only=True) + + if has_precomputed_vertex_out_weight == 1: + df = G.view_edge_list()[["src", "weights"]] + pre_vtx_o_wgt = df.groupby( + ['src'], as_index=False).sum().rename( + columns={"src": "vertex", "weights": "sums"}) - cugraph_pr = cugraph_call(G, max_iter, tol, alpha, cu_prsn, cu_nstart) + cugraph_pr = cugraph_call( + G, max_iter, tol, alpha, cu_prsn, cu_nstart, + pre_vtx_o_wgt) # Calculating mismatch networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0]) @@ -204,7 +230,6 @@ def test_pagerank( def test_pagerank_nx( graph_file, max_iter, tol, alpha, personalization_perc, has_guess ): - gc.collect() # NetworkX PageRank M = utils.read_csv_for_nx(graph_file) @@ -249,10 +274,11 @@ def test_pagerank_nx( @pytest.mark.parametrize("alpha", ALPHA) @pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) @pytest.mark.parametrize("has_guess", HAS_GUESS) 
+@pytest.mark.parametrize("has_precomputed_vertex_out_weight", HAS_PRECOMPUTED) def test_pagerank_multi_column( - graph_file, max_iter, tol, alpha, personalization_perc, has_guess + graph_file, max_iter, tol, alpha, personalization_perc, has_guess, + has_precomputed_vertex_out_weight ): - gc.collect() # NetworkX PageRank M = utils.read_csv_for_nx(graph_file) @@ -268,6 +294,7 @@ def test_pagerank_multi_column( ) cu_nstart = None + pre_vtx_o_wgt = None if has_guess == 1: cu_nstart_temp = cudify(networkx_pr) max_iter = 100 @@ -292,11 +319,17 @@ def test_pagerank_multi_column( cu_M["dst_1"] = cu_M["dst_0"] + 1000 cu_M["weights"] = cudf.Series(M["weight"]) - cu_G = cugraph.DiGraph() + cu_G = cugraph.Graph(directed=True) cu_G.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], destination=["dst_0", "dst_1"], edge_attr="weights") + if has_precomputed_vertex_out_weight == 1: + df = cu_M[["src_0", "src_1", "weights"]] + pre_vtx_o_wgt = df.groupby( + ['src_0', "src_1"], as_index=False).sum().rename( + columns={"weights": "sums"}) + df = cugraph.pagerank( cu_G, alpha=alpha, @@ -304,6 +337,7 @@ def test_pagerank_multi_column( tol=tol, personalization=cu_prsn, nstart=cu_nstart, + precomputed_vertex_out_weight=pre_vtx_o_wgt ) cugraph_pr = [] @@ -326,3 +360,29 @@ def test_pagerank_multi_column( err = err + 1 print("Mismatches:", err) assert err < (0.01 * len(cugraph_pr)) + + +def test_pagerank_invalid_personalization_dtype(): + input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH / + "karate.csv").as_posix() + M = utils.read_csv_for_nx(input_data_path) + G = cugraph.Graph(directed=True) + cu_M = cudf.DataFrame() + cu_M["src"] = cudf.Series(M["0"]) + cu_M["dst"] = cudf.Series(M["1"]) + + cu_M["weights"] = cudf.Series(M["weight"]) + G.from_cudf_edgelist( + cu_M, source="src", destination="dst", edge_attr="weights" + ) + + personalization_vec = cudf.DataFrame() + personalization_vec['vertex'] = [17, 26] + personalization_vec['values'] = [0.5, 0.75] + warning_msg = ("PageRank 
requires 'personalization' values to match the " + "graph's 'edge_attr' type. edge_attr type is: " + "float32 and got 'personalization' values " + "of type: float64.") + + with pytest.warns(UserWarning, match=warning_msg): + cugraph.pagerank(G, personalization=personalization_vec) diff --git a/python/cugraph/cugraph/tests/test_paths.py b/python/cugraph/cugraph/tests/test_paths.py index 56cc9b3cd50..7aaa1146d8b 100644 --- a/python/cugraph/cugraph/tests/test_paths.py +++ b/python/cugraph/cugraph/tests/test_paths.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,6 +13,7 @@ import sys from tempfile import NamedTemporaryFile +import math import cudf from cupy.sparse import coo_matrix as cupy_coo_matrix @@ -153,9 +154,14 @@ def test_shortest_path_length_invalid_vertexes(graphs): def test_shortest_path_length_no_path(graphs): cugraph_G, nx_G, cupy_df = graphs + # FIXME: In case there is no path between two vertices, the + # result can be either the max of float32 or float64 + max_float_32 = (2 - math.pow(2, -23))*math.pow(2, 127) + path_1_to_8 = cugraph.shortest_path_length(cugraph_G, 1, 8) assert path_1_to_8 == sys.float_info.max - assert path_1_to_8 == cugraph.shortest_path_length(nx_G, "1", "8") + assert cugraph.shortest_path_length(nx_G, "1", "8") in \ + [max_float_32, path_1_to_8] assert path_1_to_8 == cugraph.shortest_path_length(cupy_df, 1, 8) diff --git a/python/cugraph/cugraph/utilities/nx_factory.py b/python/cugraph/cugraph/utilities/nx_factory.py index afbb0dbab2c..c491d63241c 100644 --- a/python/cugraph/cugraph/utilities/nx_factory.py +++ b/python/cugraph/cugraph/utilities/nx_factory.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,6 +52,9 @@ def convert_weighted_named_to_gdf(NX_G, weight): _gdf['dst'] = dst _gdf['weight'] = wt + # FIXME: The weight dtype is hardcoded. + _gdf = _gdf.astype({'weight': 'float32'}) + return _gdf diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index c5ae32a0b2a..b8c76173750 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -23,6 +23,7 @@ set(cython_sources katz_centrality.pyx node2vec.pyx pagerank.pyx + personalized_pagerank.pyx resource_handle.pyx sssp.pyx triangle_count.pyx diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index 7d604bf2dbb..3dc53352eab 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -33,6 +33,8 @@ from pylibcugraph.pagerank import pagerank +from pylibcugraph.personalized_pagerank import personalized_pagerank + from pylibcugraph.sssp import sssp from pylibcugraph.hits import hits diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd index 3aede21176c..8e8b1c8e923 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd @@ -56,11 +56,13 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_pagerank( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const 
cugraph_type_erased_device_array_view_t* initial_guess_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error @@ -70,13 +72,15 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_personalized_pagerank( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, - cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, const cugraph_type_erased_device_array_view_t* personalization_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx index 17ac5876591..1a5d1a49801 100644 --- a/python/pylibcugraph/pylibcugraph/pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx @@ -14,6 +14,8 @@ # Have cython use python 3 syntax # cython: language_level = 3 +from libc.stdint cimport uintptr_t + from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, data_type_id_t, @@ -25,6 +27,7 @@ from pylibcugraph._cugraph_c.error cimport ( ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, + cugraph_type_erased_device_array_view_create, ) from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, @@ -46,16 +49,20 @@ from pylibcugraph.utils cimport ( assert_success, assert_CAI_type, copy_to_cupy_array, + get_c_type_from_numpy_type, + 
create_cugraph_type_erased_device_array_view_from_py_obj, ) def pagerank(ResourceHandle resource_handle, _GPUGraph graph, + precomputed_vertex_out_weight_vertices, precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, double alpha, double epsilon, size_t max_iterations, - bool_t has_initial_guess, bool_t do_expensive_check): """ Find the PageRank score for every vertex in a graph by computing an @@ -70,20 +77,32 @@ def pagerank(ResourceHandle resource_handle, Handle to the underlying device resources needed for referencing data and running algorithms. - graph : SGGraph + graph : SGGraph or MGGraph The input graph. - precomputed_vertex_out_weight_sums : None - This parameter is unsupported in this release and only None is - accepted. + precomputed_vertex_out_weight_vertices : device array type + Subset of vertices of graph for precomputed_vertex_out_weight + (a performance optimization) + + precomputed_vertex_out_weight_sums : device array type + Corresponding precomputed sum of outgoing vertices weight + (a performance optimization) + + initial_guess_vertices : device array type + Subset of vertices of graph for initial guess for pagerank values + (a performance optimization) - alpha : float + initial_guess_values : device array type + Pagerank values for vertices + (a performance optimization) + + alpha : double The damping factor alpha represents the probability to follow an outgoing edge, standard value is 0.85. Thus, 1.0-alpha is the probability to “teleport” to a random vertex. Alpha should be greater than 0.0 and strictly lower than 1.0. - epsilon : float + epsilon : double Set the tolerance the approximation, this parameter should be a small magnitude value. The lower the tolerance the better the approximation. If this value is @@ -92,18 +111,14 @@ def pagerank(ResourceHandle resource_handle, numerical roundoff. Usually values between 0.01 and 0.00001 are acceptable.
- max_iterations : int + max_iterations : size_t The maximum number of iterations before an answer is returned. This can be used to limit the execution time and do an early exit before the solver reaches the convergence tolerance. If this value is lower or equal to 0 cuGraph will use the default value, which is 100. - has_initial_guess : bool - This parameter is unsupported in this release and only False is - accepted. - - do_expensive_check : bool + do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. @@ -128,9 +143,8 @@ def pagerank(ResourceHandle resource_handle, ... resource_handle, graph_props, srcs, dsts, weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, pageranks) = pylibcugraph.pagerank( - ... resource_handle, G, None, alpha=0.85, epsilon=1.0e-6, - ... max_iterations=500, has_initial_guess=False, - ... do_expensive_check=False) + ... resource_handle, G, None, None, None, None, alpha=0.85, + ... 
epsilon=1.0e-6, max_iterations=500, do_expensive_check=False) >>> vertices array([0, 1, 2, 3], dtype=int32) >>> pageranks @@ -151,23 +165,31 @@ def pagerank(ResourceHandle resource_handle, raise RuntimeError("pagerank requires the numpy package, which could " "not be imported") - if has_initial_guess is True: - raise ValueError("has_initial_guess must be False for the current " - "release.") + cdef cugraph_type_erased_device_array_view_t* \ + initial_guess_vertices_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + initial_guess_vertices) - assert_CAI_type(precomputed_vertex_out_weight_sums, - "precomputed_vertex_out_weight_sums", - allow_None=True) - # FIXME: assert that precomputed_vertex_out_weight_sums type == weight type + cdef cugraph_type_erased_device_array_view_t* \ + initial_guess_values_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + initial_guess_values) cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ resource_handle.c_resource_handle_ptr cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + + cdef cugraph_type_erased_device_array_view_t* \ + precomputed_vertex_out_weight_vertices_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + precomputed_vertex_out_weight_vertices) + + # FIXME: assert that precomputed_vertex_out_weight_sums + # type == weight type cdef cugraph_type_erased_device_array_view_t* \ - precomputed_vertex_out_weight_sums_ptr = NULL - if precomputed_vertex_out_weight_sums: - raise NotImplementedError("None is temporarily the only supported " - "value for precomputed_vertex_out_weight_sums") + precomputed_vertex_out_weight_sums_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + precomputed_vertex_out_weight_sums) cdef cugraph_centrality_result_t* result_ptr cdef cugraph_error_code_t error_code @@ -175,11 +197,13 @@ def pagerank(ResourceHandle resource_handle, error_code = cugraph_pagerank(c_resource_handle_ptr, c_graph_ptr, + 
precomputed_vertex_out_weight_vertices_ptr, precomputed_vertex_out_weight_sums_ptr, + initial_guess_vertices_view_ptr, + initial_guess_values_view_ptr, alpha, epsilon, max_iterations, - has_initial_guess, do_expensive_check, &result_ptr, &error_ptr) diff --git a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx new file mode 100644 index 00000000000..7ada3dd8538 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx @@ -0,0 +1,252 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + +from libc.stdint cimport uintptr_t + +from pylibcugraph._cugraph_c.resource_handle cimport ( + bool_t, + data_type_id_t, + cugraph_resource_handle_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, + cugraph_type_erased_device_array_view_create, +) +from pylibcugraph._cugraph_c.graph cimport ( + cugraph_graph_t, +) +from pylibcugraph._cugraph_c.centrality_algorithms cimport ( + cugraph_centrality_result_t, + cugraph_personalized_pagerank, + cugraph_centrality_result_get_vertices, + cugraph_centrality_result_get_values, + cugraph_centrality_result_free, +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) +from pylibcugraph.utils cimport ( + assert_success, + assert_CAI_type, + copy_to_cupy_array, + get_c_type_from_numpy_type, + create_cugraph_type_erased_device_array_view_from_py_obj, +) + + +def personalized_pagerank(ResourceHandle resource_handle, + _GPUGraph graph, + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, + personalization_vertices, + personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check): + """ + Find the PageRank score for every vertex in a graph by computing an + approximation of the Pagerank eigenvector using the power method. The + number of iterations depends on the properties of the network itself; it + increases when the tolerance decreases and/or alpha increases toward the + limiting value of 1. + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + graph : SGGraph or MGGraph + The input graph.
+ + precomputed_vertex_out_weight_vertices : device array type + Subset of vertices of graph for precomputed_vertex_out_weight + (a performance optimization) + + precomputed_vertex_out_weight_sums : device array type + Corresponding precomputed sum of outgoing vertices weight + (a performance optimization) + + initial_guess_vertices : device array type + Subset of vertices of graph for initial guess for pagerank values + (a performance optimization) + + initial_guess_values : device array type + Pagerank values for vertices + (a performance optimization) + + personalization_vertices : device array type + Subset of vertices of graph for personalization + (a performance optimization) + + personalization_values : device array type + Personalization values for vertices + (a performance optimization) + + alpha : double + The damping factor alpha represents the probability to follow an + outgoing edge, standard value is 0.85. + Thus, 1.0-alpha is the probability to “teleport” to a random vertex. + Alpha should be greater than 0.0 and strictly lower than 1.0. + + epsilon : double + Set the tolerance of the approximation, this parameter should be a + small magnitude value. + The lower the tolerance the better the approximation. If this value is + 0.0f, cuGraph will use the default value which is 1.0E-5. + Setting too small a tolerance can lead to non-convergence due to + numerical roundoff. Usually values between 0.01 and 0.00001 are + acceptable. + + max_iterations : size_t + The maximum number of iterations before an answer is returned. This can + be used to limit the execution time and do an early exit before the + solver reaches the convergence tolerance. + If this value is lower or equal to 0 cuGraph will use the default + value, which is 100. + + do_expensive_check : bool_t + If True, performs more extensive tests on the inputs to ensure + validity, at the expense of increased run time.
+ + Returns + ------- + A tuple of device arrays, where the first item in the tuple is a device + array containing the vertex identifiers, and the second item is a device + array containing the pagerank values for the corresponding vertices. For + example, the vertex identifier at the ith element of the vertex array has + the pagerank value of the ith element in the pagerank array. + + Examples + -------- + >>> import pylibcugraph, cupy, numpy + >>> srcs = cupy.asarray([0, 1, 2], dtype=numpy.int32) + >>> dsts = cupy.asarray([1, 2, 3], dtype=numpy.int32) + >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32) + >>> personalization_vertices = cupy.asarray([0, 2], dtype=numpy.int32) + >>> personalization_values = cupy.asarray( + ... [0.008309, 0.991691], dtype=numpy.float32) + >>> resource_handle = pylibcugraph.ResourceHandle() + >>> graph_props = pylibcugraph.GraphProperties( + ... is_symmetric=False, is_multigraph=False) + >>> G = pylibcugraph.SGGraph( + ... resource_handle, graph_props, srcs, dsts, weights, + ... store_transposed=True, renumber=False, do_expensive_check=False) + >>> (vertices, pageranks) = pylibcugraph.personalized_pagerank( + ... resource_handle, G, None, None, None, None, alpha=0.85, + ... personalization_vertices=personalization_vertices, + ... personalization_values=personalization_values, epsilon=1.0e-6, + ... max_iterations=500, + ... do_expensive_check=False) + >>> vertices + array([0, 1, 2, 3], dtype=int32) + >>> pageranks + array([0.00446455, 0.00379487, 0.53607565, 0.45566472 ], dtype=float32) + """ + + # FIXME: import these modules here for now until a better pattern can be + # used for optional imports (perhaps 'import_optional()' from cugraph), or + # these are made hard dependencies. 
+ try: + import cupy + except ModuleNotFoundError: + raise RuntimeError("pagerank requires the cupy package, which could " + "not be imported") + try: + import numpy + except ModuleNotFoundError: + raise RuntimeError("pagerank requires the numpy package, which could " + "not be imported") + + cdef cugraph_type_erased_device_array_view_t* \ + initial_guess_vertices_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + initial_guess_vertices) + + cdef cugraph_type_erased_device_array_view_t* \ + initial_guess_values_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + initial_guess_values) + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + + cdef cugraph_type_erased_device_array_view_t* \ + precomputed_vertex_out_weight_vertices_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + precomputed_vertex_out_weight_vertices) + + # FIXME: assert that precomputed_vertex_out_weight_sums + # type == weight type + cdef cugraph_type_erased_device_array_view_t* \ + precomputed_vertex_out_weight_sums_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + precomputed_vertex_out_weight_sums) + + cdef cugraph_type_erased_device_array_view_t* \ + personalization_vertices_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + personalization_vertices) + + cdef cugraph_type_erased_device_array_view_t* \ + personalization_values_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + personalization_values) + + cdef cugraph_centrality_result_t* result_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + error_code = cugraph_personalized_pagerank(c_resource_handle_ptr, + c_graph_ptr, + precomputed_vertex_out_weight_vertices_ptr, + precomputed_vertex_out_weight_sums_ptr, + initial_guess_vertices_view_ptr, + initial_guess_values_view_ptr, + 
personalization_vertices_view_ptr, + personalization_values_view_ptr, + alpha, + epsilon, + max_iterations, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_personalized_pagerank") + + # Extract individual device array pointers from result and copy to cupy + # arrays for returning. + cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \ + cugraph_centrality_result_get_vertices(result_ptr) + cdef cugraph_type_erased_device_array_view_t* pageranks_ptr = \ + cugraph_centrality_result_get_values(result_ptr) + + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) + cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr) + + cugraph_centrality_result_free(result_ptr) + + return (cupy_vertices, cupy_pageranks) diff --git a/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py b/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py index 4b3adb51233..b387e8cf58d 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py @@ -93,7 +93,7 @@ def test_sg_graph(graph_data): del g else: - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): SGGraph(resource_handle, graph_props, device_srcs, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py b/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py index aa61c607f3c..9c15101cb9d 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py @@ -95,16 +95,21 @@ def test_pagerank(sg_transposed_graph_objs): (expected_verts, expected_pageranks) = _test_data[ds_name] precomputed_vertex_out_weight_sums = None - has_initial_guess = False do_expensive_check = False + precomputed_vertex_out_weight_vertices = None + precomputed_vertex_out_weight_sums = None + initial_guess_vertices = None + initial_guess_values = None result = pagerank(resource_handle, g, + 
precomputed_vertex_out_weight_vertices, precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, _alpha, _epsilon, _max_iterations, - has_initial_guess, do_expensive_check) num_expected_verts = len(expected_verts) diff --git a/python/pylibcugraph/pylibcugraph/utils.pxd b/python/pylibcugraph/pylibcugraph/utils.pxd index 83f534c297e..5b461d93e76 100644 --- a/python/pylibcugraph/pylibcugraph/utils.pxd +++ b/python/pylibcugraph/pylibcugraph/utils.pxd @@ -50,3 +50,6 @@ cdef copy_to_cupy_array( cdef copy_to_cupy_array_ids( cugraph_resource_handle_t* c_resource_handle_ptr, cugraph_type_erased_device_array_view_t* device_array_view_ptr) + +cdef cugraph_type_erased_device_array_view_t* \ + create_cugraph_type_erased_device_array_view_from_py_obj(python_obj) diff --git a/python/pylibcugraph/pylibcugraph/utils.pyx b/python/pylibcugraph/pylibcugraph/utils.pyx index 8ae9e680c5d..962c50fc29c 100644 --- a/python/pylibcugraph/pylibcugraph/utils.pyx +++ b/python/pylibcugraph/pylibcugraph/utils.pyx @@ -27,28 +27,59 @@ from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_free, ) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_message, + cugraph_error_free +) + # FIXME: add tests for this cdef assert_success(cugraph_error_code_t code, cugraph_error_t* err, api_name): if code != cugraph_error_code_t.CUGRAPH_SUCCESS: + c_error = cugraph_error_message(err) + if isinstance(c_error, bytes): + c_error = c_error.decode() + else: + c_error = str(c_error) + + cugraph_error_free(err) + if code == cugraph_error_code_t.CUGRAPH_UNKNOWN_ERROR: code_str = "CUGRAPH_UNKNOWN_ERROR" + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise RuntimeError(error_msg) elif code == cugraph_error_code_t.CUGRAPH_INVALID_HANDLE: code_str = "CUGRAPH_INVALID_HANDLE" + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise ValueError(error_msg) elif code == 
cugraph_error_code_t.CUGRAPH_ALLOC_ERROR: code_str = "CUGRAPH_ALLOC_ERROR" + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise MemoryError(error_msg) elif code == cugraph_error_code_t.CUGRAPH_INVALID_INPUT: code_str = "CUGRAPH_INVALID_INPUT" + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise ValueError(error_msg) elif code == cugraph_error_code_t.CUGRAPH_NOT_IMPLEMENTED: code_str = "CUGRAPH_NOT_IMPLEMENTED" + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise NotImplementedError(error_msg) elif code == cugraph_error_code_t.CUGRAPH_UNSUPPORTED_TYPE_COMBINATION: code_str = "CUGRAPH_UNSUPPORTED_TYPE_COMBINATION" + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise ValueError(error_msg) else: code_str = "unknown error code" - # FIXME: extract message using cugraph_error_message() - # FIXME: If error_ptr has a value, free it using cugraph_error_free() - raise RuntimeError(f"non-success value returned from {api_name}: {code_str}") + error_msg = f"non-success value returned from {api_name}: {code_str} "\ + f"{c_error}" + raise RuntimeError(error_msg) cdef assert_CAI_type(obj, var_name, allow_None=False): @@ -188,3 +219,16 @@ cdef copy_to_cupy_array_ids( return cupy_array +cdef cugraph_type_erased_device_array_view_t* \ + create_cugraph_type_erased_device_array_view_from_py_obj(python_obj): + cdef uintptr_t cai_ptr = <uintptr_t>NULL + cdef cugraph_type_erased_device_array_view_t* view_ptr = NULL + if python_obj is not None: + cai_ptr = python_obj.__cuda_array_interface__["data"][0] + view_ptr = cugraph_type_erased_device_array_view_create( + <void*>cai_ptr, + len(python_obj), + get_c_type_from_numpy_type(python_obj.dtype)) + + return view_ptr +