diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index bab521f485d..7c9cd0258a4 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -8,6 +8,11 @@ "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" + ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index d225f15f755..a4dc168505b 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -8,10 +8,15 @@ "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" + ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.14.1" + "version": "1.15.0" }, "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "11.8", diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json index bcaabab572b..eae4967f3b2 100644 --- a/.devcontainer/cuda12.2-conda/devcontainer.json +++ b/.devcontainer/cuda12.2-conda/devcontainer.json @@ -8,6 +8,11 @@ "BASE": "rapidsai/devcontainers:24.06-cpp-mambaforge-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" + ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index e472f4621f9..393a5c63d23 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -8,10 +8,15 @@ "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" + ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.14.1" + "version": "1.15.0" }, "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "12.2", diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 9a7324fb330..f5c14e8d315 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -105,6 +105,7 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" + sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}" done sed_runner "s/:[0-9][0-9]\.[0-9][0-9]/:${NEXT_SHORT_TAG}/" ./notebooks/README.md diff --git a/ci/test_wheel_cugraph-pyg.sh 
b/ci/test_wheel_cugraph-pyg.sh index e98bf4ab56b..f45112dd80b 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -33,7 +33,7 @@ else fi rapids-logger "Installing PyTorch and PyG dependencies" rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL} -rapids-retry python -m pip install torch-geometric==2.4.0 +rapids-retry python -m pip install "torch-geometric>=2.5,<2.6" rapids-retry python -m pip install \ ogb \ pyg_lib \ diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 818616c2f5b..c02e8391eb2 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -34,7 +34,7 @@ requirements: - cupy >=12.0.0 - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} - - pyg >=2.3,<2.5 + - pyg >=2.5,<2.6 tests: imports: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb6f348b380..57e0aa2d078 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -77,6 +77,16 @@ rapids_find_package(CUDAToolkit REQUIRED INSTALL_EXPORT_SET cugraph-exports ) +if (BUILD_CUGRAPH_MTMG_TESTS) + if(NOT TARGET ucx::ucp) + find_package(ucx REQUIRED) + endif() + + if(NOT TARGET ucxx::ucxx) + find_package(ucxx REQUIRED) + endif() +endif() + set(CUGRAPH_C_FLAGS "") set(CUGRAPH_CXX_FLAGS "") set(CUGRAPH_CUDA_FLAGS "") @@ -145,11 +155,6 @@ if(USE_CUGRAPH_OPS) include(cmake/thirdparty/get_libcugraphops.cmake) endif() - -if (BUILD_CUGRAPH_MTMG_TESTS) - include(cmake/thirdparty/get_ucp.cmake) -endif() - if(BUILD_TESTS) include(${rapids-cmake-dir}/cpm/gtest.cmake) rapids_cpm_gtest(BUILD_STATIC) @@ -283,9 +288,12 @@ set(CUGRAPH_SOURCES src/structure/symmetrize_edgelist_mg.cu src/community/triangle_count_sg.cu src/community/triangle_count_mg.cu + src/community/approx_weighted_matching_sg.cu + src/community/approx_weighted_matching_mg.cu src/traversal/k_hop_nbrs_sg.cu src/traversal/k_hop_nbrs_mg.cu src/mtmg/vertex_result.cu + src/mtmg/vertex_pairs_result.cu ) if(USE_CUGRAPH_OPS) diff --git a/cpp/cmake/thirdparty/get_ucp.cmake b/cpp/cmake/thirdparty/get_ucp.cmake deleted file mode 100644 index dcc4956a34e..00000000000 --- a/cpp/cmake/thirdparty/get_ucp.cmake +++ /dev/null @@ -1,35 +0,0 @@ -#============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#=============================================================================
-
-function(find_and_configure_ucp)
-
-    if(TARGET UCP::UCP)
-        return()
-    endif()
-
-    rapids_find_generate_module(UCP
-        HEADER_NAMES ucp.h
-        LIBRARY_NAMES ucp
-        INCLUDE_SUFFIXES ucp/api
-        )
-
-    # Currently UCP has no CMake build-system so we require
-    # it built and installed on the machine already
-    rapids_find_package(UCP REQUIRED)
-
-endfunction()
-
-find_and_configure_ucp()
diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
index 0caa151daac..7c4a978c4b4 100644
--- a/cpp/include/cugraph/algorithms.hpp
+++ b/cpp/include/cugraph/algorithms.hpp
@@ -2368,6 +2368,32 @@ rmm::device_uvector vertex_coloring(
   graph_view_t const& graph_view,
   raft::random::RngState& rng_state);
 
+/*
+ * @brief Approximate Weighted Matching
+ *
+ * A matching in an undirected graph G = (V, E) is a pairing of adjacent vertices
+ * such that each vertex is matched with at most one other vertex, the objective
+ * being to match as many vertices as possible or to maximise the sum of the
+ * weights of the matched edges. Here we provide an implementation of an
+ * approximation algorithm for maximum weight matching. See
+ * https://web.archive.org/web/20081031230449id_/http://www.ii.uib.no/~fredrikm/fredrik/papers/CP75.pdf
+ * for further information.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true).
+ * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
+ * and handles to various CUDA libraries) to run graph algorithms.
+ * @param[in] graph_view Graph view object.
+ * @param[in] edge_weight_view View object holding edge weights for @p graph_view.
+ * @return A tuple of device vector of matched vertex ids and sum of the weights of the matched
+ * edges.
+ */
+template
+std::tuple, weight_t> approximate_weighted_matching(
+  raft::handle_t const& handle,
+  graph_view_t const& graph_view,
+  edge_property_view_t edge_weight_view);
 } // namespace cugraph
 
 /**
diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp
index 1353302f825..69d48098a5d 100644
--- a/cpp/include/cugraph/detail/shuffle_wrappers.hpp
+++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp
@@ -213,12 +213,12 @@ shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning(
 *
 * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
 * and handles to various CUDA libraries) to run graph algorithms.
- * @param[in/out] d_edgelist_majors Vertex IDs for sources (if we are internally storing edges in + * @param[in,out] d_edgelist_majors Vertex IDs for sources (if we are internally storing edges in * the sparse 2D matrix using sources as major indices) or destinations (otherwise) - * @param[in/out] d_edgelist_minors Vertex IDs for destinations (if we are internally storing edges + * @param[in,out] d_edgelist_minors Vertex IDs for destinations (if we are internally storing edges * in the sparse 2D matrix using sources as major indices) or sources (otherwise) - * @param[in/out] d_edgelist_weights Optional edge weights - * @param[in/out] d_edgelist_id_type_pairs Optional edge (ID, type) pairs + * @param[in,out] d_edgelist_weights Optional edge weights + * @param[in,out] d_edgelist_id_type_pairs Optional edge (ID, type) pairs * @param[in] groupby_and_count_local_partition_by_minor If set to true, groupby and count edges * based on (local partition ID, GPU ID) pairs (where GPU IDs are computed by applying the * compute_gpu_id_from_vertex_t function to the minor vertex ID). If set to false, groupby and count diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp index caba9e91d8b..82a7a3fae25 100644 --- a/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp +++ b/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp @@ -25,12 +25,7 @@ namespace mtmg { namespace detail { /** - * @brief Wrap an object to be available for each GPU - * - * In the MTMG environment we need the ability to manage a collection of objects - * that are associated with a particular GPU, and fetch the objects from an - * arbitrary GPU thread. This object will wrap any object and allow it to be - * accessed from different threads. + * @brief Manage device spans on each GPU */ template using device_shared_device_span_t = device_shared_wrapper_t>; diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_device_span_tuple.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_device_span_tuple.hpp new file mode 100644 index 00000000000..2d28cbadc24 --- /dev/null +++ b/cpp/include/cugraph/mtmg/detail/device_shared_device_span_tuple.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +namespace cugraph { +namespace mtmg { +namespace detail { + +/** + * @brief Manage a tuple of device spans on each GPU + */ +template +using device_shared_device_span_tuple_t = + device_shared_wrapper_t...>>; + +} // namespace detail +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp index 969d32aa313..cfb746551ef 100644 --- a/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp +++ b/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp @@ -25,12 +25,10 @@ namespace mtmg { namespace detail { /** - * @brief Wrap an object to be available for each GPU + * @brief Manage a device vector on each GPU * - * In the MTMG environment we need the ability to manage a collection of objects - * that are associated with a particular GPU, and fetch the objects from an - * arbitrary GPU thread. This object will wrap any object and allow it to be - * accessed from different threads. + * Uses the device_shared_wrapper to manage an rmm::device_uvector on + * each GPU. */ template class device_shared_device_vector_t : public device_shared_wrapper_t> { diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_device_vector_tuple.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_device_vector_tuple.hpp new file mode 100644 index 00000000000..8d93d1ac39a --- /dev/null +++ b/cpp/include/cugraph/mtmg/detail/device_shared_device_vector_tuple.hpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cugraph { +namespace mtmg { +namespace detail { + +/** + * @brief Manage a tuple of device vector on each GPU + * + * Uses the device_shared_wrapper to manage a tuple of rmm::device_uvector + * instances on each GPU. + */ +template +class device_shared_device_vector_tuple_t + : public device_shared_wrapper_t...>> { + using parent_t = detail::device_shared_wrapper_t...>>; + + public: + /** + * @brief Create a device_shared_device_span (read only view) + */ + auto view() + { + std::lock_guard lock(parent_t::lock_); + + device_shared_device_span_tuple_t result; + + std::for_each(parent_t::objects_.begin(), parent_t::objects_.end(), [&result, this](auto& p) { + convert_to_span(std::index_sequence_for(), result, p); + // std::size_t Is... 
= std::index_sequence_for; + // result.set(p.first, std::make_tuple(raft::device_span{std::get(p.second).data(), std::get(p.second).size()}...)); + }); + + return result; + } + + private: + template + void convert_to_span(std::index_sequence, + device_shared_device_span_tuple_t& result, + std::pair...>>& p) + { + result.set(p.first, + std::make_tuple(raft::device_span{std::get(p.second).data(), + std::get(p.second).size()}...)); + } +}; + +} // namespace detail +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/vertex_pair_result.hpp b/cpp/include/cugraph/mtmg/vertex_pair_result.hpp new file mode 100644 index 00000000000..2049b76c4b9 --- /dev/null +++ b/cpp/include/cugraph/mtmg/vertex_pair_result.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief An MTMG device vector for storing vertex results + */ +template +class vertex_pair_result_t + : public detail::device_shared_device_vector_tuple_t { + using parent_t = detail::device_shared_device_vector_tuple_t; + + public: + /** + * @brief Create a vertex result view (read only) + */ + auto view() { return vertex_pair_result_view_t(this->parent_t::view()); } +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/vertex_pair_result_view.hpp b/cpp/include/cugraph/mtmg/vertex_pair_result_view.hpp new file mode 100644 index 00000000000..8e51ef2de3c --- /dev/null +++ b/cpp/include/cugraph/mtmg/vertex_pair_result_view.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief An MTMG device span for referencing a vertex pair result + */ +template +class vertex_pair_result_view_t + : public detail::device_shared_device_span_tuple_t { + using parent_t = detail::device_shared_device_span_tuple_t; + + public: + vertex_pair_result_view_t(parent_t&& other) : parent_t{std::move(other)} {} + + /** + * @brief Gather results from specified vertices + */ + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + gather(handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + cugraph::vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h index cb3d6b6375a..b6f59333805 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -121,7 +121,7 @@ cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle, * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage * needs to be transposed - * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] max_level Maximum level in hierarchy * @param [in] resolution Resolution parameter (gamma) in modularity formula. * This changes the size of the communities. Higher resolutions @@ -181,7 +181,7 @@ void cugraph_hierarchical_clustering_result_free(cugraph_hierarchical_clustering * @brief Compute ECG clustering * * @param [in] handle Handle for accessing resources - * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage * needs to be transposed * @param [in] min_weight Minimum edge weight in final graph diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 94b06189796..ff7e439232a 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -229,6 +229,26 @@ cugraph_error_code_t cugraph_allgather(const cugraph_resource_handle_t* handle, cugraph_induced_subgraph_result_t** result, cugraph_error_t** error); +/** + * @brief Count multi_edges + * + * Count the number of multi-edges in the graph + * + * @param [in] handle Handle for accessing resources. + * @param [in] graph Pointer to graph + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * true) + * @param [out] result Where to store the count of multi-edges + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_count_multi_edges(const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + bool_t do_expensive_check, + size_t* result, + cugraph_error_t** error); + /** * @brief Opaque degree result type */ diff --git a/cpp/include/cugraph_c/graph_generators.h b/cpp/include/cugraph_c/graph_generators.h index 36cded27ba6..272131d2aab 100644 --- a/cpp/include/cugraph_c/graph_generators.h +++ b/cpp/include/cugraph_c/graph_generators.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -120,7 +120,7 @@ void cugraph_coo_list_free(cugraph_coo_list_t* coo_list); * Vertex types will be int32 if scale < 32 and int64 if scale >= 32 * * @param [in] handle Handle for accessing resources - * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] scale Scale factor to set the number of vertices in the graph. Vertex IDs have * values in [0, V), where V = 1 << @p scale. * @param [in] num_edges Number of edges to generate. @@ -164,7 +164,7 @@ cugraph_error_code_t cugraph_generate_rmat_edgelist(const cugraph_resource_handl * Vertex types will be int32 if scale < 32 and int64 if scale >= 32 * * @param [in] handle Handle for accessing resources - * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] n_edgelists Number of edge lists (graphs) to generate * @param [in] min_scale Scale factor to set the minimum number of verties in the graph. * @param [in] max_scale Scale factor to set the maximum number of verties in the graph. @@ -204,8 +204,8 @@ cugraph_error_code_t cugraph_generate_rmat_edgelists( * Updates a COO to contain random edge weights * * @param [in] handle Handle for accessing resources - * @param [in/out] rng_state State of the random number generator, updated with each call - * @param [in/out] coo Opaque pointer to the coo, weights will be added (overwriting + * @param [in,out] rng_state State of the random number generator, updated with each call + * @param [in,out] coo Opaque pointer to the coo, weights will be added (overwriting * any existing weights) * @param [in] dtype The type of weight to generate (FLOAT32 or FLOAT64), ignored * unless include_weights is true @@ -229,7 +229,7 @@ cugraph_error_code_t cugraph_generate_edge_weights(const cugraph_resource_handle * edges * * @param [in] handle Handle for accessing resources - * @param [in/out] coo Opaque pointer to the coo, weights will be added (overwriting + * @param [in,out] coo Opaque pointer to the coo, weights will be added (overwriting * any existing weights) * @param [in] multi_gpu Flag if the COO is being created on multiple GPUs * @param [out] error Pointer to an error object storing details of any error. Will @@ -246,8 +246,8 @@ cugraph_error_code_t cugraph_generate_edge_ids(const cugraph_resource_handle_t* * Updates a COO to contain edge types. Edges types will be randomly generated. 
* * @param [in] handle Handle for accessing resources - * @param [in/out] rng_state State of the random number generator, updated with each call - * @param [in/out] coo Opaque pointer to the coo, weights will be added (overwriting + * @param [in,out] rng_state State of the random number generator, updated with each call + * @param [in,out] coo Opaque pointer to the coo, weights will be added (overwriting * any existing weights) * @param [in] max_edge_type Edge types will be randomly generated between min_edge_type * and max_edge_type diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 859eaca7f3b..35f60e195be 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -348,7 +348,7 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options); * parameter is only used with the retain_seeds option. * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm. * We only support fanout values of type INT32 - * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] sampling_options * Opaque pointer defining the sampling options. * @param [in] do_expensive_check @@ -599,7 +599,7 @@ cugraph_error_code_t cugraph_test_uniform_neighborhood_sample_result_create( * * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph - * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] num_vertices Number of vertices to sample * @param [out] vertices Device array view to populate label * @param [out] error Pointer to an error object storing details of diff --git a/cpp/include/cugraph_c/traversal_algorithms.h b/cpp/include/cugraph_c/traversal_algorithms.h index e25fa167e43..166205ce281 100644 --- a/cpp/include/cugraph_c/traversal_algorithms.h +++ b/cpp/include/cugraph_c/traversal_algorithms.h @@ -88,7 +88,7 @@ void cugraph_paths_result_free(cugraph_paths_result_t* result); * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph * FIXME: Make this just [in], copy it if I need to temporarily modify internally - * @param [in/out] sources Array of source vertices. NOTE: Array might be modified if + * @param [in,out] sources Array of source vertices. 
NOTE: Array might be modified if * renumbering is enabled for the graph * @param [in] direction_optimizing If set to true, this algorithm switches between the push based * breadth-first search and pull based breadth-first search depending on the size of the diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp index 35f7086d726..91371b988b3 100644 --- a/cpp/src/c_api/graph_functions.cpp +++ b/cpp/src/c_api/graph_functions.cpp @@ -214,6 +214,44 @@ struct two_hop_neighbors_functor : public cugraph::c_api::abstract_functor { } }; +struct count_multi_edges_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_{}; + cugraph::c_api::cugraph_graph_t* graph_{nullptr}; + size_t result_{}; + bool do_expensive_check_{false}; + + count_multi_edges_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + auto graph = + reinterpret_cast*>( + graph_->graph_); + + auto graph_view = graph->view(); + + result_ = static_cast(graph_view.count_multi_edges(handle_)); + } + } +}; + } // namespace extern "C" cugraph_error_code_t cugraph_create_vertex_pairs( @@ -281,3 +319,14 @@ extern "C" cugraph_error_code_t cugraph_two_hop_neighbors( return cugraph::c_api::run_algorithm(graph, functor, result, error); } + +extern "C" cugraph_error_code_t cugraph_count_multi_edges(const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + bool_t do_expensive_check, + size_t* result, + cugraph_error_t** error) +{ + count_multi_edges_functor functor(handle, graph, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/community/approx_weighted_matching_impl.cuh b/cpp/src/community/approx_weighted_matching_impl.cuh new file mode 100644 index 00000000000..e693beee489 --- /dev/null +++ b/cpp/src/community/approx_weighted_matching_impl.cuh @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#pragma once
+
+#include "prims/fill_edge_property.cuh"
+#include "prims/reduce_op.cuh"
+#include "prims/transform_e.cuh"
+#include "prims/transform_reduce_e_by_src_dst_key.cuh"
+#include "prims/update_edge_src_dst_property.cuh"
+#include "utilities/collect_comm.cuh"
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace cugraph {
+
+namespace detail {
+
+template
+std::tuple, weight_t> approximate_weighted_matching(
+  raft::handle_t const& handle,
+  cugraph::graph_view_t const& graph_view,
+  edge_property_view_t edge_weight_view)
+{
+  CUGRAPH_EXPECTS(graph_view.is_symmetric(),
+                  "Invalid input arguments: input graph for approximate_weighted_matching must "
+                  "be symmetric");
+
+  using graph_view_t = cugraph::graph_view_t;
+
+  graph_view_t current_graph_view(graph_view);
+  if (current_graph_view.has_edge_mask()) { current_graph_view.clear_edge_mask(); }
+
+  cugraph::edge_property_t edge_masks_even(handle, current_graph_view);
+  cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even);
+  cugraph::edge_property_t edge_masks_odd(handle, current_graph_view);
+  cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd);
+
+  if (graph_view.has_edge_mask()) {
+    current_graph_view.attach_edge_mask(*(graph_view.edge_mask_view()));
+  }
+  // Mask out self-loops
+  cugraph::transform_e(
+    handle,
+    current_graph_view,
+    cugraph::edge_src_dummy_property_t{}.view(),
+    cugraph::edge_dst_dummy_property_t{}.view(),
+    cugraph::edge_dummy_property_t{}.view(),
+    [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) {
+      return !(src == dst);
+    },
+    edge_masks_even.mutable_view());
+
+  if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask();
+  current_graph_view.attach_edge_mask(edge_masks_even.view());
+
+  auto constexpr invalid_partner = invalid_vertex_id::value;
+  rmm::device_uvector offers_from_partners(
+    current_graph_view.local_vertex_partition_range_size(), handle.get_stream());
+
+  rmm::device_uvector partners(current_graph_view.local_vertex_partition_range_size(),
+                               handle.get_stream());
+
+  thrust::fill(handle.get_thrust_policy(), partners.begin(), partners.end(), invalid_partner);
+  thrust::fill(handle.get_thrust_policy(),
+               offers_from_partners.begin(),
+               offers_from_partners.end(),
+               weight_t{0.0});
+
+  rmm::device_uvector local_vertices(
+    current_graph_view.local_vertex_partition_range_size(), handle.get_stream());
+  detail::sequence_fill(handle.get_stream(),
+                        local_vertices.begin(),
+                        local_vertices.size(),
+                        current_graph_view.local_vertex_partition_range_first());
+
+  edge_src_property_t src_key_cache(handle);
+  cugraph::edge_src_property_t src_match_flags(handle);
+  cugraph::edge_dst_property_t dst_match_flags(handle);
+
+  if constexpr (graph_view_t::is_multi_gpu) {
+    src_key_cache = edge_src_property_t(handle, current_graph_view);
+
+    update_edge_src_property(handle, current_graph_view, local_vertices.begin(), src_key_cache);
+
+    src_match_flags = cugraph::edge_src_property_t(handle, current_graph_view);
+    dst_match_flags = cugraph::edge_dst_property_t(handle, current_graph_view);
+  }
+
+  vertex_t loop_counter = 0;
+  while (true) {
+    //
+    // For each candidate vertex, find the best possible target
+    //
+
+    rmm::device_uvector candidates(0, handle.get_stream());
+    rmm::device_uvector offers_from_candidates(0, handle.get_stream());
+    rmm::device_uvector targets(0, handle.get_stream());
+
+    // FIXME: This can be implemented more efficiently if
per_v_transform_reduce_incoming|outgoing_e + // is updated to support reduction on thrust::tuple. + std::forward_as_tuple(candidates, std::tie(offers_from_candidates, targets)) = + cugraph::transform_reduce_e_by_src_key( + handle, + current_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_weight_view, + graph_view_t::is_multi_gpu + ? src_key_cache.view() + : detail::edge_major_property_view_t(local_vertices.begin()), + [] __device__(auto, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wt) { + return thrust::make_tuple(wt, dst); + }, + thrust::make_tuple(weight_t{0.0}, invalid_partner), + reduce_op::maximum>{}, + true); + + // + // For each target, find the best offer + // + + if constexpr (graph_view_t::is_multi_gpu) { + auto vertex_partition_range_lasts = current_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto key_func = cugraph::detail::compute_gpu_id_from_int_vertex_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + major_comm_size, + minor_comm_size}; + + std::forward_as_tuple(std::tie(candidates, offers_from_candidates, targets), std::ignore) = + cugraph::groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + thrust::make_zip_iterator(thrust::make_tuple( + candidates.begin(), offers_from_candidates.begin(), targets.begin())), + thrust::make_zip_iterator( + thrust::make_tuple(candidates.end(), offers_from_candidates.end(), targets.end())), + [key_func] __device__(auto val) { return key_func(thrust::get<2>(val)); }, + handle.get_stream()); + } + + auto itr_to_tuples = thrust::make_zip_iterator( + thrust::make_tuple(offers_from_candidates.begin(), candidates.begin())); + + thrust::sort_by_key(handle.get_thrust_policy(), targets.begin(), targets.end(), itr_to_tuples); + + auto nr_unique_targets = thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(targets.size()), + is_first_in_run_t{targets.data()}); + + rmm::device_uvector unique_targets(nr_unique_targets, handle.get_stream()); + rmm::device_uvector best_offers_to_targets(nr_unique_targets, handle.get_stream()); + rmm::device_uvector best_candidates(nr_unique_targets, handle.get_stream()); + + auto itr_to_reduced_tuples = thrust::make_zip_iterator( + thrust::make_tuple(best_offers_to_targets.begin(), best_candidates.begin())); + + auto new_end = thrust::reduce_by_key( + handle.get_thrust_policy(), + targets.begin(), + targets.end(), + itr_to_tuples, + unique_targets.begin(), + itr_to_reduced_tuples, + thrust::equal_to{}, + [] __device__(auto pair1, auto pair2) { return (pair1 > pair2) ? 
pair1 : pair2; });
+
+    vertex_t nr_reduces_tuples =
+      static_cast(thrust::distance(unique_targets.begin(), new_end.first));
+
+    targets = std::move(unique_targets);
+    offers_from_candidates = std::move(best_offers_to_targets);
+    candidates = std::move(best_candidates);
+
+    //
+    // If two vertices offer to each other, that's a match
+    //
+
+    kv_store_t target_candidate_map(targets.begin(),
+                                    targets.end(),
+                                    candidates.begin(),
+                                    invalid_vertex_id::value,
+                                    invalid_vertex_id::value,
+                                    handle.get_stream());
+
+    rmm::device_uvector candidates_of_candidates(0, handle.get_stream());
+
+    if (graph_view_t::is_multi_gpu) {
+      auto& comm = handle.get_comms();
+      auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
+      auto const major_comm_size = major_comm.get_size();
+      auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
+      auto const minor_comm_size = minor_comm.get_size();
+
+      auto partitions_range_lasts = graph_view.vertex_partition_range_lasts();
+      rmm::device_uvector d_partitions_range_lasts(partitions_range_lasts.size(),
+                                                   handle.get_stream());
+
+      raft::update_device(d_partitions_range_lasts.data(),
+                          partitions_range_lasts.data(),
+                          partitions_range_lasts.size(),
+                          handle.get_stream());
+
+      cugraph::detail::compute_gpu_id_from_int_vertex_t vertex_to_gpu_id_op{
+        raft::device_span(d_partitions_range_lasts.data(),
+                          d_partitions_range_lasts.size()),
+        major_comm_size,
+        minor_comm_size};
+
+      candidates_of_candidates = cugraph::collect_values_for_keys(handle,
+                                                                  target_candidate_map.view(),
+                                                                  candidates.begin(),
+                                                                  candidates.end(),
+                                                                  vertex_to_gpu_id_op);
+    } else {
+      candidates_of_candidates.resize(candidates.size(), handle.get_stream());
+
+      target_candidate_map.view().find(candidates.begin(),
+                                       candidates.end(),
+                                       candidates_of_candidates.begin(),
+                                       handle.get_stream());
+    }
+
+    //
+    // Mask out neighborhood of matched vertices
+    //
+
+    rmm::device_uvector is_vertex_matched = rmm::device_uvector(
+      current_graph_view.local_vertex_partition_range_size(), handle.get_stream());
+    thrust::fill(
+      handle.get_thrust_policy(), is_vertex_matched.begin(), is_vertex_matched.end(), bool{false});
+
+    thrust::for_each(
+      handle.get_thrust_policy(),
+      thrust::make_zip_iterator(thrust::make_tuple(candidates_of_candidates.begin(),
+                                                   targets.begin(),
+                                                   candidates.begin(),
+                                                   offers_from_candidates.begin())),
+      thrust::make_zip_iterator(thrust::make_tuple(candidates_of_candidates.end(),
+                                                   targets.end(),
+                                                   candidates.end(),
+                                                   offers_from_candidates.end())),
+      [partners = partners.begin(),
+       offers_from_partners = offers_from_partners.begin(),
+       is_vertex_matched =
+         raft::device_span(is_vertex_matched.data(), is_vertex_matched.size()),
+       v_first =
+         current_graph_view.local_vertex_partition_range_first()] __device__(auto msrc_tgt) {
+        auto candidate_of_candidate = thrust::get<0>(msrc_tgt);
+        auto tgt = thrust::get<1>(msrc_tgt);
+        auto candidate = thrust::get<2>(msrc_tgt);
+        auto offer_value = thrust::get<3>(msrc_tgt);
+
+        if (candidate_of_candidate != invalid_partner && candidate_of_candidate == tgt) {
+          auto tgt_offset = tgt - v_first;
+          is_vertex_matched[tgt_offset] = true;
+          partners[tgt_offset] = candidate;
+          offers_from_partners[tgt_offset] = offer_value;
+        }
+      });
+
+    if (current_graph_view.compute_number_of_edges(handle) == 0) { break; }
+
+    if constexpr (graph_view_t::is_multi_gpu) {
+      cugraph::update_edge_src_property(
+        handle, current_graph_view, is_vertex_matched.begin(), src_match_flags);
+      cugraph::update_edge_dst_property(
+        handle,
current_graph_view, is_vertex_matched.begin(), dst_match_flags); + } + + if (loop_counter % 2 == 0) { + if constexpr (graph_view_t::is_multi_gpu) { + cugraph::transform_e( + handle, + current_graph_view, + src_match_flags.view(), + dst_match_flags.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + return !((is_src_matched == true) || (is_dst_matched == true)); + }, + edge_masks_odd.mutable_view()); + } else { + cugraph::transform_e( + handle, + current_graph_view, + detail::edge_major_property_view_t(is_vertex_matched.begin()), + detail::edge_minor_property_view_t(is_vertex_matched.begin(), + vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + return !((is_src_matched == true) || (is_dst_matched == true)); + }, + edge_masks_odd.mutable_view()); + } + + if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + current_graph_view.attach_edge_mask(edge_masks_odd.view()); + } else { + if constexpr (graph_view_t::is_multi_gpu) { + cugraph::transform_e( + handle, + current_graph_view, + src_match_flags.view(), + dst_match_flags.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + return !((is_src_matched == true) || (is_dst_matched == true)); + }, + edge_masks_even.mutable_view()); + } else { + cugraph::transform_e( + handle, + current_graph_view, + detail::edge_major_property_view_t(is_vertex_matched.begin()), + detail::edge_minor_property_view_t(is_vertex_matched.begin(), + vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + return !((is_src_matched == true) || (is_dst_matched == true)); + }, + edge_masks_even.mutable_view()); + } + + if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + current_graph_view.attach_edge_mask(edge_masks_even.view()); + } + + loop_counter++; + } + + weight_t sum_matched_edge_weights = thrust::reduce( + handle.get_thrust_policy(), offers_from_partners.begin(), offers_from_partners.end()); + + if constexpr (graph_view_t::is_multi_gpu) { + sum_matched_edge_weights = host_scalar_allreduce( + handle.get_comms(), sum_matched_edge_weights, raft::comms::op_t::SUM, handle.get_stream()); + } + + return std::make_tuple(std::move(partners), sum_matched_edge_weights / 2.0); +} +} // namespace detail + +template +std::tuple, weight_t> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view) +{ + return detail::approximate_weighted_matching(handle, graph_view, edge_weight_view); +} + +} // namespace cugraph diff --git a/cpp/src/community/approx_weighted_matching_mg.cu b/cpp/src/community/approx_weighted_matching_mg.cu new file mode 100644 index 00000000000..41d6c3d97e0 --- /dev/null +++ b/cpp/src/community/approx_weighted_matching_mg.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "approx_weighted_matching_impl.cuh" + +namespace cugraph { + +template std::tuple, float> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, double> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, float> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, float> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, double> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, double> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +} // namespace cugraph diff --git a/cpp/src/community/approx_weighted_matching_sg.cu b/cpp/src/community/approx_weighted_matching_sg.cu new file mode 100644 index 00000000000..418a43d51ae --- /dev/null +++ b/cpp/src/community/approx_weighted_matching_sg.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "approx_weighted_matching_impl.cuh" + +namespace cugraph { + +template std::tuple, float> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, double> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, float> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, float> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, double> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +template std::tuple, double> approximate_weighted_matching( + raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view); + +} // namespace cugraph diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index c13259f0da7..00f7bc6cbe7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -21,6 +21,7 @@ #include "prims/update_edge_src_dst_property.cuh" #include "utilities/error_check_utils.cuh" +#include #include #include @@ -365,6 +366,24 @@ all_pairs_similarity(raft::handle_t const& handle, v1.resize(new_size, handle.get_stream()); v2.resize(new_size, handle.get_stream()); + if constexpr (multi_gpu) { + // shuffle vertex pairs + auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); + + std::tie(v1, v2, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(v1), + std::move(v2), + std::nullopt, + std::nullopt, + std::nullopt, + vertex_partition_range_lasts); + } + auto score = similarity(handle, graph_view, @@ -537,6 +556,24 @@ all_pairs_similarity(raft::handle_t const& handle, v1.resize(new_size, handle.get_stream()); v2.resize(new_size, handle.get_stream()); + if constexpr (multi_gpu) { + // shuffle vertex pairs + auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); + + std::tie(v1, v2, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(v1), + std::move(v2), + std::nullopt, + std::nullopt, + std::nullopt, + vertex_partition_range_lasts); + } + auto score = similarity(handle, graph_view, diff --git a/cpp/src/mtmg/vertex_pairs_result.cu b/cpp/src/mtmg/vertex_pairs_result.cu new file mode 100644 index 00000000000..55b855d676d --- /dev/null +++ b/cpp/src/mtmg/vertex_pairs_result.cu @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "detail/graph_partition_utils.cuh" + +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace mtmg { + +template +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view) +{ + // FIXME: Should this handle the case of multiple local host threads? + // It currently does not. If vertices were a raft::host_span + // We could have the host threads copy the data to a device_uvector + // and then have rank 0 execute this logic, and we could copy to + // host at the end. + auto stream = handle.raft_handle().get_stream(); + + rmm::device_uvector local_vertices(vertices.size(), stream); + rmm::device_uvector vertex_gpu_ids(vertices.size(), stream); + + raft::copy(local_vertices.data(), vertices.data(), vertices.size(), stream); + cugraph::detail::scalar_fill( + stream, vertex_gpu_ids.data(), vertex_gpu_ids.size(), handle.get_rank()); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + stream); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + stream); + + if (renumber_map_view) { + cugraph::renumber_ext_vertices( + handle.raft_handle(), + local_vertices.data(), + local_vertices.size(), + renumber_map_view->get(handle).data(), + vertex_partition_view.local_vertex_partition_range_first(), + vertex_partition_view.local_vertex_partition_range_last()); + } + + auto const major_comm_size = + handle.raft_handle().get_subcomm(cugraph::partition_manager::major_comm_name()).get_size(); + auto const minor_comm_size = + handle.raft_handle().get_subcomm(cugraph::partition_manager::minor_comm_name()).get_size(); + + std::tie(local_vertices, vertex_gpu_ids, std::ignore) = groupby_gpu_id_and_shuffle_kv_pairs( + handle.raft_handle().get_comms(), + local_vertices.begin(), + local_vertices.end(), + vertex_gpu_ids.begin(), + cugraph::detail::compute_gpu_id_from_int_vertex_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + major_comm_size, + minor_comm_size}, + stream); + + // + // LOOK AT THIS... + // I think the above shuffle is correct... + // This will give us vertex/gpu_id tuples on the GPU that vertex is assigned + // to. I need to take this and filter the device vector tuples based on the desired + // vertex (v1). 
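+  // The remaining steps, in brief: copy the locally stored (v1, v2, result)
+  // tuples out of the wrapper, filter them against the shuffled query
+  // vertices, shuffle the surviving tuples back to the GPU that issued each
+  // query (using the gpu id recorded before the first shuffle), and
+  // unrenumber v1 and v2 before returning (when a renumber map is supplied).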
+ // + + // + // Now gather + // + auto& wrapped = this->get(handle); + + rmm::device_uvector v1(std::get<0>(wrapped).size(), stream); + rmm::device_uvector v2(std::get<0>(wrapped).size(), stream); + rmm::device_uvector result(std::get<0>(wrapped).size(), stream); + + thrust::copy( + rmm::exec_policy(stream), + thrust::make_zip_iterator( + std::get<0>(wrapped).begin(), std::get<1>(wrapped).begin(), std::get<2>(wrapped).begin()), + thrust::make_zip_iterator( + std::get<0>(wrapped).end(), std::get<1>(wrapped).end(), std::get<2>(wrapped).end()), + thrust::make_zip_iterator(v1.begin(), v2.begin(), result.begin())); + + thrust::sort_by_key( + rmm::exec_policy(stream), local_vertices.begin(), local_vertices.end(), vertex_gpu_ids.begin()); + + auto new_end = + thrust::remove_if(rmm::exec_policy(stream), + thrust::make_zip_iterator(v1.begin(), v2.begin(), result.begin()), + thrust::make_zip_iterator(v1.end(), v2.end(), result.end()), + [v1_check = raft::device_span{ + local_vertices.data(), local_vertices.size()}] __device__(auto tuple) { + return thrust::binary_search( + thrust::seq, v1_check.begin(), v1_check.end(), thrust::get<0>(tuple)); + }); + + v1.resize( + thrust::distance(thrust::make_zip_iterator(v1.begin(), v2.begin(), result.begin()), new_end), + stream); + v2.resize(v1.size(), stream); + result.resize(v1.size(), stream); + + // + // Shuffle back + // + std::forward_as_tuple(std::ignore, std::tie(v1, v2, result), std::ignore) = + groupby_gpu_id_and_shuffle_kv_pairs( + handle.raft_handle().get_comms(), + v1.begin(), + v1.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin(), result.begin()), + cuda::proclaim_return_type( + [local_v = raft::device_span{local_vertices.data(), local_vertices.size()}, + gpu = raft::device_span{vertex_gpu_ids.data(), + vertex_gpu_ids.size()}] __device__(auto v1) { + return gpu[thrust::distance( + local_v.begin(), thrust::lower_bound(thrust::seq, local_v.begin(), local_v.end(), v1))]; + }), + stream); + + if (renumber_map_view) { + cugraph::unrenumber_int_vertices(handle.raft_handle(), + v1.data(), + v1.size(), + renumber_map_view->get(handle).data(), + vertex_partition_range_lasts); + + cugraph::unrenumber_int_vertices(handle.raft_handle(), + v2.data(), + v2.size(), + renumber_map_view->get(handle).data(), + vertex_partition_range_lasts); + } + + return std::make_tuple(std::move(v1), std::move(v2), std::move(result)); +} + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t 
vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + vertex_pair_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + std::vector const& vertex_partition_range_lasts, + vertex_partition_view_t vertex_partition_view, + std::optional>& renumber_map_view); + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 2cc7856d87a..76b64b5692b 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -526,6 +526,7 @@ class kv_cuco_store_t { std::conditional_t, value_t, void>>(0, stream)) { allocate(capacity, invalid_key, invalid_value, stream); + if constexpr (!std::is_arithmetic_v) { invalid_value_ = invalid_value; } capacity_ = capacity; size_ = 0; } diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index 1f6f29d8683..fb837484a14 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -31,8 +31,6 @@ #include -#include - #include #include #include @@ -149,11 +147,11 @@ void bfs(raft::handle_t const& handle, auto constexpr invalid_distance = std::numeric_limits::max(); auto constexpr invalid_vertex = invalid_vertex_id::value; - thrust::fill(rmm::exec_policy(handle.get_thrust_policy()), + thrust::fill(handle.get_thrust_policy(), distances, distances + 
push_graph_view.local_vertex_partition_range_size(), invalid_distance); - thrust::fill(rmm::exec_policy(handle.get_thrust_policy()), + thrust::fill(handle.get_thrust_policy(), predecessor_first, predecessor_first + push_graph_view.local_vertex_partition_range_size(), invalid_vertex); @@ -161,7 +159,7 @@ void bfs(raft::handle_t const& handle, push_graph_view.local_vertex_partition_view()); if (n_sources) { thrust::for_each( - rmm::exec_policy(handle.get_thrust_policy()), + handle.get_thrust_policy(), sources, sources + n_sources, [vertex_partition, distances, predecessor_first] __device__(auto v) { diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2dcda796f9c..ced3b7bedb1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -309,6 +309,10 @@ ConfigureTest(LOUVAIN_TEST community/louvain_test.cpp) # - LEIDEN tests ---------------------------------------------------------------------------------- ConfigureTest(LEIDEN_TEST community/leiden_test.cpp) +################################################################################################### +# - WEIGHTED MATCHING tests ---------------------------------------------------------------------------------- +ConfigureTest(WEIGHTED_MATCHING_TEST community/weighted_matching_test.cpp) + ################################################################################################### # - Legacy ECG tests ------------------------------------------------------------------------------------- ConfigureTest(LEGACY_ECG_TEST community/legacy_ecg_test.cpp) @@ -570,6 +574,10 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG LEIDEN tests -------------------------------------------------------------------------- ConfigureTestMG(MG_LEIDEN_TEST community/mg_leiden_test.cpp) + ############################################################################################### + # - MG WEIGHTED MATCHING tests -------------------------------------------------------------------------- + ConfigureTestMG(MG_WEIGHTED_MATCHING_TEST community/mg_weighted_matching_test.cpp) + ############################################################################################### # - MG ECG tests -------------------------------------------------------------------------- ConfigureTestMG(MG_ECG_TEST community/mg_ecg_test.cpp) @@ -733,6 +741,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_K_CORE_TEST c_api/mg_k_core_test.c) ConfigureCTestMG(MG_CAPI_INDUCED_SUBGRAPH_TEST c_api/mg_induced_subgraph_test.c) ConfigureCTestMG(MG_CAPI_DEGREES c_api/mg_degrees_test.c) + ConfigureCTestMG(MG_CAPI_COUNT_MULTI_EDGES c_api/mg_count_multi_edges_test.c) ConfigureCTestMG(MG_CAPI_EGONET_TEST c_api/mg_egonet_test.c) ConfigureCTestMG(MG_CAPI_TWO_HOP_NEIGHBORS_TEST c_api/mg_two_hop_neighbors_test.c) @@ -777,6 +786,7 @@ ConfigureCTest(CAPI_SIMILARITY_TEST c_api/similarity_test.c) ConfigureCTest(CAPI_K_CORE_TEST c_api/k_core_test.c) ConfigureCTest(CAPI_INDUCED_SUBGRAPH_TEST c_api/induced_subgraph_test.c) ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c) +ConfigureCTest(CAPI_COUNT_MULTI_EDGES c_api/count_multi_edges_test.c) ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c) ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c) ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) @@ -787,14 +797,31 @@ if (BUILD_CUGRAPH_MTMG_TESTS) ConfigureTest(MTMG_TEST mtmg/threaded_test.cu) target_link_libraries(MTMG_TEST PRIVATE - UCP::UCP + cugraphmgtestutil + ${COMPILED_RAFT_LIB} + ucx::ucp + ucx::ucs + ucxx::ucxx ) ConfigureTest(MTMG_LOUVAIN_TEST 
mtmg/threaded_test_louvain.cu) target_link_libraries(MTMG_LOUVAIN_TEST PRIVATE cugraphmgtestutil - UCP::UCP + ${COMPILED_RAFT_LIB} + ucx::ucp + ucx::ucs + ucxx::ucxx + ) + + ConfigureTest(MTMG_JACCARD_TEST mtmg/threaded_test_jaccard.cu) + target_link_libraries(MTMG_JACCARD_TEST + PRIVATE + cugraphmgtestutil + ${COMPILED_RAFT_LIB} + ucx::ucp + ucx::ucs + ucxx::ucxx ) if(BUILD_CUGRAPH_MG_TESTS) @@ -804,7 +831,10 @@ if (BUILD_CUGRAPH_MTMG_TESTS) target_link_libraries(MTMG_MULTINODE_TEST PRIVATE cugraphmgtestutil - UCP::UCP + ${COMPILED_RAFT_LIB} + ucx::ucp + ucx::ucs + ucxx::ucxx ) endif(BUILD_CUGRAPH_MG_TESTS) endif(BUILD_CUGRAPH_MTMG_TESTS) diff --git a/cpp/tests/c_api/count_multi_edges_test.c b/cpp/tests/c_api/count_multi_edges_test.c new file mode 100644 index 00000000000..222cf12ea36 --- /dev/null +++ b/cpp/tests/c_api/count_multi_edges_test.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +data_type_id_t vertex_tid = INT32; +data_type_id_t edge_tid = INT32; +data_type_id_t weight_tid = FLOAT32; +data_type_id_t edge_id_tid = INT32; +data_type_id_t edge_type_tid = INT32; + +/* + * Create graph and count multi-edges + */ +int generic_count_multi_edges_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_vertices, + size_t num_edges, + bool_t store_transposed, + bool_t is_symmetric, + bool_t is_multigraph, + size_t multi_edges_count) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + size_t result = 0; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + ret_code = create_sg_test_graph(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + h_wgt, + edge_type_tid, + NULL, + edge_id_tid, + NULL, + num_edges, + store_transposed, + FALSE, + is_symmetric, + is_multigraph, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_count_multi_edges(handle, graph, FALSE, &result, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_count_multi_edges failed."); + + TEST_ASSERT(test_ret_value, result == multi_edges_count, "multi-edge count did not match"); + + cugraph_graph_free(graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_multi_edges_count() +{ + size_t num_edges = 14; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 0, 1, 1, 3, 0, 1}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 1, 3, 0, 1, 1, 0}; + weight_t h_wgt[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + size_t multi_edge_count = 4; + + return generic_count_multi_edges_test( + h_src, h_dst, h_wgt, num_vertices, num_edges, TRUE, TRUE, TRUE, multi_edge_count); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + int result = 0; + result |= RUN_TEST(test_multi_edges_count); + return result; +} diff --git a/cpp/tests/c_api/mg_count_multi_edges_test.c b/cpp/tests/c_api/mg_count_multi_edges_test.c new file mode 100644 index 00000000000..69eaaff40dc --- /dev/null +++ b/cpp/tests/c_api/mg_count_multi_edges_test.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +data_type_id_t vertex_tid = INT32; +data_type_id_t edge_tid = INT32; +data_type_id_t weight_tid = FLOAT32; +data_type_id_t edge_id_tid = INT32; +data_type_id_t edge_type_tid = INT32; + +/* + * Create graph and count multi-edges + */ +int generic_count_multi_edges_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_vertices, + size_t num_edges, + bool_t store_transposed, + bool_t is_symmetric, + bool_t is_multigraph, + size_t multi_edges_count) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* graph = NULL; + size_t result = 0; + + ret_code = create_mg_test_graph_new(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + h_wgt, + edge_type_tid, + NULL, + edge_id_tid, + NULL, + num_edges, + store_transposed, + FALSE, + is_symmetric, + is_multigraph, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_count_multi_edges(handle, graph, FALSE, &result, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_count_multi_edges failed."); + + TEST_ASSERT(test_ret_value, result == multi_edges_count, "multi-edge count did not match"); + + cugraph_graph_free(graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_multi_edges_count(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 14; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 0, 1, 1, 3, 0, 1}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 1, 3, 0, 1, 1, 0}; + weight_t h_wgt[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + size_t multi_edge_count = 4; + + return generic_count_multi_edges_test( + handle, h_src, h_dst, h_wgt, num_vertices, num_edges, TRUE, TRUE, TRUE, multi_edge_count); +} + +/******************************************************************************/ + +int main(int argc, char** argv) 
+{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_multi_edges_count, handle); + + cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} diff --git a/cpp/tests/community/mg_weighted_matching_test.cpp b/cpp/tests/community/mg_weighted_matching_test.cpp new file mode 100644 index 00000000000..21963922ab1 --- /dev/null +++ b/cpp/tests/community/mg_weighted_matching_test.cpp @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/test_graphs.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +struct WeightedMatching_UseCase { + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_MGWeightedMatching + : public ::testing::TestWithParam> { + public: + Tests_MGWeightedMatching() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param) + { + auto [weighted_matching_usecase, input_usecase] = param; + + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + constexpr bool multi_gpu = true; + + bool test_weighted = true; + bool renumber = true; + bool drop_self_loops = false; + bool drop_multi_edges = false; + + auto [mg_graph, mg_edge_weights, mg_renumber_map] = + cugraph::test::construct_graph( + *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); + + std::tie(mg_graph, mg_edge_weights, mg_renumber_map) = cugraph::symmetrize_graph( + *handle_, + std::move(mg_graph), + std::move(mg_edge_weights), + mg_renumber_map ? std::optional>(std::move(*mg_renumber_map)) + : std::nullopt, + false); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = + mg_edge_weights ? 
std::make_optional((*mg_edge_weights).view()) : std::nullopt; + + std::optional> edge_mask{std::nullopt}; + if (weighted_matching_usecase.edge_masking) { + edge_mask = cugraph::test::generate::edge_property( + *handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + rmm::device_uvector mg_partners(0, handle_->get_stream()); + weight_t mg_matching_weights; + + std::forward_as_tuple(mg_partners, mg_matching_weights) = + cugraph::approximate_weighted_matching( + *handle_, mg_graph_view, (*mg_edge_weights).view()); + + if (weighted_matching_usecase.check_correctness) { + auto h_mg_partners = cugraph::test::to_host(*handle_, mg_partners); + + auto constexpr invalid_partner = cugraph::invalid_vertex_id::value; + + rmm::device_uvector mg_aggregate_partners(0, handle_->get_stream()); + std::tie(std::ignore, mg_aggregate_partners) = + cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( + *handle_, + std::optional>{std::nullopt}, + mg_graph_view.local_vertex_partition_range(), + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + raft::device_span(mg_partners.data(), mg_partners.size())); + + cugraph::graph_t sg_graph(*handle_); + std::optional< + cugraph::edge_property_t, weight_t>> + sg_edge_weights{std::nullopt}; + std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>(std::nullopt), + false); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_graph_view = sg_graph.view(); + + rmm::device_uvector sg_partners(0, handle_->get_stream()); + weight_t sg_matching_weights; + + std::forward_as_tuple(sg_partners, sg_matching_weights) = + cugraph::approximate_weighted_matching( + *handle_, sg_graph_view, (*sg_edge_weights).view()); + auto h_sg_partners = cugraph::test::to_host(*handle_, sg_partners); + auto h_mg_aggregate_partners = cugraph::test::to_host(*handle_, mg_aggregate_partners); + + ASSERT_FLOAT_EQ(mg_matching_weights, sg_matching_weights) + << "SG and MG matching weights are different"; + ASSERT_TRUE( + std::equal(h_sg_partners.begin(), h_sg_partners.end(), h_mg_aggregate_partners.begin())); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGWeightedMatching::handle_ = nullptr; + +using Tests_MGWeightedMatching_File = Tests_MGWeightedMatching; +using Tests_MGWeightedMatching_Rmat = Tests_MGWeightedMatching; + +TEST_P(Tests_MGWeightedMatching_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGWeightedMatching_File, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGWeightedMatching_File, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGWeightedMatching_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGWeightedMatching_Rmat, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGWeightedMatching_Rmat, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGWeightedMatching_File, + ::testing::Combine(::testing::Values(WeightedMatching_UseCase{false}, + 
WeightedMatching_UseCase{true}),
+                                            ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))));
+
+INSTANTIATE_TEST_SUITE_P(rmat_small_test,
+                         Tests_MGWeightedMatching_Rmat,
+                         ::testing::Combine(::testing::Values(WeightedMatching_UseCase{false},
+                                                              WeightedMatching_UseCase{true}),
+                                            ::testing::Values(cugraph::test::Rmat_Usecase(
+                                              3, 2, 0.57, 0.19, 0.19, 0, true, false))));
+
+INSTANTIATE_TEST_SUITE_P(
+  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
+                          --gtest_filter to select only the rmat_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one Rmat_Usecase that differ only in scale or edge
+                          factor (to avoid running same benchmarks more than once) */
+  Tests_MGWeightedMatching_Rmat,
+  ::testing::Combine(
+    ::testing::Values(WeightedMatching_UseCase{false, false},
+                      WeightedMatching_UseCase{true, false}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false))));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/community/weighted_matching_test.cpp b/cpp/tests/community/weighted_matching_test.cpp
new file mode 100644
index 00000000000..436273c3be3
--- /dev/null
+++ b/cpp/tests/community/weighted_matching_test.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/test_graphs.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +struct WeightedMatching_UseCase { + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_SGWeightedMatching + : public ::testing::TestWithParam> { + public: + Tests_SGWeightedMatching() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param) + { + auto [weighted_matching_usecase, input_usecase] = param; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + hr_timer.start("Construct graph"); + } + + constexpr bool multi_gpu = false; + + bool test_weighted = true; + bool renumber = true; + bool drop_self_loops = false; + bool drop_multi_edges = false; + + auto [sg_graph, sg_edge_weights, sg_renumber_map] = + cugraph::test::construct_graph( + handle, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); + + std::tie(sg_graph, sg_edge_weights, sg_renumber_map) = cugraph::symmetrize_graph( + handle, std::move(sg_graph), std::move(sg_edge_weights), std::move(sg_renumber_map), false); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto sg_graph_view = sg_graph.view(); + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + + std::optional> edge_mask{std::nullopt}; + if (weighted_matching_usecase.edge_masking) { + edge_mask = cugraph::test::generate::edge_property( + handle, sg_graph_view, 2); + sg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + rmm::device_uvector d_partners(0, handle.get_stream()); + weight_t total_matching_weights; + + std::forward_as_tuple(d_partners, total_matching_weights) = + cugraph::approximate_weighted_matching( + handle, sg_graph_view, (*sg_edge_weights).view()); + + if (weighted_matching_usecase.check_correctness) { + auto h_partners = cugraph::test::to_host(handle, d_partners); + auto constexpr invalid_partner = cugraph::invalid_vertex_id::value; + + std::for_each(h_partners.begin(), h_partners.end(), [&invalid_partner, h_partners](auto& v) { + if (v != invalid_partner) ASSERT_TRUE(h_partners[h_partners[v]] == v); + }); + } + } +}; + +using Tests_SGWeightedMatching_File = Tests_SGWeightedMatching; +using Tests_SGWeightedMatching_Rmat = Tests_SGWeightedMatching; + +TEST_P(Tests_SGWeightedMatching_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGWeightedMatching_File, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGWeightedMatching_File, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGWeightedMatching_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGWeightedMatching_Rmat, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + 
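
Both the SG and MG weighted-matching tests above validate the partner array returned by cugraph::approximate_weighted_matching with the same symmetry invariant: every matched vertex must be its partner's partner. A host-only sketch of that check, with std::numeric_limits<vertex_t>::max() standing in for cugraph::invalid_vertex_id<vertex_t>::value (orientation only, not cugraph code):

// Minimal host-side sketch of the invariant the matching tests check.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

int main()
{
  using vertex_t          = int32_t;
  auto constexpr invalid  = std::numeric_limits<vertex_t>::max();  // stand-in for invalid_vertex_id

  // partner[v] == u means edge (v, u) is in the matching; unmatched vertices hold `invalid`.
  std::vector<vertex_t> partner = {1, 0, 3, 2, invalid};

  for (vertex_t v = 0; v < static_cast<vertex_t>(partner.size()); ++v) {
    if (partner[v] != invalid) { assert(partner[partner[v]] == v); }
  }
  std::printf("matching is symmetric\n");
  return 0;
}
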
+TEST_P(Tests_SGWeightedMatching_Rmat, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SGWeightedMatching_File, + ::testing::Combine(::testing::Values(WeightedMatching_UseCase{false}, + WeightedMatching_UseCase{true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_SGWeightedMatching_Rmat, + ::testing::Combine(::testing::Values(WeightedMatching_UseCase{false}, + WeightedMatching_UseCase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 3, 3, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_SGWeightedMatching_Rmat, + ::testing::Combine( + ::testing::Values(WeightedMatching_UseCase{false, false}, + WeightedMatching_UseCase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/mtmg/multi_node_threaded_test.cu b/cpp/tests/mtmg/multi_node_threaded_test.cu index 0ac8c56bc46..1ad83761d51 100644 --- a/cpp/tests/mtmg/multi_node_threaded_test.cu +++ b/cpp/tests/mtmg/multi_node_threaded_test.cu @@ -14,8 +14,9 @@ * limitations under the License. */ #include "utilities/base_fixture.hpp" +#include "utilities/check_utilities.hpp" +#include "utilities/conversion_utilities.hpp" #include "utilities/test_graphs.hpp" -#include "utilities/test_utilities.hpp" #include "utilities/thrust_wrapper.hpp" #include diff --git a/cpp/tests/mtmg/threaded_test.cu b/cpp/tests/mtmg/threaded_test.cu index 5d902e35dfa..f55a102ea67 100644 --- a/cpp/tests/mtmg/threaded_test.cu +++ b/cpp/tests/mtmg/threaded_test.cu @@ -14,8 +14,9 @@ * limitations under the License. */ #include "utilities/base_fixture.hpp" +#include "utilities/check_utilities.hpp" +#include "utilities/conversion_utilities.hpp" #include "utilities/test_graphs.hpp" -#include "utilities/test_utilities.hpp" #include "utilities/thrust_wrapper.hpp" #include @@ -388,11 +389,11 @@ class Tests_Multithreaded std::for_each( computed_pageranks_v.begin(), computed_pageranks_v.end(), - [h_sg_pageranks, compare_functor, h_sg_renumber_map](auto t1) { + [&h_sg_pageranks, compare_functor, &h_sg_renumber_map](auto t1) { std::for_each( thrust::make_zip_iterator(std::get<0>(t1).begin(), std::get<1>(t1).begin()), thrust::make_zip_iterator(std::get<0>(t1).end(), std::get<1>(t1).end()), - [h_sg_pageranks, compare_functor, h_sg_renumber_map](auto t2) { + [&h_sg_pageranks, compare_functor, &h_sg_renumber_map](auto t2) { vertex_t v = thrust::get<0>(t2); weight_t pr = thrust::get<1>(t2); diff --git a/cpp/tests/mtmg/threaded_test_jaccard.cu b/cpp/tests/mtmg/threaded_test_jaccard.cu new file mode 100644 index 00000000000..a64cc8ee1fa --- /dev/null +++ b/cpp/tests/mtmg/threaded_test_jaccard.cu @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
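
The threaded_test.cu hunk above switches the validation lambdas from capturing h_sg_pageranks and h_sg_renumber_map by value to capturing them by reference. A by-value capture copies the whole host vector into each closure object that is constructed, which in the nested std::for_each meant one copy of the SG reference data per compared chunk; a by-reference capture stores only a reference. A plain-C++ illustration with hypothetical names:

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<float> h_reference(1'000'000, 0.5f);  // large host-side reference results
  std::vector<int> work_items(100);

  // Capturing [h_reference] by value would copy the 1M-element vector into the closure
  // (and again whenever the closure is copied); [&h_reference] stores only a reference.
  std::for_each(work_items.begin(), work_items.end(), [&h_reference](int) {
    // ... compare a computed value against h_reference ...
  });

  std::printf("checked %zu items against %zu reference values\n",
              work_items.size(), h_reference.size());
  return 0;
}
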
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "utilities/base_fixture.hpp" +#include "utilities/check_utilities.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/thrust_wrapper.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include +#include + +#include + +struct Multithreaded_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_Multithreaded + : public ::testing::TestWithParam> { + public: + Tests_Multithreaded() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + std::vector get_gpu_list() + { + int num_gpus_per_node{1}; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus_per_node)); + + std::vector gpu_list(num_gpus_per_node); + std::iota(gpu_list.begin(), gpu_list.end(), 0); + + return gpu_list; + } + + template + void run_current_test( + std::tuple const& param, + std::vector gpu_list) + { + using edge_type_t = int32_t; + + constexpr bool renumber = true; + constexpr bool do_expensive_check = false; + constexpr bool store_transposed = false; + constexpr bool test_weighted = false; + + auto [multithreaded_usecase, input_usecase] = param; + + raft::handle_t handle{}; + + size_t device_buffer_size{64 * 1024 * 1024}; + size_t thread_buffer_size{4 * 1024 * 1024}; + + const int num_threads_per_gpu{4}; + int num_gpus = gpu_list.size(); + int num_threads = num_gpus * num_threads_per_gpu; + + cugraph::mtmg::resource_manager_t resource_manager; + + std::for_each(gpu_list.begin(), gpu_list.end(), [&resource_manager](int gpu_id) { + resource_manager.register_local_gpu(gpu_id, rmm::cuda_device_id{gpu_id}); + }); + + ncclUniqueId instance_manager_id; + ncclGetUniqueId(&instance_manager_id); + + // Currently the only uses for multiple streams for each CPU threads + // associated with a particular GPU, which is a constant set above + auto instance_manager = resource_manager.create_instance_manager( + resource_manager.registered_ranks(), instance_manager_id, num_threads_per_gpu); + + cugraph::mtmg::edgelist_t edgelist; + cugraph::mtmg::graph_t graph; + cugraph::mtmg::graph_view_t graph_view; + cugraph::mtmg::vertex_pair_result_t jaccard_results; + std::optional> renumber_map = + std::make_optional>(); + + std::optional, + weight_t>> + edge_weights{std::nullopt}; + + // + // Simulate graph creation by spawning threads to walk through the + // local COO and add edges + // + std::vector running_threads; + + // Initialize shared edgelist object, one per GPU + for (int i = 0; i < num_gpus; ++i) { + running_threads.emplace_back([&instance_manager, + &edgelist, + device_buffer_size, + use_weight = true, + use_edge_id = false, + use_edge_type = false]() { + auto thread_handle = instance_manager->get_handle(); + + edgelist.set(thread_handle, device_buffer_size, use_weight, use_edge_id, use_edge_type); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + 
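
Every phase of this test (initializing the per-GPU edgelists, loading edges, building the graph, running Jaccard, gathering results) repeats the same cycle: spawn one std::thread per GPU or per worker, have each thread pull a handle from the instance manager, do its slice of the work, then join and reset_threads before the next phase begins. A stripped-down, cugraph-free sketch of that cycle (the instance-manager calls appear only as comments):

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main()
{
  int const num_gpus{2};
  int const num_threads_per_gpu{4};
  std::atomic<int> work_done{0};

  std::vector<std::thread> running_threads;

  // One worker per (gpu, thread) pair; in the real test each worker starts with
  // auto thread_handle = instance_manager->get_handle();
  for (int i = 0; i < num_gpus * num_threads_per_gpu; ++i) {
    running_threads.emplace_back([i, &work_done]() {
      // ... append this thread's share of edges / run the algorithm ...
      work_done.fetch_add(1);
    });
  }

  // Wait for CPU threads to complete; the real test then calls
  // instance_manager->reset_threads() before starting the next phase.
  for (auto& t : running_threads) { t.join(); }
  running_threads.clear();

  std::printf("phase finished, %d workers ran\n", work_done.load());
  return 0;
}
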
running_threads.resize(0); + instance_manager->reset_threads(); + + // Load SG edge list + auto [d_src_v, d_dst_v, d_weights_v, d_vertices_v, is_symmetric] = + input_usecase.template construct_edgelist( + handle, test_weighted, store_transposed, false); + + rmm::device_uvector d_unique_vertices(2 * d_src_v.size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), d_src_v.begin(), d_src_v.end(), d_unique_vertices.begin()); + thrust::copy(handle.get_thrust_policy(), + d_dst_v.begin(), + d_dst_v.end(), + d_unique_vertices.begin() + d_src_v.size()); + thrust::sort(handle.get_thrust_policy(), d_unique_vertices.begin(), d_unique_vertices.end()); + + d_unique_vertices.resize(thrust::distance(d_unique_vertices.begin(), + thrust::unique(handle.get_thrust_policy(), + d_unique_vertices.begin(), + d_unique_vertices.end())), + handle.get_stream()); + + auto h_src_v = cugraph::test::to_host(handle, d_src_v); + auto h_dst_v = cugraph::test::to_host(handle, d_dst_v); + auto h_weights_v = cugraph::test::to_host(handle, d_weights_v); + auto unique_vertices = cugraph::test::to_host(handle, d_unique_vertices); + + // Load edgelist from different threads. We'll use more threads than GPUs here + for (int i = 0; i < num_threads; ++i) { + running_threads.emplace_back([&instance_manager, + thread_buffer_size, + &edgelist, + &h_src_v, + &h_dst_v, + &h_weights_v, + i, + num_threads]() { + auto thread_handle = instance_manager->get_handle(); + cugraph::mtmg::per_thread_edgelist_t + per_thread_edgelist(edgelist.get(thread_handle), thread_buffer_size); + + for (size_t j = i; j < h_src_v.size(); j += num_threads) { + per_thread_edgelist.append( + thread_handle, + h_src_v[j], + h_dst_v[j], + h_weights_v ? std::make_optional((*h_weights_v)[j]) : std::nullopt, + std::nullopt, + std::nullopt); + } + + per_thread_edgelist.flush(thread_handle); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + for (int i = 0; i < num_gpus; ++i) { + running_threads.emplace_back([&instance_manager, + &graph, + &edge_weights, + &edgelist, + &renumber_map, + &jaccard_results, + is_symmetric = is_symmetric, + renumber, + do_expensive_check]() { + auto thread_handle = instance_manager->get_handle(); + + if (thread_handle.get_thread_rank() > 0) return; + + std::optional, + edge_t>> + edge_ids{std::nullopt}; + std::optional, + int32_t>> + edge_types{std::nullopt}; + + edgelist.finalize_buffer(thread_handle); + edgelist.consolidate_and_shuffle(thread_handle, store_transposed); + + cugraph::mtmg::create_graph_from_edgelist( + thread_handle, + edgelist, + cugraph::graph_properties_t{is_symmetric, true}, + renumber, + graph, + edge_weights, + edge_ids, + edge_types, + renumber_map, + do_expensive_check); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + graph_view = graph.view(); + + for (int i = 0; i < num_threads; ++i) { + running_threads.emplace_back( + [&instance_manager, &graph_view, &edge_weights, &jaccard_results]() { + auto thread_handle = instance_manager->get_handle(); + + if (thread_handle.get_thread_rank() > 0) return; + + auto local_results = + cugraph::jaccard_all_pairs_coefficients( + thread_handle.raft_handle(), + graph_view.get(thread_handle), + edge_weights ? 
std::make_optional(edge_weights->get(thread_handle).view()) + : std::nullopt, + std::nullopt, + std::nullopt, + true); + + jaccard_results.set(thread_handle, std::move(local_results)); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + std::vector, std::vector, std::vector>> + computed_similarities_v; + std::mutex computed_similarities_lock{}; + + auto jaccard_results_view = jaccard_results.view(); + auto renumber_map_view = renumber_map ? std::make_optional(renumber_map->view()) : std::nullopt; + + // Load computed_similarities from different threads. + for (int i = 0; i < num_gpus; ++i) { + running_threads.emplace_back([&instance_manager, + &graph_view, + &renumber_map_view, + &jaccard_results_view, + &computed_similarities_lock, + &computed_similarities_v, + &h_src_v, + &h_dst_v, + &h_weights_v, + &unique_vertices, + i, + num_threads]() { + auto thread_handle = instance_manager->get_handle(); + + auto number_of_vertices = unique_vertices.size(); + + std::vector my_vertex_list; + my_vertex_list.reserve((number_of_vertices + num_threads - 1) / num_threads); + + for (size_t j = i; j < number_of_vertices; j += num_threads) { + my_vertex_list.push_back(unique_vertices[j]); + } + + rmm::device_uvector d_my_vertex_list(my_vertex_list.size(), + thread_handle.raft_handle().get_stream()); + raft::update_device(d_my_vertex_list.data(), + my_vertex_list.data(), + my_vertex_list.size(), + thread_handle.raft_handle().get_stream()); + + auto xxx = graph_view.get_vertex_partition_range_lasts(thread_handle); + + // TODO: What's this going to look like? + auto [d_my_jaccard_v1, d_my_jaccard_v2, d_my_jaccard_similarity] = + jaccard_results_view.gather( + thread_handle, + raft::device_span{d_my_vertex_list.data(), d_my_vertex_list.size()}, + // graph_view.get_vertex_partition_range_lasts(thread_handle), + xxx, + graph_view.get_vertex_partition_view(thread_handle), + renumber_map_view); + + std::vector my_jaccard_v1(d_my_jaccard_v1.size()); + std::vector my_jaccard_v2(d_my_jaccard_v2.size()); + std::vector my_jaccard_similarity(d_my_jaccard_similarity.size()); + raft::update_host(my_jaccard_v1.data(), + d_my_jaccard_v1.data(), + d_my_jaccard_v1.size(), + thread_handle.raft_handle().get_stream()); + raft::update_host(my_jaccard_v2.data(), + d_my_jaccard_v2.data(), + d_my_jaccard_v2.size(), + thread_handle.raft_handle().get_stream()); + raft::update_host(my_jaccard_similarity.data(), + d_my_jaccard_similarity.data(), + d_my_jaccard_similarity.size(), + thread_handle.raft_handle().get_stream()); + + { + std::lock_guard lock(computed_similarities_lock); + computed_similarities_v.push_back(std::make_tuple( + std::move(my_jaccard_v1), std::move(my_jaccard_v2), std::move(my_jaccard_similarity))); + } + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + if (multithreaded_usecase.check_correctness) { + // Want to compare the results in computed_similarities_v with SG results + cugraph::graph_t sg_graph(handle); + std::optional< + cugraph::edge_property_t, + weight_t>> + sg_edge_weights{std::nullopt}; + + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore, std::ignore) = + cugraph::create_graph_from_edgelist(handle, + std::nullopt, + std::move(d_src_v), + std::move(d_dst_v), + 
std::move(d_weights_v), + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{is_symmetric, true}, + false); + + auto [sg_v1, sg_v2, sg_similarities] = + cugraph::jaccard_all_pairs_coefficients( + handle, + sg_graph.view(), + sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt, + std::nullopt, + std::nullopt); + + auto h_sg_v1 = cugraph::test::to_host(handle, sg_v1); + auto h_sg_v2 = cugraph::test::to_host(handle, sg_v2); + auto h_sg_similarities = cugraph::test::to_host(handle, sg_similarities); + auto compare_functor = cugraph::test::nearly_equal{ + weight_t{1e-3}, + weight_t{(weight_t{1} / static_cast(h_sg_v1.size())) * weight_t{1e-3}}}; + + std::map, weight_t> sg_results; + + std::for_each( + thrust::make_zip_iterator(h_sg_v1.begin(), h_sg_v2.begin(), h_sg_similarities.begin()), + thrust::make_zip_iterator(h_sg_v1.end(), h_sg_v2.end(), h_sg_similarities.end()), + [&sg_results](auto tuple) { + sg_results.insert(std::make_pair( + std::make_tuple(thrust::get<0>(tuple), thrust::get<1>(tuple)), thrust::get<2>(tuple))); + }); + + std::for_each( + computed_similarities_v.begin(), + computed_similarities_v.end(), + [&sg_results, compare_functor](auto t1) { + std::for_each( + thrust::make_zip_iterator( + std::get<0>(t1).begin(), std::get<1>(t1).begin(), std::get<2>(t1).begin()), + thrust::make_zip_iterator( + std::get<0>(t1).end(), std::get<1>(t1).end(), std::get<2>(t1).end()), + [&sg_results, compare_functor](auto t2) { + vertex_t v1 = thrust::get<0>(t2); + vertex_t v2 = thrust::get<1>(t2); + weight_t jaccard = thrust::get<2>(t2); + + auto pos = sg_results.find(std::make_tuple(v1, v2)); + + ASSERT_NE(pos, sg_results.end()) + << "vertex pair (" << v1 << "," << v2 << ") from mtmg result not found in SG result" + << std::endl; + + ASSERT_TRUE(compare_functor(jaccard, pos->second)) + << "vertex pair (" << v1 << "," << v2 << ") SG result = " << pos->second + << ", mtmg result = " << jaccard; + }); + }); + } + } +}; + +using Tests_Multithreaded_File = Tests_Multithreaded; +using Tests_Multithreaded_Rmat = Tests_Multithreaded; + +// FIXME: add tests for type combinations +TEST_P(Tests_Multithreaded_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), std::vector{{0, 1}}); +} + +TEST_P(Tests_Multithreaded_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), std::vector{{0, 1}}); +} + +INSTANTIATE_TEST_SUITE_P(file_test, + Tests_Multithreaded_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(Multithreaded_Usecase{true}), + ::testing::Values(cugraph::test::File_Usecase("karate.csv"), + cugraph::test::File_Usecase("dolphins.csv")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Multithreaded_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values(Multithreaded_Usecase{true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with + --gtest_filter to select only the file_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one File_Usecase that differ only in filename + (to avoid running same benchmarks more than once) */ + Tests_Multithreaded_File, + ::testing::Combine( + // disable correctness checks + ::testing::Values(Multithreaded_Usecase{false}), + 
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Multithreaded_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + ::testing::Values(Multithreaded_Usecase{false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/mtmg/threaded_test_louvain.cu b/cpp/tests/mtmg/threaded_test_louvain.cu index ff9641d59f8..c8faf33dae2 100644 --- a/cpp/tests/mtmg/threaded_test_louvain.cu +++ b/cpp/tests/mtmg/threaded_test_louvain.cu @@ -14,9 +14,9 @@ * limitations under the License. */ #include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" #include "utilities/device_comm_wrapper.hpp" #include "utilities/test_graphs.hpp" -#include "utilities/test_utilities.hpp" #include "utilities/thrust_wrapper.hpp" #include @@ -83,6 +83,7 @@ class Tests_Multithreaded constexpr bool renumber = true; constexpr bool do_expensive_check = false; + constexpr bool store_transposed = false; auto [multithreaded_usecase, input_usecase] = param; @@ -111,17 +112,18 @@ class Tests_Multithreaded resource_manager.registered_ranks(), instance_manager_id); cugraph::mtmg::edgelist_t edgelist; - cugraph::mtmg::graph_t graph; - cugraph::mtmg::graph_view_t graph_view; + cugraph::mtmg::graph_t graph; + cugraph::mtmg::graph_view_t graph_view; cugraph::mtmg::vertex_result_t louvain_clusters; std::optional> renumber_map = std::make_optional>(); - auto edge_weights = multithreaded_usecase.test_weighted - ? std::make_optional, - weight_t>>() - : std::nullopt; + auto edge_weights = + multithreaded_usecase.test_weighted + ? 
std::make_optional, + weight_t>>() + : std::nullopt; // // Simulate graph creation by spawning threads to walk through the @@ -220,29 +222,34 @@ class Tests_Multithreaded if (thread_handle.get_thread_rank() > 0) return; std::optional, + cugraph::mtmg::graph_view_t, edge_t>> edge_ids{std::nullopt}; std::optional, + cugraph::mtmg::graph_view_t, int32_t>> edge_types{std::nullopt}; edgelist.finalize_buffer(thread_handle); - edgelist.consolidate_and_shuffle(thread_handle, false); - - cugraph::mtmg:: - create_graph_from_edgelist( - thread_handle, - edgelist, - cugraph::graph_properties_t{is_symmetric, true}, - renumber, - graph, - edge_weights, - edge_ids, - edge_types, - renumber_map, - do_expensive_check); + edgelist.consolidate_and_shuffle(thread_handle, store_transposed); + + cugraph::mtmg::create_graph_from_edgelist( + thread_handle, + edgelist, + cugraph::graph_properties_t{is_symmetric, true}, + renumber, + graph, + edge_weights, + edge_ids, + edge_types, + renumber_map, + do_expensive_check); }); } @@ -365,9 +372,10 @@ class Tests_Multithreaded if (multithreaded_usecase.check_correctness) { // Want to compare the results in computed_clusters_v with SG results - cugraph::graph_t sg_graph(handle); + cugraph::graph_t sg_graph(handle); std::optional< - cugraph::edge_property_t, weight_t>> + cugraph::edge_property_t, + weight_t>> sg_edge_weights{std::nullopt}; for (int i = 0; i < num_gpus; ++i) { diff --git a/dependencies.yaml b/dependencies.yaml index 9dca069ea33..c0699fdb1c5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -565,7 +565,7 @@ dependencies: - cugraph==24.6.* - pytorch>=2.0 - pytorch-cuda==11.8 - - pyg>=2.4.0 + - pyg>=2.5,<2.6 depends_on_rmm: common: diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst index 3e6664b2db5..8b5efd7aa36 100644 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst +++ b/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst @@ -47,10 +47,26 @@ Graph Attention (GATConv/GATv2Conv) .. autosummary:: :toctree: ../../api/ops - operators.mha_gat_n2n_fwd - operators.mha_gat_n2n_bwd - operators.mha_gat_n2n_efeat_fwd - operators.mha_gat_n2n_efeat_bwd + operators.mha_gat_n2n_fwd_bf16_fp32 + operators.mha_gat_n2n_fwd_fp16_fp32 + operators.mha_gat_n2n_fwd_fp32_fp32 + operators.mha_gat_n2n_bwd_bf16_bf16_bf16_fp32 + operators.mha_gat_n2n_bwd_bf16_bf16_fp32_fp32 + operators.mha_gat_n2n_bwd_bf16_fp32_fp32_fp32 + operators.mha_gat_n2n_bwd_fp16_fp16_fp16_fp32 + operators.mha_gat_n2n_bwd_fp16_fp16_fp32_fp32 + operators.mha_gat_n2n_bwd_fp16_fp32_fp32_fp32 + operators.mha_gat_n2n_bwd_fp32_fp32_fp32_fp32 + operators.mha_gat_n2n_efeat_fwd_bf16_fp32 + operators.mha_gat_n2n_efeat_fwd_fp16_fp32 + operators.mha_gat_n2n_efeat_fwd_fp32_fp32 + operators.mha_gat_n2n_efeat_bwd_bf16_bf16_bf16_fp32 + operators.mha_gat_n2n_efeat_bwd_bf16_bf16_fp32_fp32 + operators.mha_gat_n2n_efeat_bwd_bf16_fp32_fp32_fp32 + operators.mha_gat_n2n_efeat_bwd_fp16_fp16_fp16_fp32 + operators.mha_gat_n2n_efeat_bwd_fp16_fp16_fp32_fp32 + operators.mha_gat_n2n_efeat_bwd_fp16_fp32_fp32_fp32 + operators.mha_gat_n2n_efeat_bwd_fp32_fp32_fp32_fp32 operators.mha_gat_v2_n2n_fwd operators.mha_gat_v2_n2n_bwd diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py index 952b962aca2..66bc3137fba 100644 --- a/docs/cugraph/source/conf.py +++ b/docs/cugraph/source/conf.py @@ -190,9 +190,17 @@ 'Miscellaneous'), ] -# Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} - +# Connect docs in other projects +intersphinx_mapping = { + "networkx": ( + "https://networkx.org/documentation/stable/", + "https://networkx.org/documentation/stable/objects.inv", + ), + "python": ( + "https://docs.python.org/3", + "https://docs.python.org/3/objects.inv", + ), +} # Config numpydoc numpydoc_show_inherited_class_members = False diff --git a/docs/cugraph/source/graph_support/DGL_support.md b/docs/cugraph/source/graph_support/DGL_support.md index dc4f66180ac..9df462155fd 100644 --- a/docs/cugraph/source/graph_support/DGL_support.md +++ b/docs/cugraph/source/graph_support/DGL_support.md @@ -17,7 +17,7 @@ mamba install cugraph-dgl -c rapidsai-nightly -c rapidsai -c pytorch -c conda-fo ### Create the conda development environment ``` -mamba env create -n cugraph_dgl_dev --file conda/cugraph_dgl_dev_11.6.yml +conda env create -n cugraph_dgl_dev --file conda/environments/all_cuda-122_arch-x86_64.yaml ``` ### Install in editable mode diff --git a/docs/cugraph/source/graph_support/cugraphops_support.rst b/docs/cugraph/source/graph_support/cugraphops_support.rst index fd79564f849..96b13f62a9c 100644 --- a/docs/cugraph/source/graph_support/cugraphops_support.rst +++ b/docs/cugraph/source/graph_support/cugraphops_support.rst @@ -7,4 +7,4 @@ cugraph-ops aims to be a low-level, framework agnostic library providing commonl .. toctree:: :maxdepth: 3 - https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md + https://github.com/rapidsai/cugraph/blob/branch-24.06/readme_pages/cugraph_ops.md diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml index 94e9f1decbd..ebef0094cfa 100644 --- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -12,7 +12,7 @@ dependencies: - cugraph==24.6.* - pandas - pre-commit -- pyg>=2.4.0 +- pyg>=2.5,<2.6 - pylibcugraphops==24.6.* - pytest - pytest-benchmark diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py index 10431a0398d..713448a8203 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,11 +15,15 @@ from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import CSC, HeteroCSC +import pylibcugraphops.pytorch + torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") +# A tuple of (row, colptr, num_src_nodes) +CSC = Tuple[torch.Tensor, torch.Tensor, int] + class BaseConv(torch.nn.Module): # pragma: no cover r"""An abstract base class for implementing cugraph-ops message passing layers.""" @@ -33,10 +37,7 @@ def to_csc( edge_index: torch.Tensor, size: Optional[Tuple[int, int]] = None, edge_attr: Optional[torch.Tensor] = None, - ) -> Union[ - Tuple[torch.Tensor, torch.Tensor, int], - Tuple[Tuple[torch.Tensor, torch.Tensor, int], torch.Tensor], - ]: + ) -> Union[CSC, Tuple[CSC, torch.Tensor],]: r"""Returns a CSC representation of an :obj:`edge_index` tensor to be used as input to cugraph-ops conv layers. 
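
The new CSC alias above names the (row, colptr, num_src_nodes) tuple that to_csc produces. For readers who want the layout spelled out, a plain-C++ illustration with toy data (not cugraph-ops or PyG API): colptr has one entry per destination node plus one, and the sources of the edges entering destination d occupy row[colptr[d] .. colptr[d+1]).

#include <cstdio>
#include <vector>

int main()
{
  // Edges (src -> dst): 0->1, 2->1, 1->2 on a 3-node graph.
  int num_src_nodes = 3;
  std::vector<int> colptr = {0, 0, 2, 3};  // dst 0 has no in-edges, dst 1 has two, dst 2 has one
  std::vector<int> row    = {0, 2, 1};     // source vertices, grouped by destination

  for (int d = 0; d + 1 < static_cast<int>(colptr.size()); ++d) {
    for (int e = colptr[d]; e < colptr[d + 1]; ++e) {
      std::printf("edge %d -> %d\n", row[e], d);
    }
  }
  std::printf("num_src_nodes = %d\n", num_src_nodes);
  return 0;
}
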
@@ -71,19 +72,17 @@ def to_csc( def get_cugraph( self, - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> CSC: + ) -> Tuple[pylibcugraphops.pytorch.CSC, Optional[torch.Tensor]]: r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. Supports both bipartite and non-bipartite graphs. Args: - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. + edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge + indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for + CSC representation. bipartite (bool): If set to :obj:`True`, will create the bipartite structure in cugraph-ops. (default: :obj:`False`) max_num_neighbors (int, optional): The maximum number of neighbors @@ -91,7 +90,13 @@ def get_cugraph( the message-flow-graph primitives in cugraph-ops. (default: :obj:`None`) """ - row, colptr, num_src_nodes = csc + perm = None + if isinstance(edge_index, torch_geometric.EdgeIndex): + edge_index, perm = edge_index.sort_by("col") + num_src_nodes = edge_index.get_sparse_size(0) + (colptr, row), _ = edge_index.get_csc() + else: + row, colptr, num_src_nodes = edge_index if not row.is_cuda: raise RuntimeError( @@ -102,32 +107,33 @@ def get_cugraph( if max_num_neighbors is None: max_num_neighbors = -1 - return CSC( - offsets=colptr, - indices=row, - num_src_nodes=num_src_nodes, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, + return ( + pylibcugraphops.pytorch.CSC( + offsets=colptr, + indices=row, + num_src_nodes=num_src_nodes, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ), + perm, ) def get_typed_cugraph( self, - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], edge_type: torch.Tensor, num_edge_types: Optional[int] = None, bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> HeteroCSC: + ) -> Tuple[pylibcugraphops.pytorch.HeteroCSC, Optional[torch.Tensor]]: r"""Constructs a typed :obj:`cugraph` graph object from a CSC representation where each edge corresponds to a given edge type. Supports both bipartite and non-bipartite graphs. Args: - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. + edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge + indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for + CSC representation. edge_type (torch.Tensor): The edge type. num_edge_types (int, optional): The maximum number of edge types. 
When not given, will be computed on-the-fly, leading to @@ -145,32 +151,40 @@ def get_typed_cugraph( if max_num_neighbors is None: max_num_neighbors = -1 - row, colptr, num_src_nodes = csc + perm = None + if isinstance(edge_index, torch_geometric.EdgeIndex): + edge_index, perm = edge_index.sort_by("col") + edge_type = edge_type[perm] + num_src_nodes = edge_index.get_sparse_size(0) + (colptr, row), _ = edge_index.get_csc() + else: + row, colptr, num_src_nodes = edge_index edge_type = edge_type.int() - return HeteroCSC( - offsets=colptr, - indices=row, - edge_types=edge_type, - num_src_nodes=num_src_nodes, - num_edge_types=num_edge_types, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, + return ( + pylibcugraphops.pytorch.HeteroCSC( + offsets=colptr, + indices=row, + edge_types=edge_type, + num_src_nodes=num_src_nodes, + num_edge_types=num_edge_types, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ), + perm, ) def forward( self, x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], ) -> torch.Tensor: r"""Runs the forward pass of the module. Args: x (torch.Tensor): The node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. + edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge + indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for + CSC representation. """ raise NotImplementedError diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py index d1785f2bef8..981b1c5b50d 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py @@ -16,7 +16,7 @@ from cugraph.utilities.utils import import_optional from pylibcugraphops.pytorch.operators import mha_gat_n2n -from .base import BaseConv +from .base import BaseConv, CSC torch = import_optional("torch") nn = import_optional("torch.nn") @@ -159,7 +159,7 @@ def reset_parameters(self): def forward( self, x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], edge_attr: Optional[torch.Tensor] = None, max_num_neighbors: Optional[int] = None, deterministic_dgrad: bool = False, @@ -172,11 +172,7 @@ def forward( Args: x (torch.Tensor or tuple): The node features. Can be a tuple of tensors denoting source and destination node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. + edge_index (EdgeIndex or CSC): The edge indices. edge_attr: (torch.Tensor, optional) The edge features. max_num_neighbors (int, optional): The maximum number of neighbors of a destination node. When enabled, it allows models to use @@ -198,9 +194,12 @@ def forward( the corresponding input type at the very end. 
""" bipartite = not isinstance(x, torch.Tensor) - graph = self.get_cugraph( - csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors + graph, perm = self.get_cugraph( + edge_index=edge_index, + bipartite=bipartite, + max_num_neighbors=max_num_neighbors, ) + if deterministic_dgrad: graph.add_reverse_graph() @@ -212,6 +211,8 @@ def forward( ) if edge_attr.dim() == 1: edge_attr = edge_attr.view(-1, 1) + if perm is not None: + edge_attr = edge_attr[perm] edge_attr = self.lin_edge(edge_attr) if bipartite: diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py index 33865898816..ebb30de9754 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py @@ -16,7 +16,7 @@ from cugraph.utilities.utils import import_optional from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n -from .base import BaseConv +from .base import BaseConv, CSC torch = import_optional("torch") nn = import_optional("torch.nn") @@ -172,7 +172,7 @@ def reset_parameters(self): def forward( self, x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], edge_attr: Optional[torch.Tensor] = None, deterministic_dgrad: bool = False, deterministic_wgrad: bool = False, @@ -182,11 +182,7 @@ def forward( Args: x (torch.Tensor or tuple): The node features. Can be a tuple of tensors denoting source and destination node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. + edge_index (EdgeIndex or CSC): The edge indices. edge_attr: (torch.Tensor, optional) The edge features. deterministic_dgrad : bool, default=False Optional flag indicating whether the feature gradients @@ -196,7 +192,7 @@ def forward( are computed deterministically using a dedicated workspace buffer. """ bipartite = not isinstance(x, torch.Tensor) or not self.share_weights - graph = self.get_cugraph(csc, bipartite=bipartite) + graph, perm = self.get_cugraph(edge_index, bipartite=bipartite) if deterministic_dgrad: graph.add_reverse_graph() @@ -208,6 +204,8 @@ def forward( ) if edge_attr.dim() == 1: edge_attr = edge_attr.view(-1, 1) + if perm is not None: + edge_attr = edge_attr[perm] edge_attr = self.lin_edge(edge_attr) if bipartite: diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py index 3b717552a96..a73dd8e57ff 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,6 +18,7 @@ from pylibcugraphops.pytorch.operators import mha_gat_n2n from .base import BaseConv +from cugraph_pyg.utils.imports import package_available torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") @@ -74,10 +75,10 @@ def __init__( bias: bool = True, aggr: str = "sum", ): - major, minor, patch = torch_geometric.__version__.split(".")[:3] - pyg_version = tuple(map(int, [major, minor, patch])) - if pyg_version < (2, 4, 0): - raise RuntimeError(f"{self.__class__.__name__} requires pyg >= 2.4.0.") + if not package_available("torch_geometric>=2.4.0"): + raise RuntimeError( + f"{self.__class__.__name__} requires torch_geometric>=2.4.0." + ) super().__init__() @@ -225,7 +226,7 @@ def forward( ) if src_type == dst_type: - graph = self.get_cugraph( + graph, _ = self.get_cugraph( csc, bipartite=False, ) @@ -240,7 +241,7 @@ def forward( ) else: - graph = self.get_cugraph( + graph, _ = self.get_cugraph( csc, bipartite=True, ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py index 683780b66eb..13fa08db5c5 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,12 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Tuple +from typing import Optional, Union from cugraph.utilities.utils import import_optional from pylibcugraphops.pytorch.operators import agg_hg_basis_n2n_post -from .base import BaseConv +from .base import BaseConv, CSC torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") @@ -110,13 +110,16 @@ def reset_parameters(self): def forward( self, x: torch.Tensor, - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], edge_type: torch.Tensor, max_num_neighbors: Optional[int] = None, ) -> torch.Tensor: - graph = self.get_typed_cugraph( - csc, edge_type, self.num_relations, max_num_neighbors=max_num_neighbors + graph, _ = self.get_typed_cugraph( + edge_index, + edge_type, + self.num_relations, + max_num_neighbors=max_num_neighbors, ) out = agg_hg_basis_n2n_post( diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py index 8e0c1027416..65dc99d8988 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,7 +16,7 @@ from cugraph.utilities.utils import import_optional from pylibcugraphops.pytorch.operators import agg_concat_n2n -from .base import BaseConv +from .base import BaseConv, CSC torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") @@ -116,12 +116,14 @@ def reset_parameters(self): def forward( self, x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], max_num_neighbors: Optional[int] = None, ) -> torch.Tensor: bipartite = isinstance(x, Tuple) - graph = self.get_cugraph( - csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors + graph, _ = self.get_cugraph( + edge_index=edge_index, + bipartite=bipartite, + max_num_neighbors=max_num_neighbors, ) if self.project: diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py index 41c0b4b4090..e184ee0e893 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ from cugraph.utilities.utils import import_optional from pylibcugraphops.pytorch.operators import mha_simple_n2n -from .base import BaseConv +from .base import BaseConv, CSC torch = import_optional("torch") nn = import_optional("torch.nn") @@ -153,7 +153,7 @@ def reset_parameters(self): def forward( self, x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_index: Union[torch_geometric.EdgeIndex, CSC], edge_attr: Optional[torch.Tensor] = None, ) -> torch.Tensor: r"""Runs the forward pass of the module. @@ -161,15 +161,11 @@ def forward( Args: x (torch.Tensor or tuple): The node features. Can be a tuple of tensors denoting source and destination node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. + edge_index (EdgeIndex or CSC): The edge indices. edge_attr: (torch.Tensor, optional) The edge features. """ bipartite = True - graph = self.get_cugraph(csc, bipartite=bipartite) + graph, perm = self.get_cugraph(edge_index=edge_index, bipartite=bipartite) if isinstance(x, torch.Tensor): x = (x, x) @@ -184,6 +180,8 @@ def forward( f"{self.__class__.__name__}.edge_dim must be set to accept " f"edge features." 
) + if perm is not None: + edge_attr = edge_attr[perm] edge_attr = self.lin_edge(edge_attr) out = mha_simple_n2n( diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py b/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py index ffab54efe08..8bcfb783ae1 100644 --- a/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py +++ b/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py @@ -171,8 +171,8 @@ def _sampler_output_from_sampling_results_homogeneous_coo( row=row_dict, col=col_dict, edge=None, - num_sampled_nodes=num_nodes_per_hop_dict, - num_sampled_edges=num_edges_per_hop_dict, + num_sampled_nodes={k: t.tolist() for k, t in num_nodes_per_hop_dict.items()}, + num_sampled_edges={k: t.tolist() for k, t in num_edges_per_hop_dict.items()}, metadata=metadata, ) @@ -222,7 +222,9 @@ def _sampler_output_from_sampling_results_homogeneous_csr( major_offsets = major_offsets.clone() - major_offsets[0] label_hop_offsets = label_hop_offsets.clone() - label_hop_offsets[0] - num_edges_per_hop_dict = {edge_type: major_offsets[label_hop_offsets].diff().cpu()} + num_edges_per_hop_dict = { + edge_type: major_offsets[label_hop_offsets].diff().tolist() + } label_hop_offsets = label_hop_offsets.cpu() num_nodes_per_hop_dict = { @@ -231,7 +233,7 @@ def _sampler_output_from_sampling_results_homogeneous_csr( label_hop_offsets.diff(), (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)), ] - ).cpu() + ).tolist() } noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")} @@ -397,8 +399,8 @@ def _sampler_output_from_sampling_results_heterogeneous( row=row_dict, col=col_dict, edge=None, - num_sampled_nodes=num_nodes_per_hop_dict, - num_sampled_edges=num_edges_per_hop_dict, + num_sampled_nodes={k: t.tolist() for k, t in num_nodes_per_hop_dict.items()}, + num_sampled_edges={k: t.tolist() for k, t in num_edges_per_hop_dict.items()}, metadata=metadata, ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py index 7047c62250b..85acbebc3ec 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py @@ -373,10 +373,15 @@ def test_get_input_nodes(karate_gnn, dask_client): F, G, N = karate_gnn cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - node_type, input_nodes = torch_geometric.loader.utils.get_input_nodes( + nodes = torch_geometric.loader.utils.get_input_nodes( (cugraph_store, cugraph_store), "type0" ) + if len(nodes) == 2: + node_type, input_nodes = nodes + else: + node_type, input_nodes, _ = nodes + assert node_type == "type0" assert input_nodes.tolist() == torch.arange(17, dtype=torch.int32).tolist() diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py index 62bebb9211d..a26063f62fa 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -14,10 +14,15 @@ import pytest from cugraph_pyg.nn import GATConv as CuGraphGATConv +from cugraph_pyg.utils.imports import package_available ATOL = 1e-6 +@pytest.mark.skipif( + package_available("torch_geometric<2.5"), reason="Test requires pyg>=2.5" +) +@pytest.mark.parametrize("use_edge_index", [True, False]) @pytest.mark.parametrize("bias", [True, False]) @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @@ -26,10 +31,18 @@ @pytest.mark.parametrize("use_edge_attr", [True, False]) @pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) def test_gat_conv_equality( - bias, bipartite, concat, heads, max_num_neighbors, use_edge_attr, graph, request + use_edge_index, + bias, + bipartite, + concat, + heads, + max_num_neighbors, + use_edge_attr, + graph, + request, ): - pytest.importorskip("torch_geometric", reason="PyG not available") import torch + from torch_geometric import EdgeIndex from torch_geometric.nn import GATConv torch.manual_seed(12345) @@ -50,13 +63,19 @@ def test_gat_conv_equality( if use_edge_attr: edge_dim = 3 edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - csc, edge_attr_perm = CuGraphGATConv.to_csc( - edge_index, size, edge_attr=edge_attr - ) else: - edge_dim = None - edge_attr = edge_attr_perm = None - csc = CuGraphGATConv.to_csc(edge_index, size) + edge_dim = edge_attr = None + + if use_edge_index: + csc = EdgeIndex(edge_index, sparse_size=size) + else: + if use_edge_attr: + csc, edge_attr_perm = CuGraphGATConv.to_csc( + edge_index, size, edge_attr=edge_attr + ) + else: + csc = CuGraphGATConv.to_csc(edge_index, size) + edge_attr_perm = None kwargs = dict(bias=bias, concat=concat, edge_dim=edge_dim) @@ -68,19 +87,24 @@ def test_gat_conv_equality( out_dim = heads * out_channels with torch.no_grad(): if bipartite: - conv2.lin_src.weight.data = conv1.lin_src.weight.data.detach().clone() - conv2.lin_dst.weight.data = conv1.lin_dst.weight.data.detach().clone() + conv2.lin_src.weight.copy_(conv1.lin_src.weight) + conv2.lin_dst.weight.copy_(conv1.lin_dst.weight) else: - conv2.lin.weight.data = conv1.lin_src.weight.data.detach().clone() + conv2.lin.weight.copy_(conv1.lin.weight) - conv2.att.data[:out_dim] = conv1.att_src.data.flatten() - conv2.att.data[out_dim : 2 * out_dim] = conv1.att_dst.data.flatten() + conv2.att[:out_dim].copy_(conv1.att_src.flatten()) + conv2.att[out_dim : 2 * out_dim].copy_(conv1.att_dst.flatten()) if use_edge_attr: - conv2.att.data[2 * out_dim :] = conv1.att_edge.data.flatten() - conv2.lin_edge.weight.data = conv1.lin_edge.weight.data.detach().clone() + conv2.att[2 * out_dim :].copy_(conv1.att_edge.flatten()) + conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) out1 = conv1(x, edge_index, edge_attr=edge_attr) - out2 = conv2(x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors) + if use_edge_index: + out2 = conv2(x, csc, edge_attr=edge_attr, max_num_neighbors=max_num_neighbors) + else: + out2 = conv2( + x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors + ) assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) @@ -95,9 +119,7 @@ def test_gat_conv_equality( conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL ) else: - assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin.weight.grad, atol=ATOL - ) + assert torch.allclose(conv1.lin.weight.grad, conv2.lin.weight.grad, atol=ATOL) assert torch.allclose( conv1.att_src.grad.flatten(), 
conv2.att.grad[:out_dim], atol=ATOL diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py index a4794628410..a62f2fed2f7 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,14 +18,18 @@ ATOL = 1e-6 +@pytest.mark.parametrize("use_edge_index", [True, False]) @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) @pytest.mark.parametrize("use_edge_attr", [True, False]) @pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, request): +def test_gatv2_conv_equality( + use_edge_index, bipartite, concat, heads, use_edge_attr, graph, request +): pytest.importorskip("torch_geometric", reason="PyG not available") import torch + from torch_geometric import EdgeIndex from torch_geometric.nn import GATv2Conv torch.manual_seed(12345) @@ -46,13 +50,19 @@ def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, req if use_edge_attr: edge_dim = 3 edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - csc, edge_attr_perm = CuGraphGATv2Conv.to_csc( - edge_index, size, edge_attr=edge_attr - ) else: - edge_dim = None - edge_attr = edge_attr_perm = None - csc = CuGraphGATv2Conv.to_csc(edge_index, size) + edge_dim = edge_attr = None + + if use_edge_index: + csc = EdgeIndex(edge_index, sparse_size=size) + else: + if use_edge_attr: + csc, edge_attr_perm = CuGraphGATv2Conv.to_csc( + edge_index, size, edge_attr=edge_attr + ) + else: + csc = CuGraphGATv2Conv.to_csc(edge_index, size) + edge_attr_perm = None kwargs = dict(bias=False, concat=concat, edge_dim=edge_dim) @@ -62,14 +72,17 @@ def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, req conv2 = CuGraphGATv2Conv(in_channels, out_channels, heads, **kwargs).cuda() with torch.no_grad(): - conv2.lin_src.weight.data = conv1.lin_l.weight.data.detach().clone() - conv2.lin_dst.weight.data = conv1.lin_r.weight.data.detach().clone() - conv2.att.data = conv1.att.data.flatten().detach().clone() + conv2.lin_src.weight.copy_(conv1.lin_l.weight) + conv2.lin_dst.weight.copy_(conv1.lin_r.weight) + conv2.att.copy_(conv1.att.flatten()) if use_edge_attr: - conv2.lin_edge.weight.data = conv1.lin_edge.weight.data.detach().clone() + conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) out1 = conv1(x, edge_index, edge_attr=edge_attr) - out2 = conv2(x, csc, edge_attr=edge_attr_perm) + if use_edge_index: + out2 = conv2(x, csc, edge_attr=edge_attr) + else: + out2 = conv2(x, csc, edge_attr=edge_attr_perm) assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py index 1c841a17df7..d8190ea345f 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,31 +14,23 @@ import pytest from cugraph_pyg.nn import HeteroGATConv as CuGraphHeteroGATConv -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") +from cugraph_pyg.utils.imports import package_available ATOL = 1e-6 @pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.skipif( - isinstance(torch_geometric, MissingModule), reason="torch_geometric not available" + package_available("torch_geometric<2.4"), reason="Test requires pyg>=2.4" ) @pytest.mark.parametrize("heads", [1, 3, 10]) @pytest.mark.parametrize("aggr", ["sum", "mean"]) def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads): - major, minor, patch = torch_geometric.__version__.split(".")[:3] - pyg_version = tuple(map(int, [major, minor, patch])) - if pyg_version < (2, 4, 0): - pytest.skip("Skipping HeteroGATConv test") - + import torch from torch_geometric.data import HeteroData from torch_geometric.nn import HeteroConv, GATConv - device = torch.device("cuda:0") + device = torch.device("cuda") data = HeteroData(sample_pyg_hetero_data).to(device) in_channels_dict = {k: v.size(1) for k, v in data.x_dict.items()} @@ -73,16 +65,19 @@ def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads): with torch.no_grad(): for edge_type in conv2.edge_types: src_t, _, dst_t = edge_type - w_src[edge_type][:, :] = conv1.convs[edge_type].lin_src.weight[:, :] - if w_dst[edge_type] is not None: - w_dst[edge_type][:, :] = conv1.convs[edge_type].lin_dst.weight[:, :] - - conv2.attn_weights[edge_type][: heads * out_channels] = conv1.convs[ - edge_type - ].att_src.data.flatten() - conv2.attn_weights[edge_type][heads * out_channels :] = conv1.convs[ - edge_type - ].att_dst.data.flatten() + if src_t == dst_t: + w_src[edge_type].copy_(conv1.convs[edge_type].lin.weight) + else: + w_src[edge_type].copy_(conv1.convs[edge_type].lin_src.weight) + if w_dst[edge_type] is not None: + w_dst[edge_type].copy_(conv1.convs[edge_type].lin_dst.weight) + + conv2.attn_weights[edge_type][: heads * out_channels].copy_( + conv1.convs[edge_type].att_src.flatten() + ) + conv2.attn_weights[edge_type][heads * out_channels :].copy_( + conv1.convs[edge_type].att_dst.flatten() + ) out1 = conv1(data.x_dict, data.edge_index_dict) out2 = conv2(data.x_dict, data.edge_index_dict) @@ -118,7 +113,11 @@ def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads): for node_t, (rels_as_src, rels_as_dst) in conv2.relations_per_ntype.items(): grad_list = [] for rel_t in rels_as_src: - grad_list.append(conv1.convs[rel_t].lin_src.weight.grad.clone()) + src_type, _, dst_type = rel_t + if src_type == dst_type: + grad_list.append(conv1.convs[rel_t].lin.weight.grad.clone()) + else: + grad_list.append(conv1.convs[rel_t].lin_src.weight.grad.clone()) for rel_t in rels_as_dst: grad_list.append(conv1.convs[rel_t].lin_dst.weight.grad.clone()) assert len(grad_list) > 0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py index ded4f300c0c..fc0aaf25b7b 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py @@ -1,4 +1,4 
@@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,7 @@ ATOL = 1e-6 +@pytest.mark.parametrize("use_edge_index", [True, False]) @pytest.mark.parametrize("aggr", ["add", "sum", "mean"]) @pytest.mark.parametrize("bias", [True, False]) @pytest.mark.parametrize("max_num_neighbors", [8, None]) @@ -25,10 +26,18 @@ @pytest.mark.parametrize("root_weight", [True, False]) @pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) def test_rgcn_conv_equality( - aggr, bias, max_num_neighbors, num_bases, root_weight, graph, request + use_edge_index, + aggr, + bias, + max_num_neighbors, + num_bases, + root_weight, + graph, + request, ): pytest.importorskip("torch_geometric", reason="PyG not available") import torch + from torch_geometric import EdgeIndex from torch_geometric.nn import FastRGCNConv as RGCNConv torch.manual_seed(12345) @@ -39,23 +48,30 @@ def test_rgcn_conv_equality( edge_index = edge_index.cuda() edge_type = torch.randint(num_relations, (edge_index.size(1),)).cuda() + if use_edge_index: + csc = EdgeIndex(edge_index, sparse_size=size) + else: + csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type) + x = torch.rand(size[0], in_channels, device="cuda") - csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type) conv1 = RGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() conv2 = CuGraphRGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() with torch.no_grad(): if root_weight: - conv2.weight.data[:-1] = conv1.weight.data - conv2.weight.data[-1] = conv1.root.data + conv2.weight[:-1].copy_(conv1.weight) + conv2.weight[-1].copy_(conv1.root) else: - conv2.weight.data = conv1.weight.data.detach().clone() + conv2.weight.copy_(conv1.weight) if num_bases is not None: - conv2.comp.data = conv1.comp.data.detach().clone() + conv2.comp.copy_(conv1.comp) out1 = conv1(x, edge_index, edge_type) - out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors) + if use_edge_index: + out2 = conv2(x, csc, edge_type) + else: + out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors) assert torch.allclose(out1, out2, atol=ATOL) grad_out = torch.rand_like(out1) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py index b2977d1d175..9d8d413c590 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,6 +18,7 @@ ATOL = 1e-6 +@pytest.mark.parametrize("use_edge_index", [True, False]) @pytest.mark.parametrize("aggr", ["sum", "mean", "min", "max"]) @pytest.mark.parametrize("bias", [True, False]) @pytest.mark.parametrize("bipartite", [True, False]) @@ -26,16 +27,29 @@ @pytest.mark.parametrize("root_weight", [True, False]) @pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) def test_sage_conv_equality( - aggr, bias, bipartite, max_num_neighbors, normalize, root_weight, graph, request + use_edge_index, + aggr, + bias, + bipartite, + max_num_neighbors, + normalize, + root_weight, + graph, + request, ): pytest.importorskip("torch_geometric", reason="PyG not available") import torch + from torch_geometric import EdgeIndex from torch_geometric.nn import SAGEConv torch.manual_seed(12345) edge_index, size = request.getfixturevalue(graph) edge_index = edge_index.cuda() - csc = CuGraphSAGEConv.to_csc(edge_index, size) + + if use_edge_index: + csc = EdgeIndex(edge_index, sparse_size=size) + else: + csc = CuGraphSAGEConv.to_csc(edge_index, size) if bipartite: in_channels = (7, 3) @@ -55,11 +69,11 @@ def test_sage_conv_equality( in_channels_src = conv2.in_channels_src with torch.no_grad(): - conv2.lin.weight.data[:, :in_channels_src] = conv1.lin_l.weight.data + conv2.lin.weight[:, :in_channels_src].copy_(conv1.lin_l.weight) if root_weight: - conv2.lin.weight.data[:, in_channels_src:] = conv1.lin_r.weight.data + conv2.lin.weight[:, in_channels_src:].copy_(conv1.lin_r.weight) if bias: - conv2.lin.bias.data[:] = conv1.lin_l.bias.data + conv2.lin.bias.copy_(conv1.lin_l.bias) out1 = conv1(x, edge_index) out2 = conv2(x, csc, max_num_neighbors=max_num_neighbors) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py index fbdb244898b..1776b691c87 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,22 +18,23 @@ ATOL = 1e-6 +@pytest.mark.parametrize("use_edge_index", [True, False]) +@pytest.mark.parametrize("use_edge_attr", [True, False]) @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) @pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_transformer_conv_equality(bipartite, concat, heads, graph, request): +def test_transformer_conv_equality( + use_edge_index, use_edge_attr, bipartite, concat, heads, graph, request +): pytest.importorskip("torch_geometric", reason="PyG not available") import torch + from torch_geometric import EdgeIndex from torch_geometric.nn import TransformerConv torch.manual_seed(12345) edge_index, size = request.getfixturevalue(graph) edge_index = edge_index.cuda() - csc = CuGraphTransformerConv.to_csc(edge_index, size) - - out_channels = 2 - kwargs = dict(concat=concat, bias=False, root_weight=False) if bipartite: in_channels = (5, 3) @@ -44,20 +45,45 @@ def test_transformer_conv_equality(bipartite, concat, heads, graph, request): else: in_channels = 5 x = torch.rand(size[0], in_channels, device="cuda") + out_channels = 2 + + if use_edge_attr: + edge_dim = 3 + edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() + else: + edge_dim = edge_attr = None + + if use_edge_index: + csc = EdgeIndex(edge_index, sparse_size=size) + else: + if use_edge_attr: + csc, edge_attr_perm = CuGraphTransformerConv.to_csc( + edge_index, size, edge_attr=edge_attr + ) + else: + csc = CuGraphTransformerConv.to_csc(edge_index, size) + edge_attr_perm = None + + kwargs = dict(concat=concat, bias=False, edge_dim=edge_dim, root_weight=False) conv1 = TransformerConv(in_channels, out_channels, heads, **kwargs).cuda() conv2 = CuGraphTransformerConv(in_channels, out_channels, heads, **kwargs).cuda() with torch.no_grad(): - conv2.lin_query.weight.data = conv1.lin_query.weight.data.detach().clone() - conv2.lin_key.weight.data = conv1.lin_key.weight.data.detach().clone() - conv2.lin_value.weight.data = conv1.lin_value.weight.data.detach().clone() - conv2.lin_query.bias.data = conv1.lin_query.bias.data.detach().clone() - conv2.lin_key.bias.data = conv1.lin_key.bias.data.detach().clone() - conv2.lin_value.bias.data = conv1.lin_value.bias.data.detach().clone() + conv2.lin_query.weight.copy_(conv1.lin_query.weight) + conv2.lin_key.weight.copy_(conv1.lin_key.weight) + conv2.lin_value.weight.copy_(conv1.lin_value.weight) + conv2.lin_query.bias.copy_(conv1.lin_query.bias) + conv2.lin_key.bias.copy_(conv1.lin_key.bias) + conv2.lin_value.bias.copy_(conv1.lin_value.bias) + if use_edge_attr: + conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) - out1 = conv1(x, edge_index) - out2 = conv2(x, csc) + out1 = conv1(x, edge_index, edge_attr=edge_attr) + if use_edge_index: + out2 = conv2(x, csc, edge_attr=edge_attr) + else: + out2 = conv2(x, csc, edge_attr=edge_attr_perm) assert torch.allclose(out1, out2, atol=ATOL) @@ -81,3 +107,8 @@ def test_transformer_conv_equality(bipartite, concat, heads, graph, request): assert torch.allclose( conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=ATOL ) + + if use_edge_attr: + assert torch.allclose( + conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL + ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py index 9813fa933ee..ab20ef01fd3 100644 --- 
a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,7 +32,11 @@ torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") + trim_to_layer = import_optional("torch_geometric.utils.trim_to_layer") +if isinstance(trim_to_layer, MissingModule): + trim_to_layer = import_optional("torch_geometric.utils._trim_to_layer") + try: import torch_sparse # noqa: F401 @@ -278,8 +282,8 @@ def test_cugraph_loader_from_disk_subset_csr(): ) assert row.tolist() == bogus_samples.minors.dropna().values_host.tolist() - assert sample["t0"]["num_sampled_nodes"].tolist() == [1, 3, 2] - assert sample["t0", "knows", "t0"]["num_sampled_edges"].tolist() == [3, 5] + assert sample["t0"]["num_sampled_nodes"] == [1, 3, 2] + assert sample["t0", "knows", "t0"]["num_sampled_edges"] == [3, 5] assert num_samples == 100 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_sampler.py b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_sampler.py index e703d477b70..ed011a658a9 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_sampler.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_sampler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -80,10 +80,10 @@ def test_neighbor_sample(basic_graph_1): # check the hop dictionaries assert len(out.num_sampled_nodes) == 1 - assert out.num_sampled_nodes["vt1"].tolist() == [4, 1] + assert out.num_sampled_nodes["vt1"] == [4, 1] assert len(out.num_sampled_edges) == 1 - assert out.num_sampled_edges[("vt1", "pig", "vt1")].tolist() == [6] + assert out.num_sampled_edges[("vt1", "pig", "vt1")] == [6] @pytest.mark.cugraph_ops @@ -136,15 +136,15 @@ def test_neighbor_sample_multi_vertex(multi_edge_multi_vertex_graph_1): # check the hop dictionaries assert len(out.num_sampled_nodes) == 2 - assert out.num_sampled_nodes["black"].tolist() == [2, 0] - assert out.num_sampled_nodes["brown"].tolist() == [3, 0] + assert out.num_sampled_nodes["black"] == [2, 0] + assert out.num_sampled_nodes["brown"] == [3, 0] assert len(out.num_sampled_edges) == 5 - assert out.num_sampled_edges[("brown", "horse", "brown")].tolist() == [2] - assert out.num_sampled_edges[("brown", "tortoise", "black")].tolist() == [3] - assert out.num_sampled_edges[("brown", "mongoose", "black")].tolist() == [2] - assert out.num_sampled_edges[("black", "cow", "brown")].tolist() == [2] - assert out.num_sampled_edges[("black", "snake", "black")].tolist() == [1] + assert out.num_sampled_edges[("brown", "horse", "brown")] == [2] + assert out.num_sampled_edges[("brown", "tortoise", "black")] == [3] + assert out.num_sampled_edges[("brown", "mongoose", "black")] == [2] + assert out.num_sampled_edges[("black", "cow", "brown")] == [2] + assert out.num_sampled_edges[("black", "snake", "black")] == [1] @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @@ -183,14 +183,14 @@ def test_neighbor_sample_mock_sampling_results(abc_graph): assert out.col[("B", "ba", 
"A")].tolist() == [1, 1] assert len(out.num_sampled_nodes) == 3 - assert out.num_sampled_nodes["A"].tolist() == [2, 0, 0, 0, 0] - assert out.num_sampled_nodes["B"].tolist() == [0, 2, 0, 0, 0] - assert out.num_sampled_nodes["C"].tolist() == [0, 0, 2, 0, 1] + assert out.num_sampled_nodes["A"] == [2, 0, 0, 0, 0] + assert out.num_sampled_nodes["B"] == [0, 2, 0, 0, 0] + assert out.num_sampled_nodes["C"] == [0, 0, 2, 0, 1] assert len(out.num_sampled_edges) == 3 - assert out.num_sampled_edges[("A", "ab", "B")].tolist() == [3, 0, 1, 0] - assert out.num_sampled_edges[("B", "ba", "A")].tolist() == [0, 1, 0, 1] - assert out.num_sampled_edges[("B", "bc", "C")].tolist() == [0, 2, 0, 2] + assert out.num_sampled_edges[("A", "ab", "B")] == [3, 0, 1, 0] + assert out.num_sampled_edges[("B", "ba", "A")] == [0, 1, 0, 1] + assert out.num_sampled_edges[("B", "bc", "C")] == [0, 2, 0, 2] @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") diff --git a/python/cugraph-pyg/cugraph_pyg/utils/__init__.py b/python/cugraph-pyg/cugraph_pyg/utils/__init__.py new file mode 100644 index 00000000000..aeae6078111 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/utils/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/cugraph-pyg/cugraph_pyg/utils/imports.py b/python/cugraph-pyg/cugraph_pyg/utils/imports.py new file mode 100644 index 00000000000..1cc865a1f35 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/utils/imports.py @@ -0,0 +1,32 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from packaging.requirements import Requirement +from importlib import import_module + + +def package_available(requirement: str) -> bool: + """Check if a package is installed and meets the version requirement.""" + req = Requirement(requirement) + try: + pkg = import_module(req.name) + except ImportError: + return False + + if len(req.specifier) > 0: + if hasattr(pkg, "__version__"): + return pkg.__version__ in req.specifier + else: + return False + + return True diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py index eafadfa4ff0..2e3d032a20f 100644 --- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py @@ -353,7 +353,7 @@ def uniform_neighbor_sample( else None, h_fan_out=fanout_vals, with_replacement=with_replacement, - do_expensive_check=True, + do_expensive_check=False, with_edge_properties=with_edge_properties, random_state=random_state, prior_sources_behavior=prior_sources_behavior, diff --git a/python/cugraph/cugraph/structure/convert_matrix.py b/python/cugraph/cugraph/structure/convert_matrix.py index ca8e93c482b..b9b9554b870 100644 --- a/python/cugraph/cugraph/structure/convert_matrix.py +++ b/python/cugraph/cugraph/structure/convert_matrix.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -277,6 +277,8 @@ def from_pandas_edgelist( renumber=True, ): """ + See :func:`networkx.convert_matrix.from_pandas_edgelist`. + Initialize a graph from the edge list. It is an error to call this method on an initialized Graph object. Source argument is source column name and destination argument is destination column name. 
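The conv-layer changes above replace the raw CSC tuple argument of forward() with a torch_geometric.EdgeIndex, while the legacy (row, colptr, num_src_nodes) tuple remains accepted. A minimal usage sketch, assuming PyG >= 2.5 and a CUDA-capable device; the toy sizes and variable names below are chosen only for illustration and are not part of the diff:

import torch
from torch_geometric import EdgeIndex
from cugraph_pyg.nn import GATConv as CuGraphGATConv

num_nodes, in_channels = 4, 8  # toy sizes for illustration
x = torch.rand(num_nodes, in_channels, device="cuda")
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]], device="cuda")

# New path: pass an EdgeIndex carrying the sparse size; the layer sorts it by
# column, builds the CSC representation, and permutes edge_attr internally.
ei = EdgeIndex(edge_index, sparse_size=(num_nodes, num_nodes))
conv = CuGraphGATConv(in_channels, out_channels=2, heads=2).cuda()
out = conv(x, ei)

# Legacy path: the (row, colptr, num_src_nodes) tuple from to_csc() still works.
csc = CuGraphGATConv.to_csc(edge_index, (num_nodes, num_nodes))
out = conv(x, csc)

When an EdgeIndex is given, the edge_attr permutation is applied inside forward(), which is why the reworked tests above pass edge_attr unpermuted on that path.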
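The new package_available() helper drives the version gating used in HeteroGATConv and the pyg>=2.5 test skips. A short sketch of both call patterns; the requires_pyg_25 name is illustrative only:

import pytest
from cugraph_pyg.utils.imports import package_available

# Fail fast in a layer constructor when PyG is too old, as HeteroGATConv now does.
if not package_available("torch_geometric>=2.4.0"):
    raise RuntimeError("This layer requires torch_geometric>=2.4.0.")

# Skip a test when an older PyG is installed; note the inverted specifier:
# the test is skipped when the installed version satisfies "<2.5".
requires_pyg_25 = pytest.mark.skipif(
    package_available("torch_geometric<2.5"), reason="Test requires pyg>=2.5"
)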
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 1c73ebb0216..9f0980d4199 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -93,6 +93,3 @@ def test_mg_betweenness_centrality( second_key="ref_bc", epsilon=DEFAULT_EPSILON, ) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index 4530dd3da86..4764c01f0fc 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -84,5 +84,3 @@ def test_mg_edge_betweenness_centrality( second_key="ref_bc", epsilon=DEFAULT_EPSILON, ) - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py index db34c68a054..6d1f53f7fc3 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION.: +# Copyright (c) 2020-2024, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -116,6 +116,8 @@ def calc_betweenness_centrality( create_using=cugraph.Graph(directed=directed), ignore_weights=not edgevals, ) + if multi_gpu_batch: + G.enable_batch() M = G.to_pandas_edgelist().rename( columns={"src": "0", "dst": "1", "wgt": edge_attr} @@ -130,8 +132,6 @@ def calc_betweenness_centrality( ) assert G is not None and Gnx is not None - if multi_gpu_batch: - G.enable_batch() calc_func = None if k is not None and seed is not None: diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index c94c2dcaff6..35e199093ce 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -49,14 +49,12 @@ def setup_function(): def get_sg_graph(dataset, directed): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -96,7 +94,6 @@ def test_dask_mg_betweenness_centrality( benchmark, ): g = get_sg_graph(dataset, directed) - dataset.unload() dg = get_mg_graph(dataset, directed) random_state = subset_seed @@ -143,6 +140,3 @@ def test_dask_mg_betweenness_centrality( diff = cupy.isclose(mg_bc_results, sg_bc_results) assert diff.all() - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index 68daff9238c..8606649c745 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ 
b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -45,14 +45,12 @@ def setup_function(): def get_sg_graph(dataset, directed): - dataset.unload() G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) return G def get_mg_graph(dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( @@ -118,6 +116,3 @@ def test_dask_mg_degree(dask_client, dataset, directed): check_names=False, check_dtype=False, ) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index 80acfe1c4ad..5b83a05e2a2 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -47,7 +47,6 @@ def setup_function(): def get_sg_graph(dataset, directed, edge_ids): - dataset.unload() df = dataset.get_edgelist() if edge_ids: if not directed: @@ -71,7 +70,6 @@ def get_sg_graph(dataset, directed, edge_ids): def get_mg_graph(dataset, directed, edge_ids, weight): - dataset.unload() ddf = dataset.get_dask_edgelist() if weight: @@ -178,6 +176,3 @@ def test_dask_mg_edge_betweenness_centrality( assert len(edge_bc_diffs1) == 0 assert len(edge_bc_diffs2) == 0 - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py index 8cd77fb5e24..3a840c82e95 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py @@ -52,7 +52,6 @@ def setup_function(): def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -89,15 +88,11 @@ def test_dask_mg_eigenvector_centrality(dask_client, dataset, directed): err = err + 1 assert err == 0 - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): dataset = DATASETS[0] - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) @@ -110,6 +105,3 @@ def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): with pytest.warns(UserWarning, match=warning_msg): dcg.eigenvector_centrality(dg) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py index ebbe5974814..5dcbd8173df 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py @@ -53,7 +53,6 @@ def test_dask_mg_katz_centrality(dask_client, dataset, directed): input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -95,16 +94,12 @@ def 
test_dask_mg_katz_centrality(dask_client, dataset, directed): err = err + 1 assert err == 0 - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=True) @@ -136,14 +131,10 @@ def test_dask_mg_katz_centrality_nstart(dask_client, dataset, directed): err = err + 1 assert err == 0 - # Clean-up stored dataset edge-lists - dataset.unload() - @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset): - dataset.unload() ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=True) dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) @@ -156,6 +147,3 @@ def test_dask_mg_katz_centrality_transposed_false(dask_client, dataset): with pytest.warns(UserWarning, match=warning_msg): dcg.katz_centrality(dg) - - # Clean-up stored dataset edge-lists - dataset.unload() diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py index 75462924c9d..d096eb7e5c2 100644 --- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py +++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py @@ -16,10 +16,9 @@ import pytest import cugraph.dask as dcg -import cudf -import dask_cudf import cugraph -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.datasets import karate, dolphins + # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -30,12 +29,36 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate, dolphins] IS_DIRECTED = [True, False] -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_pagerank_result(dataset, is_mg): + """Return the cugraph.pagerank result for an MG or SG graph""" + + if is_mg: + dg = dataset.get_dask_graph(store_transposed=True) + return dcg.pagerank(dg).compute() + else: + g = dataset.get_graph(store_transposed=True) + return cugraph.pagerank(g) + + +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) def test_dask_mg_pagerank(dask_client, directed): @@ -43,62 +66,17 @@ def test_dask_mg_pagerank(dask_client, directed): # Initialize and run pagerank on two distributed graphs # with same communicator - input_data_path1 = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() + input_data_path1 = karate.get_path() print(f"dataset1={input_data_path1}") - chunksize1 = dcg.get_chunksize(input_data_path1) + result_pr1 = get_pagerank_result(karate, is_mg=True) - input_data_path2 = (RAPIDS_DATASET_ROOT_DIR_PATH / 
"dolphins.csv").as_posix() + input_data_path2 = dolphins.get_path() print(f"dataset2={input_data_path2}") - chunksize2 = dcg.get_chunksize(input_data_path2) - - ddf1 = dask_cudf.read_csv( - input_data_path1, - blocksize=chunksize1, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg1 = cugraph.Graph(directed=directed) - dg1.from_dask_cudf_edgelist(ddf1, "src", "dst") - - result_pr1 = dcg.pagerank(dg1).compute() - - ddf2 = dask_cudf.read_csv( - input_data_path2, - blocksize=chunksize2, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg2 = cugraph.Graph(directed=directed) - dg2.from_dask_cudf_edgelist(ddf2, "src", "dst") - - result_pr2 = dcg.pagerank(dg2).compute() + result_pr2 = get_pagerank_result(dolphins, is_mg=True) # Calculate single GPU pagerank for verification of results - df1 = cudf.read_csv( - input_data_path1, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g1 = cugraph.Graph(directed=directed) - g1.from_cudf_edgelist(df1, "src", "dst") - expected_pr1 = cugraph.pagerank(g1) - - df2 = cudf.read_csv( - input_data_path2, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g2 = cugraph.Graph(directed=directed) - g2.from_cudf_edgelist(df2, "src", "dst") - expected_pr2 = cugraph.pagerank(g2) + expected_pr1 = get_pagerank_result(karate, is_mg=False) + expected_pr2 = get_pagerank_result(dolphins, is_mg=False) # Compare and verify pagerank results diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 45ec8eca0e8..311fd7a24bc 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -17,7 +17,6 @@ import cugraph import cugraph.dask as dcg -import dask_cudf from cudf.testing.testing import assert_frame_equal from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.datasets import karate, dolphins, email_Eu_core @@ -36,11 +35,13 @@ def setup_function(): # Parameters # ============================================================================= + DATASETS = [karate, dolphins, email_Eu_core] IS_DIRECTED = [True, False] NUM_VERTICES = [2, 5, 10, 20] OFFSETS = [None] + # ============================================================================= # Helper functions # ============================================================================= @@ -53,15 +54,7 @@ def get_sg_graph(dataset, directed): def get_mg_graph(dataset, directed): - input_data_path = dataset.get_path() - blocksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=blocksize, - delimiter=dataset.metadata["delim"], - names=dataset.metadata["col_names"], - dtype=dataset.metadata["col_types"], - ) + ddf = dataset.get_dask_edgelist() dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( ddf, @@ -108,7 +101,7 @@ def test_mg_induced_subgraph( # FIXME: This parameter is not yet tested # mg_offsets = mg_offsets.compute().reset_index(drop=True) - mg_df, mg_offsets = result_induced_subgraph + mg_df, _ = result_induced_subgraph if mg_df is not None and sg_induced_subgraph is not None: # FIXME: 'edges()' or 'view_edgelist()' takes half the edges out if diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index 
b1908ae10a2..2904ecd12a2 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -13,123 +13,56 @@ import pytest - -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils - +from cugraph.datasets import karate_asymmetric, karate, dolphins -try: - from rapids_pytest_benchmark import setFixtureParamNames -except ImportError: - print( - "\n\nWARNING: rapids_pytest_benchmark is not installed, " - "falling back to pytest_benchmark fixtures.\n" - ) - # if rapids_pytest_benchmark is not available, just perfrom time-only - # benchmarking and replace the util functions with nops - import pytest_benchmark +# ============================================================================= +# Parameters +# ============================================================================= - gpubenchmark = pytest_benchmark.plugin.benchmark - def setFixtureParamNames(*args, **kwargs): - pass +DATASETS = [karate, dolphins] +DATASETS_ASYMMETRIC = [karate_asymmetric] # ============================================================================= -# Parameters +# Helper Functions # ============================================================================= -DATASETS_ASYMMETRIC = [utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"] - - -############################################################################### -# Fixtures -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -@pytest.fixture( - scope="module", - params=DATASETS_ASYMMETRIC, - ids=[f"dataset={d.as_posix()}" for d in DATASETS_ASYMMETRIC], -) -def daskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates a directed Graph. - """ - # Since parameterized fixtures do not assign param names to param values, - # manually call the helper to do so. - setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") - return dg -@pytest.fixture( - scope="module", - params=utils.DATASETS_UNDIRECTED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNDIRECTED], -) -def uddaskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates an undirected Graph. - """ - # Since parameterized fixtures do not assign param names to param - # values, manually call the helper to do so. 
- setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") +def get_mg_graph(dataset, directed): + """Returns an MG graph""" + ddf = dataset.get_dask_edgelist() + + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt") + return dg -############################################################################### +# ============================================================================= # Tests -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= # FIXME: Implement more robust tests + + @pytest.mark.mg -def test_mg_leiden_with_edgevals_directed_graph(daskGraphFromDataset): +@pytest.mark.parametrize("dataset", DATASETS_ASYMMETRIC) +def test_mg_leiden_with_edgevals_directed_graph(dask_client, dataset): + dg = get_mg_graph(dataset, directed=True) # Directed graphs are not supported by Leiden and a ValueError should be # raised with pytest.raises(ValueError): - parts, mod = dcg.leiden(daskGraphFromDataset) + parts, mod = dcg.leiden(dg) -############################################################################### -# Tests -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -# FIXME: Implement more robust tests @pytest.mark.mg -def test_mg_leiden_with_edgevals_undirected_graph(uddaskGraphFromDataset): - parts, mod = dcg.leiden(uddaskGraphFromDataset) +@pytest.mark.parametrize("dataset", DATASETS) +def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset): + dg = get_mg_graph(dataset, directed=False) + parts, mod = dcg.leiden(dg) # FIXME: either call Nx with the same dataset and compare results, or # hardcode golden results to compare to. 
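The rewritten MG community tests share a single construction pattern: build the multi-GPU graph from a cugraph.datasets object rather than a hand-rolled dask_cudf.read_csv. A condensed sketch, assuming an initialized Dask cluster and cuGraph comms such as the dask_client fixture provides:

import cugraph
import cugraph.dask as dcg
from cugraph.datasets import karate

# get_dask_edgelist() returns a dask_cudf DataFrame with src/dst/wgt columns,
# replacing the removed dcg.get_chunksize() + dask_cudf.read_csv() boilerplate.
ddf = karate.get_dask_edgelist()

dg = cugraph.Graph(directed=False)
dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt")

parts, mod = dcg.leiden(dg)  # directed graphs raise ValueError, per the test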
diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py index 19fffe96b5c..0dff7f1c8b0 100644 --- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py +++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py @@ -14,122 +14,41 @@ import pytest import cugraph.dask as dcg +from cugraph.datasets import karate_asymmetric, karate, dolphins -import cugraph -import dask_cudf -from cugraph.testing import utils - - -try: - from rapids_pytest_benchmark import setFixtureParamNames -except ImportError: - print( - "\n\nWARNING: rapids_pytest_benchmark is not installed, " - "falling back to pytest_benchmark fixtures.\n" - ) - - # if rapids_pytest_benchmark is not available, just perfrom time-only - # benchmarking and replace the util functions with nops - import pytest_benchmark - - gpubenchmark = pytest_benchmark.plugin.benchmark - - def setFixtureParamNames(*args, **kwargs): - pass +from test_leiden_mg import get_mg_graph # ============================================================================= # Parameters # ============================================================================= -DATASETS_ASYMMETRIC = [utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv"] - - -############################################################################### -# Fixtures -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -@pytest.fixture( - scope="module", - params=DATASETS_ASYMMETRIC, - ids=[f"dataset={d.as_posix()}" for d in DATASETS_ASYMMETRIC], -) -def daskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates a directed Graph. - """ - # Since parameterized fixtures do not assign param names to param values, - # manually call the helper to do so. - setFixtureParamNames(request, ["dataset"]) - dataset = request.param - - chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv( - dataset, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") - return dg -@pytest.fixture( - scope="module", - params=utils.DATASETS_UNDIRECTED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNDIRECTED], -) -def uddaskGraphFromDataset(request, dask_client): - """ - Returns a new dask dataframe created from the dataset file param. - This creates an undirected Graph. - """ - # Since parameterized fixtures do not assign param names to param - # values, manually call the helper to do so. 
-    setFixtureParamNames(request, ["dataset"])
-    dataset = request.param
+DATASETS_ASYMMETRIC = [karate_asymmetric]
+DATASETS = [karate, dolphins]
 
-    chunksize = dcg.get_chunksize(dataset)
-    ddf = dask_cudf.read_csv(
-        dataset,
-        blocksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value")
-    return dg
-
-
-###############################################################################
+# =============================================================================
 # Tests
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
+# =============================================================================
 # FIXME: Implement more robust tests
+
+
 @pytest.mark.mg
-def test_mg_louvain_with_edgevals_directed_graph(daskGraphFromDataset):
+@pytest.mark.parametrize("dataset", DATASETS_ASYMMETRIC)
+def test_mg_louvain_with_edgevals_directed_graph(dask_client, dataset):
+    dg = get_mg_graph(dataset, directed=True)
     # Directed graphs are not supported by Louvain and a ValueError should be
     # raised
     with pytest.raises(ValueError):
-        parts, mod = dcg.louvain(daskGraphFromDataset)
+        parts, mod = dcg.louvain(dg)
 
 
-###############################################################################
-# Tests
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-# FIXME: Implement more robust tests
 @pytest.mark.mg
-def test_mg_louvain_with_edgevals_undirected_graph(uddaskGraphFromDataset):
-    parts, mod = dcg.louvain(uddaskGraphFromDataset)
+@pytest.mark.parametrize("dataset", DATASETS)
+def test_mg_louvain_with_edgevals_undirected_graph(dask_client, dataset):
+    dg = get_mg_graph(dataset, directed=False)
+    parts, mod = dcg.louvain(dg)
 
     # FIXME: either call Nx with the same dataset and compare results, or
     # hardcode golden results to compare to.
diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
index 0a052845cf8..e2c47af8a1b 100644
--- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py
@@ -16,115 +16,81 @@
 import random
 
 import pytest
-import cudf
-import dask_cudf
 
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
-from pylibcugraph.testing.utils import gen_fixture_params_product
+from cugraph.datasets import karate, dolphins
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
 # =============================================================================
-# Pytest fixtures
+# Parameters
 # =============================================================================
-datasets = utils.DATASETS_UNDIRECTED
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    ([True, False], "start_list"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
- """ - parameters = dict(zip(("graph_file", "start_list", "edgevals"), request.param)) - - return parameters - - -@pytest.fixture(scope="module") -def input_expected_output(dask_client, input_combo): - """ - This fixture returns the inputs and expected results from the triangle - count algo. - """ - start_list = input_combo["start_list"] - input_data_path = input_combo["graph_file"] - G = utils.generate_cugraph_graph_from_file( - input_data_path, directed=False, edgevals=True - ) - input_combo["SGGraph"] = G - if start_list: +DATASETS = [karate, dolphins] +START_LIST = [True, False] + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_sg_graph(dataset, directed, start): + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + if start: # sample k nodes from the cuGraph graph - k = random.randint(1, 10) - srcs = G.view_edge_list()[G.source_columns] - dsts = G.view_edge_list()[G.destination_columns] - nodes = cudf.concat([srcs, dsts]).drop_duplicates() - start_list = nodes.sample(k) + start = G.select_random_vertices(num_vertices=random.randint(1, 10)) else: - start_list = None + start = None - sg_triangle_results = cugraph.triangle_count(G, start_list) - sg_triangle_results = sg_triangle_results.sort_values("vertex").reset_index( - drop=True - ) + return G, start - input_combo["sg_triangle_results"] = sg_triangle_results - input_combo["start_list"] = start_list - - # Creating an edgelist from a dask cudf dataframe - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - dg = cugraph.Graph(directed=False) +def get_mg_graph(dataset, directed): + ddf = dataset.get_dask_edgelist() + dg = cugraph.Graph(directed=directed) dg.from_dask_cudf_edgelist( - ddf, source="src", destination="dst", edge_attr="value", renumber=True + ddf, source="src", destination="dst", edge_attr="wgt", renumber=True ) - input_combo["MGGraph"] = dg - - return input_combo + return dg # ============================================================================= # Tests # ============================================================================= + + @pytest.mark.mg -def test_sg_triangles(dask_client, benchmark, input_expected_output): +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("start", START_LIST) +def test_sg_triangles(dask_client, dataset, start, benchmark): # This test is only for benchmark purposes. 
sg_triangle_results = None - G = input_expected_output["SGGraph"] - start_list = input_expected_output["start_list"] - sg_triangle_results = benchmark(cugraph.triangle_count, G, start_list) + G, start = get_sg_graph(dataset, False, start) + + sg_triangle_results = benchmark(cugraph.triangle_count, G, start) + sg_triangle_results.sort_values("vertex").reset_index(drop=True) assert sg_triangle_results is not None @pytest.mark.mg -def test_triangles(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - start_list = input_expected_output["start_list"] - - result_counts = benchmark(dcg.triangle_count, dg, start_list) +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("start", START_LIST) +def test_triangles(dask_client, dataset, start, benchmark): + G, start = get_sg_graph(dataset, False, start) + dg = get_mg_graph(dataset, False) + result_counts = benchmark(dcg.triangle_count, dg, start) result_counts = ( result_counts.drop_duplicates() .compute() @@ -132,8 +98,9 @@ def test_triangles(dask_client, benchmark, input_expected_output): .reset_index(drop=True) .rename(columns={"counts": "mg_counts"}) ) - - expected_output = input_expected_output["sg_triangle_results"] + expected_output = ( + cugraph.triangle_count(G, start).sort_values("vertex").reset_index(drop=True) + ) # Update the mg triangle count with sg triangle count results # for easy comparison using cuDF DataFrame methods. diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index 26e8ed17bcb..4ab251c0e29 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -15,11 +15,9 @@ import pytest -import cudf -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.datasets import netscience # ============================================================================= @@ -31,42 +29,47 @@ def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [netscience] # Directed graph is not currently supported IS_DIRECTED = [False, True] -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) +# ============================================================================= +# Helper +# ============================================================================= + + +def get_mg_graph(dataset, directed): + """Returns an MG graph""" + ddf = dataset.get_dask_edgelist() + + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist(ddf, "src", "dst", "wgt") + + return dg + + +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_wcc(dask_client, directed): - - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() +def test_dask_mg_wcc(dask_client, dataset, directed): + input_data_path = dataset.get_path() print(f"dataset={input_data_path}") - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - 
blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst", renumber=True) - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist(ddf, "src", "dst") + g = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + dg = get_mg_graph(dataset, directed) + # breakpoint() if not directed: expected_dist = cugraph.weakly_connected_components(g) result_dist = dcg.weakly_connected_components(dg) diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index f771ce513eb..3d9a7bef5be 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -15,107 +15,64 @@ import pytest -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils -from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.datasets import karate, dolphins, karate_asymmetric # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() # ============================================================================= -# Pytest fixtures +# Parameters # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED -degree_type = ["incoming", "outgoing", "bidirectional"] - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - (degree_type, "degree_type"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict(zip(("graph_file", "degree_type"), request.param)) - - return parameters - - -@pytest.fixture(scope="module") -def input_expected_output(dask_client, input_combo): - """ - This fixture returns the inputs and expected results from the Core number - algo. 
- """ - degree_type = input_combo["degree_type"] - input_data_path = input_combo["graph_file"] - G = utils.generate_cugraph_graph_from_file( - input_data_path, directed=False, edgevals=True - ) - input_combo["SGGraph"] = G - sg_core_number_results = cugraph.core_number(G, degree_type) - sg_core_number_results = sg_core_number_results.sort_values("vertex").reset_index( - drop=True - ) +DATASETS = [karate, dolphins] +DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"] - input_combo["sg_core_number_results"] = sg_core_number_results - input_combo["degree_type"] = degree_type - - # Creating an edgelist from a dask cudf dataframe - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist( - ddf, source="src", destination="dst", edge_attr="value", renumber=True - ) +# ============================================================================= +# Helper Functions +# ============================================================================= - input_combo["MGGraph"] = dg - return input_combo +def get_sg_results(dataset, degree_type): + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) + res = cugraph.core_number(G, degree_type) + res = res.sort_values("vertex").reset_index(drop=True) + return res # ============================================================================= # Tests # ============================================================================= + + @pytest.mark.mg -def test_sg_core_number(dask_client, benchmark, input_expected_output): +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_sg_core_number(dask_client, dataset, degree_type, benchmark): # This test is only for benchmark purposes. sg_core_number_results = None - G = input_expected_output["SGGraph"] - degree_type = input_expected_output["degree_type"] - + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) assert sg_core_number_results is not None @pytest.mark.mg -def test_core_number(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - degree_type = input_expected_output["degree_type"] +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_core_number(dask_client, dataset, degree_type, benchmark): + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) result_core_number = benchmark(dcg.core_number, dg, degree_type) - result_core_number = ( result_core_number.drop_duplicates() .compute() @@ -124,7 +81,7 @@ def test_core_number(dask_client, benchmark, input_expected_output): .rename(columns={"core_number": "mg_core_number"}) ) - expected_output = input_expected_output["sg_core_number_results"] + expected_output = get_sg_results(dataset, degree_type) # Update the mg core number with sg core number results # for easy comparison using cuDF DataFrame methods. 
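The assertion that follows this hunk (kept as unchanged context, so not shown in the diff) compares the MG core numbers against the SG baseline using cuDF DataFrame operations. A hedged sketch of that comparison, using the column names produced by the `rename()` call above (`mg_core_number`) and by `cugraph.core_number` (`core_number`); the helper name is illustrative:

```python
import cudf


def count_core_number_mismatches(mg_df: cudf.DataFrame, sg_df: cudf.DataFrame) -> int:
    # mg_df columns: ["vertex", "mg_core_number"]; sg_df columns: ["vertex", "core_number"]
    merged = mg_df.merge(sg_df, on="vertex", how="inner")
    mismatches = merged["mg_core_number"] != merged["core_number"]
    return int(mismatches.sum())
```

The MG test then asserts that this mismatch count is zero.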
@@ -135,30 +92,10 @@ def test_core_number(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_core_number_invalid_input(input_expected_output): - input_data_path = ( - utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate-asymmetric.csv" - ).as_posix() - - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - ) +def test_core_number_invalid_input(): + dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True)) invalid_degree_type = 3 - dg = input_expected_output["MGGraph"] + with pytest.raises(ValueError): dcg.core_number(dg, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index b2ac18cf3a9..c7ad6d2d41d 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -1,4 +1,5 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. + # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,58 +16,39 @@ import pytest -import dask_cudf import cugraph import cugraph.dask as dcg -from cugraph.testing import utils +from cugraph.datasets import karate, dolphins from cudf.testing.testing import assert_frame_equal from cugraph.structure.symmetrize import symmetrize_df -from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() # ============================================================================= -# Pytest fixtures +# Parameters # ============================================================================= -datasets = utils.DATASETS_UNDIRECTED - -core_number = [True, False] -degree_type = ["bidirectional", "outgoing", "incoming"] - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), (core_number, "core_number"), (degree_type, "degree_type") -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. - """ - parameters = dict(zip(("graph_file", "core_number", "degree_type"), request.param)) - - return parameters - - -@pytest.fixture(scope="module") -def input_expected_output(dask_client, input_combo): - """ - This fixture returns the inputs and expected results from the Core number - algo. 
- """ - core_number = input_combo["core_number"] - degree_type = input_combo["degree_type"] - input_data_path = input_combo["graph_file"] - G = utils.generate_cugraph_graph_from_file( - input_data_path, directed=False, edgevals=True - ) + + +DATASETS = [karate, dolphins] +CORE_NUMBER = [True, False] +DEGREE_TYPE = ["bidirectional", "outgoing", "incoming"] + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_sg_results(dataset, core_number, degree_type): + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) if core_number: # compute the core_number @@ -74,62 +56,41 @@ def input_expected_output(dask_client, input_combo): else: core_number = None - input_combo["core_number"] = core_number - - input_combo["SGGraph"] = G - sg_k_core_graph = cugraph.k_core( G, core_number=core_number, degree_type=degree_type ) - sg_k_core_results = sg_k_core_graph.view_edge_list() + res = sg_k_core_graph.view_edge_list() # FIXME: The result will come asymetric. Symmetrize the results srcCol = sg_k_core_graph.source_columns dstCol = sg_k_core_graph.destination_columns wgtCol = sg_k_core_graph.weight_column - sg_k_core_results = ( - symmetrize_df(sg_k_core_results, srcCol, dstCol, wgtCol) + res = ( + symmetrize_df(res, srcCol, dstCol, wgtCol) .sort_values([srcCol, dstCol]) .reset_index(drop=True) ) - input_combo["sg_k_core_results"] = sg_k_core_results - - # Creating an edgelist from a dask cudf dataframe - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - dg = cugraph.Graph(directed=False) - # FIXME: False when renumbering (C++ and python renumbering) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - ) - - input_combo["MGGraph"] = dg - - return input_combo + return res, core_number # ============================================================================= # Tests # ============================================================================= + + @pytest.mark.mg -def test_sg_k_core(dask_client, benchmark, input_expected_output): +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("core_number", CORE_NUMBER) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_sg_k_core(dask_client, dataset, core_number, degree_type, benchmark): # This test is only for benchmark purposes. 
sg_k_core = None - G = input_expected_output["SGGraph"] - core_number = input_expected_output["core_number"] - degree_type = input_expected_output["degree_type"] - + G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) + if core_number: + # compute the core_number + core_number = cugraph.core_number(G, degree_type=degree_type) + else: + core_number = None sg_k_core = benchmark( cugraph.k_core, G, core_number=core_number, degree_type=degree_type ) @@ -137,15 +98,16 @@ def test_sg_k_core(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_mg_k_core(dask_client, benchmark, input_expected_output): - - dg = input_expected_output["MGGraph"] - core_number = input_expected_output["core_number"] +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("core_number", CORE_NUMBER) +@pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_dask_mg_k_core(dask_client, dataset, core_number, degree_type, benchmark): + expected_k_core_results, core_number = get_sg_results( + dataset, core_number, degree_type + ) + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) k_core_results = benchmark(dcg.k_core, dg, core_number=core_number) - - expected_k_core_results = input_expected_output["sg_k_core_results"] - k_core_results = ( k_core_results.compute() .sort_values(["src", "dst"]) @@ -160,36 +122,13 @@ def test_dask_mg_k_core(dask_client, benchmark, input_expected_output): @pytest.mark.mg def test_dask_mg_k_core_invalid_input(dask_client): - input_data_path = datasets[0] - chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) + dataset = DATASETS[0] + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=True)) - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - renumber=True, - store_transposed=True, - ) with pytest.raises(ValueError): dcg.k_core(dg) - dg = cugraph.Graph(directed=False) - dg.from_dask_cudf_edgelist( - ddf, - source="src", - destination="dst", - edge_attr="value", - store_transposed=True, - ) + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) degree_type = "invalid" with pytest.raises(ValueError): diff --git a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py index 45a3c46309d..64917d0c747 100644 --- a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py @@ -24,33 +24,61 @@ import dask_cudf import cugraph.dask as dcg import cugraph +from cugraph.datasets import karate, karate_disjoint from cugraph.testing import utils from cugraph.structure.number_map import NumberMap from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH from cudf.testing import assert_frame_equal, assert_series_equal # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate] 
+DATASETS_UNRENUMBERED = [karate_disjoint] IS_DIRECTED = [True, False] +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_sg_graph(dataset, directed): + dataset.unload() + g = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + + return g + + +def get_mg_graph(dataset, directed): + dataset.unload() + dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=directed)) + + return dg + + +# ============================================================================= +# Tests +# ============================================================================= + + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", - utils.DATASETS_UNRENUMBERED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED], -) -def test_mg_renumber(graph_file, dask_client): - - M = utils.read_csv_for_nx(graph_file) +@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED) +def test_mg_renumber(dataset, dask_client): + M = utils.read_csv_for_nx(dataset.get_path()) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) @@ -96,13 +124,9 @@ def test_mg_renumber(graph_file, dask_client): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", - utils.DATASETS_UNRENUMBERED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED], -) -def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client): - M = utils.read_csv_for_nx(graph_file) +@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED) +def test_mg_renumber_add_internal_vertex_id(dataset, dask_client): + M = utils.read_csv_for_nx(dataset.get_path()) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) @@ -131,33 +155,13 @@ def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_mg_pagerank(dask_client, directed): +def test_dask_mg_pagerank(dask_client, dataset, directed): pandas.set_option("display.max_rows", 10000) - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - blocksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g = cugraph.Graph(directed=directed) - g.from_cudf_edgelist(df, "src", "dst") - - dg = cugraph.Graph(directed=directed) - dg.from_dask_cudf_edgelist(ddf, "src", "dst") + g = get_sg_graph(dataset, directed) + dg = get_mg_graph(dataset, directed) expected_pr = cugraph.pagerank(g) result_pr = dcg.pagerank(dg).compute() @@ -178,20 +182,18 @@ def test_dask_mg_pagerank(dask_client, directed): print("Mismatches:", err) assert err == 0 + dataset.unload() + @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -@pytest.mark.parametrize( - "graph_file", - utils.DATASETS_UNRENUMBERED, - ids=[f"dataset={d.as_posix()}" for d in utils.DATASETS_UNRENUMBERED], -) -def 
test_mg_renumber_common_col_names(graph_file, dask_client): +@pytest.mark.parametrize("dataset", DATASETS_UNRENUMBERED) +def test_mg_renumber_common_col_names(dataset, dask_client): """ Ensure that commonly-used column names in the input do not conflict with names used internally by NumberMap. """ - M = utils.read_csv_for_nx(graph_file) + M = utils.read_csv_for_nx(dataset.get_path()) sources = cudf.Series(M["0"]) destinations = cudf.Series(M["1"]) diff --git a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py index 3bdb5c079ef..09936e954e8 100644 --- a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,73 +17,54 @@ import dask_cudf import numpy as np -from cugraph.testing import UNDIRECTED_DATASETS, karate_disjoint - +from cugraph.datasets import karate, dolphins, karate_disjoint from cugraph.structure.replicate_edgelist import replicate_edgelist from cudf.testing.testing import assert_frame_equal -from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() +# ============================================================================= +# Parameters +# ============================================================================= + + edgeWeightCol = "weights" edgeIdCol = "edge_id" edgeTypeCol = "edge_type" srcCol = "src" dstCol = "dst" - -input_data = UNDIRECTED_DATASETS + [karate_disjoint] -datasets = [pytest.param(d) for d in input_data] - -fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), - ([True, False], "distributed"), - ([True, False], "use_weights"), - ([True, False], "use_edge_ids"), - ([True, False], "use_edge_type_ids"), -) - - -@pytest.fixture(scope="module", params=fixture_params) -def input_combo(request): - """ - Simply return the current combination of params as a dictionary for use in - tests or other parameterized fixtures. 
- """ - return dict( - zip( - ( - "graph_file", - "use_weights", - "use_edge_ids", - "use_edge_type_ids", - "distributed", - ), - request.param, - ) - ) +DATASETS = [karate, dolphins, karate_disjoint] +IS_DISTRIBUTED = [True, False] +USE_WEIGHTS = [True, False] +USE_EDGE_IDS = [True, False] +USE_EDGE_TYPE_IDS = [True, False] # ============================================================================= # Tests # ============================================================================= -# @pytest.mark.skipif( -# is_single_gpu(), reason="skipping MG testing on Single GPU system" -# ) -@pytest.mark.mg -def test_mg_replicate_edgelist(dask_client, input_combo): - df = input_combo["graph_file"].get_edgelist() - distributed = input_combo["distributed"] - use_weights = input_combo["use_weights"] - use_edge_ids = input_combo["use_edge_ids"] - use_edge_type_ids = input_combo["use_edge_type_ids"] + +@pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) +@pytest.mark.parametrize("distributed", IS_DISTRIBUTED) +@pytest.mark.parametrize("use_weights", USE_WEIGHTS) +@pytest.mark.parametrize("use_edge_ids", USE_EDGE_IDS) +@pytest.mark.parametrize("use_edge_type_ids", USE_EDGE_TYPE_IDS) +def test_mg_replicate_edgelist( + dask_client, dataset, distributed, use_weights, use_edge_ids, use_edge_type_ids +): + dataset.unload() + df = dataset.get_edgelist() columns = [srcCol, dstCol] weight = None diff --git a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py index 05cc06e6282..913443fe400 100644 --- a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -25,6 +25,8 @@ # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini index ebbd6a7eaad..675a6cf8fde 100644 --- a/python/cugraph/pytest.ini +++ b/python/cugraph/pytest.ini @@ -59,5 +59,14 @@ python_functions = filterwarnings = error:::cudf + error::FutureWarning + error::DeprecationWarning + # TODO + ignore:Multi is deprecated and the removal of multi edges will no longer be supported:FutureWarning + ignore:The legacy column names:FutureWarning + ignore:The include_hop_column flag is deprecated and will be removed:FutureWarning + ignore:Calling uniform_neighbor_sample with the:FutureWarning + ignore:The with_edge_properties flag is deprecated and will be removed:FutureWarning + ignore:This function is deprecated. Batched support for multiple vertices:DeprecationWarning # Called via dask. Not obviously addressable in cugraph. 
- ignore:The behavior of array concatenation with empty entries is deprecated:FutureWarning:cudf + ignore:The behavior of array concatenation with empty entries is deprecated:FutureWarning diff --git a/python/nx-cugraph/README.md b/python/nx-cugraph/README.md index 75b5c1c5aa9..27825585c28 100644 --- a/python/nx-cugraph/README.md +++ b/python/nx-cugraph/README.md @@ -216,6 +216,8 @@ Below is the list of algorithms that are currently supported in nx-cugraph. └─ wheel_graph community └─ caveman_graph +ego + └─ ego_graph small ├─ bull_graph ├─ chvatal_graph diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index edc96983b8f..f57b90eb402 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -77,6 +77,7 @@ "diamond_graph", "dodecahedral_graph", "edge_betweenness_centrality", + "ego_graph", "eigenvector_centrality", "empty_graph", "florentine_families_graph", @@ -163,6 +164,7 @@ "clustering": "Directed graphs and `weight` parameter are not yet supported.", "core_number": "Directed graphs are not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", + "ego_graph": "Weighted ego_graph with negative cycles is not yet supported. `NotImplementedError` will be raised if there are negative `distance` edge weights.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.", "generic_bfs_edges": "`neighbors` and `sort_neighbors` parameters are not yet supported.", @@ -191,6 +193,9 @@ "bellman_ford_path_length": { "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.", }, + "ego_graph": { + "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.", + }, "eigenvector_centrality": { "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. 
If None, then dtype is determined by the edge values.", }, diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index d220cb18df3..c4422ffb97d 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -26,7 +26,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.16 + rev: v0.17 hooks: - id: validate-pyproject name: Validate pyproject.toml @@ -50,7 +50,7 @@ repos: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.2 + rev: v0.4.4 hooks: - id: ruff args: [--fix-only, --show-fixes] # --unsafe-fixes] @@ -62,7 +62,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==7.0.0 - - flake8-bugbear==24.4.21 + - flake8-bugbear==24.4.26 - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.2 + rev: v0.4.4 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index f265540a161..b34245d5031 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -562,7 +562,12 @@ def to_networkx(G: nxcg.Graph, *, sort_edges: bool = False) -> nx.Graph: dst_iter = map(id_to_key.__getitem__, dst_indices) if G.is_multigraph() and (G.edge_keys is not None or G.edge_indices is not None): if G.edge_keys is not None: - edge_keys = G.edge_keys + if not G.is_directed(): + edge_keys = [k for k, m in zip(G.edge_keys, mask.tolist()) if m] + else: + edge_keys = G.edge_keys + elif not G.is_directed(): + edge_keys = G.edge_indices[mask].tolist() else: edge_keys = G.edge_indices.tolist() if edge_values: diff --git a/python/nx-cugraph/nx_cugraph/generators/__init__.py b/python/nx-cugraph/nx_cugraph/generators/__init__.py index c1834a4dec7..60a9d92373a 100644 --- a/python/nx-cugraph/nx_cugraph/generators/__init__.py +++ b/python/nx-cugraph/nx_cugraph/generators/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,5 +12,6 @@ # limitations under the License. from .classic import * from .community import * +from .ego import * from .small import * from .social import * diff --git a/python/nx-cugraph/nx_cugraph/generators/ego.py b/python/nx-cugraph/nx_cugraph/generators/ego.py new file mode 100644 index 00000000000..66c9c8b95ee --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/ego.py @@ -0,0 +1,161 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import cupy as cp +import networkx as nx +import numpy as np +import pylibcugraph as plc + +import nx_cugraph as nxcg + +from ..utils import _dtype_param, _get_float_dtype, index_dtype, networkx_algorithm + +__all__ = ["ego_graph"] + + +@networkx_algorithm( + extra_params=_dtype_param, version_added="24.06", _plc={"bfs", "ego_graph", "sssp"} +) +def ego_graph( + G, n, radius=1, center=True, undirected=False, distance=None, *, dtype=None +): + """Weighted ego_graph with negative cycles is not yet supported. `NotImplementedError` will be raised if there are negative `distance` edge weights.""" # noqa: E501 + if isinstance(G, nx.Graph): + G = nxcg.from_networkx(G, preserve_all_attrs=True) + if n not in G: + if distance is None: + raise nx.NodeNotFound(f"Source {n} is not in G") + raise nx.NodeNotFound(f"Node {n} not found in graph") + src_index = n if G.key_to_id is None else G.key_to_id[n] + symmetrize = "union" if undirected and G.is_directed() else None + if distance is None or distance not in G.edge_values: + # Simple BFS to determine nodes + if radius is not None and radius <= 0: + if center: + node_ids = cp.array([src_index], dtype=index_dtype) + else: + node_ids = cp.empty(0, dtype=index_dtype) + node_mask = None + else: + if radius is None or np.isinf(radius): + radius = -1 + else: + radius = math.ceil(radius) + distances, unused_predecessors, node_ids = plc.bfs( + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(symmetrize=symmetrize), + sources=cp.array([src_index], index_dtype), + direction_optimizing=False, # True for undirected only; what's best? 
+ depth_limit=radius, + compute_predecessors=False, + do_expensive_check=False, + ) + node_mask = distances != np.iinfo(distances.dtype).max + else: + # SSSP to determine nodes + if callable(distance): + raise NotImplementedError("callable `distance` argument is not supported") + if symmetrize and G.is_multigraph(): + # G._get_plc_graph does not implement `symmetrize=True` w/ edge array + raise NotImplementedError( + "Weighted ego_graph with undirected=True not implemented" + ) + # Check for negative values since we don't support negative cycles + edge_vals = G.edge_values[distance] + if distance in G.edge_masks: + edge_vals = edge_vals[G.edge_masks[distance]] + if (edge_vals < 0).any(): + raise NotImplementedError( + "Negative edge weights not yet supported by ego_graph" + ) + # PERF: we could use BFS if all edges are equal + if radius is None: + radius = np.inf + dtype = _get_float_dtype(dtype, graph=G, weight=distance) + node_ids, distances, unused_predecessors = plc.sssp( + resource_handle=plc.ResourceHandle(), + graph=(G.to_undirected() if symmetrize else G)._get_plc_graph( + distance, 1, dtype + ), + source=src_index, + cutoff=np.nextafter(radius, np.inf, dtype=np.float64), + compute_predecessors=True, # TODO: False is not yet supported + do_expensive_check=False, + ) + node_mask = distances != np.finfo(distances.dtype).max + + if node_mask is not None: + if not center: + node_mask &= node_ids != src_index + node_ids = node_ids[node_mask] + if node_ids.size == G._N: + return G.copy() + # TODO: create renumbering helper function(s) + node_ids.sort() # TODO: is this ever necessary? Keep for safety + node_values = {key: val[node_ids] for key, val in G.node_values.items()} + node_masks = {key: val[node_ids] for key, val in G.node_masks.items()} + + G._sort_edge_indices() # TODO: is this ever necessary? 
Keep for safety + edge_mask = cp.isin(G.src_indices, node_ids) & cp.isin(G.dst_indices, node_ids) + src_indices = cp.searchsorted(node_ids, G.src_indices[edge_mask]).astype( + index_dtype + ) + dst_indices = cp.searchsorted(node_ids, G.dst_indices[edge_mask]).astype( + index_dtype + ) + edge_values = {key: val[edge_mask] for key, val in G.edge_values.items()} + edge_masks = {key: val[edge_mask] for key, val in G.edge_masks.items()} + + # Renumber nodes + if (id_to_key := G.id_to_key) is not None: + key_to_id = { + id_to_key[old_index]: new_index + for new_index, old_index in enumerate(node_ids.tolist()) + } + else: + key_to_id = { + old_index: new_index + for new_index, old_index in enumerate(node_ids.tolist()) + } + kwargs = { + "N": node_ids.size, + "src_indices": src_indices, + "dst_indices": dst_indices, + "edge_values": edge_values, + "edge_masks": edge_masks, + "node_values": node_values, + "node_masks": node_masks, + "key_to_id": key_to_id, + } + if G.is_multigraph(): + if G.edge_keys is not None: + kwargs["edge_keys"] = [ + x for x, m in zip(G.edge_keys, edge_mask.tolist()) if m + ] + if G.edge_indices is not None: + kwargs["edge_indices"] = G.edge_indices[edge_mask] + rv = G.__class__.from_coo(**kwargs) + rv.graph.update(G.graph) + return rv + + +@ego_graph._can_run +def _(G, n, radius=1, center=True, undirected=False, distance=None, *, dtype=None): + if distance is not None and undirected and G.is_directed() and G.is_multigraph(): + return "Weighted ego_graph with undirected=True not implemented" + if distance is not None and nx.is_negatively_weighted(G, weight=distance): + return "Weighted ego_graph with negative cycles not yet supported" + if callable(distance): + return "callable `distance` argument is not supported" + return True diff --git a/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py b/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py new file mode 100644 index 00000000000..5474f9d79e3 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import networkx as nx +import pytest +from packaging.version import parse + +import nx_cugraph as nxcg + +from .testing_utils import assert_graphs_equal + +nxver = parse(nx.__version__) + + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test ego_graph", allow_module_level=True) + + +@pytest.mark.parametrize( + "create_using", [nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph] +) +@pytest.mark.parametrize("radius", [-1, 0, 1, 1.5, 2, float("inf"), None]) +@pytest.mark.parametrize("center", [True, False]) +@pytest.mark.parametrize("undirected", [False, True]) +@pytest.mark.parametrize("multiple_edges", [False, True]) +@pytest.mark.parametrize("n", [0, 3]) +def test_ego_graph_cycle_graph( + create_using, radius, center, undirected, multiple_edges, n +): + Gnx = nx.cycle_graph(7, create_using=create_using) + if multiple_edges: + # Test multigraph with multiple edges + if not Gnx.is_multigraph(): + return + Gnx.add_edges_from(nx.cycle_graph(7, create_using=nx.DiGraph).edges) + Gnx.add_edge(0, 1, 10) + Gcg = nxcg.from_networkx(Gnx, preserve_all_attrs=True) + assert_graphs_equal(Gnx, Gcg) # Sanity check + + kwargs = {"radius": radius, "center": center, "undirected": undirected} + Hnx = nx.ego_graph(Gnx, n, **kwargs) + Hcg = nx.ego_graph(Gnx, n, **kwargs, backend="cugraph") + assert_graphs_equal(Hnx, Hcg) + with pytest.raises(nx.NodeNotFound, match="not in G"): + nx.ego_graph(Gnx, -1, **kwargs) + with pytest.raises(nx.NodeNotFound, match="not in G"): + nx.ego_graph(Gnx, -1, **kwargs, backend="cugraph") + # Using sssp with default weight of 1 should give same answer as bfs + nx.set_edge_attributes(Gnx, 1, name="weight") + Gcg = nxcg.from_networkx(Gnx, preserve_all_attrs=True) + assert_graphs_equal(Gnx, Gcg) # Sanity check + + kwargs["distance"] = "weight" + H2nx = nx.ego_graph(Gnx, n, **kwargs) + is_nx32 = nxver.major == 3 and nxver.minor == 2 + if undirected and Gnx.is_directed() and Gnx.is_multigraph(): + if is_nx32: + # `should_run` was added in nx 3.3 + match = "Weighted ego_graph with undirected=True not implemented" + else: + match = "not implemented by cugraph" + with pytest.raises(RuntimeError, match=match): + nx.ego_graph(Gnx, n, **kwargs, backend="cugraph") + with pytest.raises(NotImplementedError, match="ego_graph"): + nx.ego_graph(Gcg, n, **kwargs) + else: + H2cg = nx.ego_graph(Gnx, n, **kwargs, backend="cugraph") + assert_graphs_equal(H2nx, H2cg) + with pytest.raises(nx.NodeNotFound, match="not found in graph"): + nx.ego_graph(Gnx, -1, **kwargs) + with pytest.raises(nx.NodeNotFound, match="not found in graph"): + nx.ego_graph(Gnx, -1, **kwargs, backend="cugraph") diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index a7daf01775b..477fe8bb493 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
 [build-system]
 
@@ -19,7 +19,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 classifiers = [
-    "Development Status :: 3 - Alpha",
+    "Development Status :: 4 - Beta",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
@@ -233,6 +233,7 @@ ignore = [
 "nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"]
 "_nx_cugraph/__init__.py" = ["E501"]
 "nx_cugraph/algorithms/**/*py" = ["D205", "D401"]  # Allow flexible docstrings for algorithms
+"nx_cugraph/generators/**/*py" = ["D205", "D401"]  # Allow flexible docstrings for generators
 "nx_cugraph/interface.py" = ["D401"]  # Flexible docstrings
 "scripts/update_readme.py" = ["INP001"]  # Not part of a package
 
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
index 6f1ac1f640b..315c9bd7503 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
@@ -183,6 +183,17 @@ cdef extern from "cugraph_c/graph_functions.h":
             cugraph_error_t** error
         )
 
+    ###########################################################################
+    # count multi-edges
+    cdef cugraph_error_code_t \
+        cugraph_count_multi_edges(
+            const cugraph_resource_handle_t *handle,
+            cugraph_graph_t* graph,
+            bool_t do_expensive_check,
+            size_t *result,
+            cugraph_error_t** error
+        )
+
     ###########################################################################
     # degrees
     ctypedef struct cugraph_degrees_result_t:
diff --git a/python/pylibcugraph/pylibcugraph/count_multi_edges.pyx b/python/pylibcugraph/pylibcugraph/count_multi_edges.pyx
new file mode 100644
index 00000000000..d3780e53283
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/count_multi_edges.pyx
@@ -0,0 +1,99 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    bool_t,
+    data_type_id_t,
+    cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+    cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.graph_functions cimport (
+    cugraph_count_multi_edges,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+    _GPUGraph,
+)
+from pylibcugraph.utils cimport (
+    assert_success,
+)
+
+
+def count_multi_edges(ResourceHandle resource_handle,
+                      _GPUGraph graph,
+                      bool_t do_expensive_check):
+    """
+    Count the number of multi-edges in the graph. This returns
+    the number of duplicates. If the edge (u, v) appears k times
+    in the graph, then that edge will contribute (k-1) toward the
+    total number of duplicates.
+
+    Parameters
+    ----------
+    resource_handle : ResourceHandle
+        Handle to the underlying device resources needed for referencing data
+        and running algorithms.
+ + graph : SGGraph or MGGraph + The input graph, for either Single or Multi-GPU operations. + + do_expensive_check : bool_t + A flag to run expensive checks for input arguments if True. + + Returns + ------- + Total count of duplicate edges in the graph + + Examples + -------- + >>> import pylibcugraph, cupy, numpy + >>> srcs = cupy.asarray([0, 0, 0], dtype=numpy.int32) + >>> dsts = cupy.asarray([1, 1, 1], dtype=numpy.int32) + >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32) + >>> resource_handle = pylibcugraph.ResourceHandle() + >>> graph_props = pylibcugraph.GraphProperties( + ... is_symmetric=False, is_multigraph=False) + >>> G = pylibcugraph.SGGraph( + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, + ... store_transposed=True, renumber=False, do_expensive_check=False) + >>> count = pylibcugraph.count_multi_edges(resource_handle, G, False) + + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + + cdef size_t result + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + error_code = cugraph_count_multi_edges(c_resource_handle_ptr, + c_graph_ptr, + do_expensive_check, + &result, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_count_multi_edges") + + return result;
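The `count_multi_edges` wrapper above already carries its own doctest-style example. For the nx-cugraph `ego_graph` support added earlier in this diff, usage goes through normal NetworkX dispatch, mirroring the new test module. A small sketch, assuming NetworkX >= 3.2 (which the new tests require) and that this branch's nx-cugraph is installed:

```python
import networkx as nx

G = nx.cycle_graph(7)
nx.set_edge_attributes(G, 1, name="weight")

# BFS-based ego graph: nodes within `radius` hops of node 0.
H_bfs = nx.ego_graph(G, 0, radius=2, backend="cugraph")

# SSSP-based ego graph when `distance` names an edge attribute; with unit
# weights this selects the same node set as the BFS form.
H_sssp = nx.ego_graph(G, 0, radius=2, distance="weight", backend="cugraph")

assert set(H_bfs) == set(H_sssp)
```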