From 78227b3af73b23d5e25bbb35a4875de055452f4f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 8 May 2024 06:10:38 -1000 Subject: [PATCH 01/13] Address dask_cudf.read_csv chunksize deprecation (#4379) xref https://github.com/rapidsai/cugraph/pull/4271 `chunksize` was deprecated in favor of `blocksize` Also removed an unsupported `chunksize` in a `cudf.read_csv` call Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Don Acosta (https://github.com/acostadon) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4379 --- benchmarks/cugraph/standalone/cugraph_dask_funcs.py | 4 ++-- benchmarks/cugraph/standalone/cugraph_funcs.py | 4 +--- docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst | 2 +- .../cugraph/dask/centrality/betweenness_centrality.py | 6 +++--- .../cugraph/dask/centrality/eigenvector_centrality.py | 4 ++-- python/cugraph/cugraph/dask/centrality/katz_centrality.py | 4 ++-- python/cugraph/cugraph/dask/community/leiden.py | 2 +- python/cugraph/cugraph/dask/community/louvain.py | 2 +- python/cugraph/cugraph/dask/components/connectivity.py | 4 ++-- python/cugraph/cugraph/dask/cores/k_core.py | 4 ++-- python/cugraph/cugraph/dask/link_analysis/hits.py | 4 ++-- python/cugraph/cugraph/dask/link_analysis/pagerank.py | 2 +- python/cugraph/cugraph/dask/traversal/bfs.py | 4 ++-- python/cugraph/cugraph/dask/traversal/sssp.py | 4 ++-- python/cugraph/cugraph/structure/symmetrize.py | 2 +- python/cugraph/cugraph/testing/utils.py | 6 +++--- python/cugraph/cugraph/tests/comms/test_comms_mg.py | 6 +++--- python/cugraph/cugraph/tests/community/test_leiden_mg.py | 6 +++--- python/cugraph/cugraph/tests/community/test_louvain_mg.py | 6 +++--- .../cugraph/tests/community/test_triangle_count_mg.py | 4 ++-- .../cugraph/tests/components/test_connectivity_mg.py | 4 ++-- python/cugraph/cugraph/tests/core/test_core_number_mg.py | 6 +++--- python/cugraph/cugraph/tests/core/test_k_core_mg.py | 6 +++--- .../cugraph/tests/data_store/test_property_graph_mg.py | 4 ++-- .../cugraph/cugraph/tests/internals/test_renumber_mg.py | 4 ++-- .../cugraph/cugraph/tests/link_analysis/test_hits_mg.py | 6 +++--- .../cugraph/tests/link_analysis/test_pagerank_mg.py | 8 ++++---- .../cugraph/tests/link_prediction/test_jaccard_mg.py | 4 ++-- .../cugraph/tests/link_prediction/test_overlap_mg.py | 4 ++-- .../cugraph/tests/link_prediction/test_sorensen_mg.py | 4 ++-- python/cugraph/cugraph/tests/sampling/test_egonet_mg.py | 4 ++-- .../cugraph/tests/sampling/test_random_walks_mg.py | 4 ++-- .../tests/sampling/test_uniform_neighbor_sample_mg.py | 6 +++--- python/cugraph/cugraph/tests/structure/test_graph.py | 4 ++-- python/cugraph/cugraph/tests/structure/test_graph_mg.py | 6 +++--- python/cugraph/cugraph/tests/traversal/test_bfs_mg.py | 8 ++++---- python/cugraph/cugraph/tests/traversal/test_sssp_mg.py | 4 ++-- python/cugraph/cugraph/tests/utils/test_utils_mg.py | 4 ++-- 38 files changed, 84 insertions(+), 86 deletions(-) diff --git a/benchmarks/cugraph/standalone/cugraph_dask_funcs.py b/benchmarks/cugraph/standalone/cugraph_dask_funcs.py index e8f2c3a62bb..f31e5065cde 100644 --- a/benchmarks/cugraph/standalone/cugraph_dask_funcs.py +++ b/benchmarks/cugraph/standalone/cugraph_dask_funcs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -44,7 +44,7 @@ def read_csv(input_csv_file, scale): chunksize = cugraph.dask.get_chunksize(input_csv_file) return dask_cudf.read_csv( input_csv_file, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", # names=names, dtype=dtypes, diff --git a/benchmarks/cugraph/standalone/cugraph_funcs.py b/benchmarks/cugraph/standalone/cugraph_funcs.py index d53471fa828..10b23455670 100644 --- a/benchmarks/cugraph/standalone/cugraph_funcs.py +++ b/benchmarks/cugraph/standalone/cugraph_funcs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -75,10 +75,8 @@ def read_csv(input_csv_file, scale): dtypes = [vertex_t, vertex_t, "float32"] names = (["src", "dst", "weight"],) - chunksize = cugraph.dask.get_chunksize(input_csv_file) return cudf.read_csv( input_csv_file, - chunksize=chunksize, delimiter=" ", # names=names, dtype=dtypes, diff --git a/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst b/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst index d9ba3f3a792..f5132dd658c 100644 --- a/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst +++ b/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst @@ -54,7 +54,7 @@ Example # multi-GPU CSV reader e_list = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, names=['src', 'dst'], dtype=['int32', 'int32'], ) diff --git a/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py b/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py index 6aa708ea585..43891f487c1 100644 --- a/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py +++ b/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -204,7 +204,7 @@ def betweenness_centrality( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) @@ -362,7 +362,7 @@ def edge_betweenness_centrality( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) diff --git a/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py b/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py index 0dcd2b38546..7dfe3df7030 100644 --- a/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py +++ b/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -104,7 +104,7 @@ def eigenvector_centrality(input_graph, max_iter=100, tol=1.0e-6): >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph() diff --git a/python/cugraph/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/cugraph/dask/centrality/katz_centrality.py index 3891c04f5aa..a11be3b6870 100644 --- a/python/cugraph/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/cugraph/dask/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -133,7 +133,7 @@ def katz_centrality( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index 24a077d1845..bdcf9edc7bb 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -132,7 +132,7 @@ def leiden( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph() diff --git a/python/cugraph/cugraph/dask/community/louvain.py b/python/cugraph/cugraph/dask/community/louvain.py index 2d894d9665f..8ad3e6c2cf3 100644 --- a/python/cugraph/cugraph/dask/community/louvain.py +++ b/python/cugraph/cugraph/dask/community/louvain.py @@ -136,7 +136,7 @@ def louvain( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph() diff --git a/python/cugraph/cugraph/dask/components/connectivity.py b/python/cugraph/cugraph/dask/components/connectivity.py index 7adaa2cd509..5a92fe004ee 100644 --- a/python/cugraph/cugraph/dask/components/connectivity.py +++ b/python/cugraph/cugraph/dask/components/connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -78,7 +78,7 @@ def weakly_connected_components(input_graph): >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/dask/cores/k_core.py b/python/cugraph/cugraph/dask/cores/k_core.py index 4cc1ffc9f9b..0d799e3ee06 100644 --- a/python/cugraph/cugraph/dask/cores/k_core.py +++ b/python/cugraph/cugraph/dask/cores/k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -109,7 +109,7 @@ def k_core(input_graph, k=None, core_number=None, degree_type="bidirectional"): >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/dask/link_analysis/hits.py b/python/cugraph/cugraph/dask/link_analysis/hits.py index 3de69e1518b..13357ebb996 100644 --- a/python/cugraph/cugraph/dask/link_analysis/hits.py +++ b/python/cugraph/cugraph/dask/link_analysis/hits.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -118,7 +118,7 @@ def hits(input_graph, tol=1.0e-5, max_iter=100, nstart=None, normalized=True): >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) diff --git a/python/cugraph/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/cugraph/dask/link_analysis/pagerank.py index 62ae9109624..4b592a2583c 100644 --- a/python/cugraph/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/dask/link_analysis/pagerank.py @@ -295,7 +295,7 @@ def pagerank( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) diff --git a/python/cugraph/cugraph/dask/traversal/bfs.py b/python/cugraph/cugraph/dask/traversal/bfs.py index 412fd851ad6..f1b26472ee0 100644 --- a/python/cugraph/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/cugraph/dask/traversal/bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -106,7 +106,7 @@ def bfs(input_graph, start, depth_limit=None, return_distances=True, check_start >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) diff --git a/python/cugraph/cugraph/dask/traversal/sssp.py b/python/cugraph/cugraph/dask/traversal/sssp.py index 053a93fb42a..04c4376a500 100644 --- a/python/cugraph/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/cugraph/dask/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -89,7 +89,7 @@ def sssp(input_graph, source, cutoff=None, check_source=True): >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", - ... chunksize=chunksize, delimiter=" ", + ... blocksize=chunksize, delimiter=" ", ... names=["src", "dst", "value"], ... dtype=["int32", "int32", "float32"]) >>> dg = cugraph.Graph(directed=True) diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 30c6394ade9..28e2932d17a 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -159,7 +159,7 @@ def symmetrize_ddf( >>> # Init a DASK Cluster >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> # chunksize = dcg.get_chunksize(datasets / 'karate.csv') - >>> # ddf = dask_cudf.read_csv(datasets/'karate.csv', chunksize=chunksize, + >>> # ddf = dask_cudf.read_csv(datasets/'karate.csv', blocksize=chunksize, >>> # delimiter=' ', >>> # names=['src', 'dst', 'weight'], >>> # dtype=['int32', 'int32', 'float32']) diff --git a/python/cugraph/cugraph/testing/utils.py b/python/cugraph/cugraph/testing/utils.py index 6d58076e6fe..82bbfe0ecac 100644 --- a/python/cugraph/cugraph/testing/utils.py +++ b/python/cugraph/cugraph/testing/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -245,7 +245,7 @@ def read_dask_cudf_csv_file(csv_file, read_weights_in_sp=True, single_partition= chunksize = os.path.getsize(csv_file) return dask_cudf.read_csv( csv_file, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "weight"], dtype=["int32", "int32", "float32"], @@ -264,7 +264,7 @@ def read_dask_cudf_csv_file(csv_file, read_weights_in_sp=True, single_partition= chunksize = os.path.getsize(csv_file) return dask_cudf.read_csv( csv_file, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "weight"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py index 747ef935e01..75462924c9d 100644 --- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py +++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -53,7 +53,7 @@ def test_dask_mg_pagerank(dask_client, directed): ddf1 = dask_cudf.read_csv( input_data_path1, - chunksize=chunksize1, + blocksize=chunksize1, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -66,7 +66,7 @@ def test_dask_mg_pagerank(dask_client, directed): ddf2 = dask_cudf.read_csv( input_data_path2, - chunksize=chunksize2, + blocksize=chunksize2, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index 69fccdae260..b1908ae10a2 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -67,7 +67,7 @@ def daskGraphFromDataset(request, dask_client): chunksize = dcg.get_chunksize(dataset) ddf = dask_cudf.read_csv( dataset, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -96,7 +96,7 @@ def uddaskGraphFromDataset(request, dask_client): chunksize = dcg.get_chunksize(dataset) ddf = dask_cudf.read_csv( dataset, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py index 5318262fe26..19fffe96b5c 100644 --- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py +++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -67,7 +67,7 @@ def daskGraphFromDataset(request, dask_client): chunksize = dcg.get_chunksize(dataset) ddf = dask_cudf.read_csv( dataset, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -96,7 +96,7 @@ def uddaskGraphFromDataset(request, dask_client): chunksize = dcg.get_chunksize(dataset) ddf = dask_cudf.read_csv( dataset, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 0f7bb14581f..0a052845cf8 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -88,7 +88,7 @@ def input_expected_output(dask_client, input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index 217c9f0f09f..26e8ed17bcb 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -48,7 +48,7 @@ def test_dask_mg_wcc(dask_client, directed): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index 23214b5f51b..f771ce513eb 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -78,7 +78,7 @@ def input_expected_output(dask_client, input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -143,7 +143,7 @@ def test_core_number_invalid_input(input_expected_output): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index 32c4f4553a2..b2ac18cf3a9 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -98,7 +98,7 @@ def input_expected_output(dask_client, input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -164,7 +164,7 @@ def test_dask_mg_k_core_invalid_input(dask_client): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py b/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py index dd48fc72e36..db4ab0a2ac1 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -372,7 +372,7 @@ def net_MGPropertyGraph(dask_client): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py index e9521f16594..45a3c46309d 100644 --- a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -140,7 +140,7 @@ def test_dask_mg_pagerank(dask_client, directed): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py index 73ec13c674c..6e68059bcc7 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -81,7 +81,7 @@ def input_expected_output(input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -162,7 +162,7 @@ def test_dask_mg_hits_transposed_false(dask_client): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py index 63dbf31ca5e..c65863aefb4 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -55,7 +55,7 @@ def create_distributed_karate_graph(store_transposed=True): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -103,7 +103,7 @@ def test_dask_mg_pagerank( ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -184,7 +184,7 @@ def test_pagerank_invalid_personalization_dtype(dask_client): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py index ee739c9f236..98f64906564 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -102,7 +102,7 @@ def input_expected_output(input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py index 87407d7b59c..9afe7dd842f 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -102,7 +102,7 @@ def input_expected_output(input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py index 66832d08427..6c24fa5af13 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -103,7 +103,7 @@ def input_expected_output(input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py b/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py index e2f77700958..9bc4caf0e8e 100644 --- a/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -90,7 +90,7 @@ def input_expected_output(input_combo): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py index 03658c7a06e..29c15a7d7c6 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -182,7 +182,7 @@ def input_graph(request): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 32413d3c88d..c65535f98a2 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -87,7 +87,7 @@ def input_combo(request): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", indices_type], @@ -224,7 +224,7 @@ def test_mg_uniform_neighbor_sample_tree(dask_client, directed): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -1256,7 +1256,7 @@ def bench_uniform_neighbor_sample_email_eu_core(gpubenchmark, dask_client, n_sam ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "int32"], diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index de306309ca4..c0524fcfe77 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -386,7 +386,7 @@ def test_consolidation(graph_file): ddf = dask_cudf.read_csv( graph_file, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["source", "target", "weight"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index 7837916ae53..f23d4ec026d 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -68,7 +68,7 @@ def input_combo(request): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -285,7 +285,7 @@ def test_graph_repartition(dask_client): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py index 5eafc231141..62deb581d6d 100644 --- a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py +++ b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -48,7 +48,7 @@ def test_dask_mg_bfs(dask_client, directed): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -114,7 +114,7 @@ def test_dask_mg_bfs_invalid_start(dask_client, directed): el = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], @@ -150,7 +150,7 @@ def test_dask_mg_bfs_multi_column_depthlimit(dask_client, directed): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src_a", "dst_a", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py b/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py index 55bd320c2f1..9877a127700 100644 --- a/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py +++ b/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -47,7 +47,7 @@ def test_dask_mg_sssp(dask_client, directed): ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], diff --git a/python/cugraph/cugraph/tests/utils/test_utils_mg.py b/python/cugraph/cugraph/tests/utils/test_utils_mg.py index 23ff17aa00b..2945b216384 100644 --- a/python/cugraph/cugraph/tests/utils/test_utils_mg.py +++ b/python/cugraph/cugraph/tests/utils/test_utils_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -50,7 +50,7 @@ def test_from_edgelist(dask_client, directed): chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( input_data_path, - chunksize=chunksize, + blocksize=chunksize, delimiter=" ", names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], From af749c3974f658039b5dda5742d407835d1e30f4 Mon Sep 17 00:00:00 2001 From: Don Acosta <97529984+acostadon@users.noreply.github.com> Date: Thu, 9 May 2024 09:05:24 -0400 Subject: [PATCH 02/13] Removed obsolete methods from rst file (#4331) Removes doc generation for the link prediction algorithm methods that have been taken out closes #4330 Authors: - Don Acosta (https://github.com/acostadon) - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/4331 --- docs/cugraph/source/api_docs/cugraph/link_prediction.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/cugraph/source/api_docs/cugraph/link_prediction.rst b/docs/cugraph/source/api_docs/cugraph/link_prediction.rst index f05dce6f721..3d2f9562e32 100644 --- a/docs/cugraph/source/api_docs/cugraph/link_prediction.rst +++ b/docs/cugraph/source/api_docs/cugraph/link_prediction.rst @@ -11,7 +11,6 @@ Jaccard Coefficient cugraph.jaccard cugraph.jaccard_coefficient - cugraph.jaccard_w Overlap Coefficient @@ -21,7 +20,6 @@ Overlap Coefficient cugraph.overlap cugraph.overlap_coefficient - cugraph.overlap_w Sorensen Coefficient @@ -31,4 +29,3 @@ Sorensen Coefficient cugraph.sorensen cugraph.sorensen_coefficient - cugraph.sorensen_w From 06397973d04b14d18a2805c9c5e16150099ffd21 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 10 May 2024 17:58:49 -0500 Subject: [PATCH 03/13] Updates SG `PropertyGraph` and `cugraph-service` to apply `DataFrame.fillna()` based on latest cuDF changes (#4408) This handles a [recent cuDF change](https://github.com/rapidsai/cudf/pull/15683) by applying non-dict and non-Series values for a `fillna()` call on `PropertyGraph` instances only to the user-defined columns, with the assumption that savvy users that intend to update the "internal" columns, or users that are aware of their own categorical dtype columns, will use a dict or Series value to properly apply dtypes as needed. This also updates code in `cugraph-service` that serializes dataframes to numpy bytes to properly convert NA values when categoricals are present. Notes: * This is only applied to the SG `PropertyGraph` class. The MG class needs further review as to how to best apply the same policy (and because there are other MG failing tests that need addressed). Since this is blocking CI for the SG case only, this PR is being submitted now and MG will be addressed later, which should be okay since `PropertyGraph` is experimental. * This could be considered a breaking change if `PropertyGraph` was not experimental. Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/4408 --- .../cugraph_service_server/cugraph_handler.py | 20 +++++++++++-- .../testing/benchmark_server_extension.py | 6 ++-- .../cugraph/structure/property_graph.py | 30 ++++++++++++++++++- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py b/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py index 6cdf0d793d4..f60f597cfae 100644 --- a/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py +++ b/python/cugraph-service/server/cugraph_service_server/cugraph_handler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -1370,7 +1370,23 @@ def __get_graph_data_as_numpy_bytes(self, dataframe, null_replacement_value): # FIXME: should something other than a numpy type be serialized to # prevent a copy? (note: any other type required to be de-serialzed # on the client end could add dependencies on the client) - df_numpy = dataframe.to_numpy(na_value=n) + df_copy = dataframe.copy() + for col_name in df_copy.columns: + if df_copy[col_name].dtype == "category": + cat_dt = df_copy.dtypes[col_name].categories.dtype + if cat_dt == "object": + new_cat = str(n) + else: + new_cat = n + if new_cat not in df_copy.dtypes[col_name].categories: + df_copy[col_name] = df_copy[col_name].cat.add_categories( + new_cat + ) + df_copy[col_name].fillna(new_cat, inplace=True) + else: + df_copy[col_name].fillna(n, inplace=True) + + df_numpy = df_copy.to_numpy() return df_numpy.dumps() except Exception: diff --git a/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py b/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py index 361226c8071..dbd75e6abd9 100644 --- a/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py +++ b/python/cugraph-service/server/cugraph_service_server/testing/benchmark_server_extension.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,12 +25,12 @@ def create_graph_from_builtin_dataset(dataset_name, mg=False, server=None): dataset_obj = getattr(datasets, dataset_name) # FIXME: create an MG graph if server is mg? - return dataset_obj.get_graph(fetch=True) + return dataset_obj.get_graph(download=True) def create_property_graph_from_builtin_dataset(dataset_name, mg=False, server=None): dataset_obj = getattr(datasets, dataset_name) - edgelist_df = dataset_obj.get_edgelist(fetch=True) + edgelist_df = dataset_obj.get_edgelist(download=True) if mg and (server is not None) and server.is_multi_gpu: G = MGPropertyGraph() diff --git a/python/cugraph/cugraph/structure/property_graph.py b/python/cugraph/cugraph/structure/property_graph.py index 513798f35f9..53c1bf778c7 100644 --- a/python/cugraph/cugraph/structure/property_graph.py +++ b/python/cugraph/cugraph/structure/property_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -123,6 +123,17 @@ class EXPERIMENTAL__PropertyGraph: _default_type_name = "" + _internal_col_names = set( + ( + vertex_col_name, + src_col_name, + dst_col_name, + type_col_name, + edge_id_col_name, + weight_col_name, + ) + ) + def __init__(self): # The dataframe containing the properties for each vertex. # Each vertex occupies a row, and individual properties are maintained @@ -1380,6 +1391,15 @@ def fillna_vertices(self, val=0): Series is passed, the index or keys are the columns to fill and the values are the fill value for the corresponding column. """ + # Omit internal columns if an object is passed in to be applied to the + # entire DataFrame and assume the intent is for users to fillna only on + # their data. + if type(val) not in [dict, self.__series_type]: + user_col_names = ( + set(self.__vertex_prop_dataframe.columns) - self._internal_col_names + ) + val = dict((k, val) for k in user_col_names) + self.__vertex_prop_dataframe.fillna(val, inplace=True) def fillna_edges(self, val=0): @@ -1394,6 +1414,14 @@ def fillna_edges(self, val=0): Series is passed, the index or keys are the columns to fill and the values are the fill value for the corresponding column. """ + # Omit internal columns if an object is passed in to be applied to the + # entire DataFrame and assume the intent is for users to fillna only on + # their data. + if type(val) not in [dict, self.__series_type]: + user_col_names = ( + set(self.__edge_prop_dataframe.columns) - self._internal_col_names + ) + val = dict((k, val) for k in user_col_names) self.__edge_prop_dataframe.fillna(val, inplace=True) From 7db86b3cc9573d6fe0841d5ce9a7ec11a52a1f06 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:18:56 -1000 Subject: [PATCH 04/13] Address k_truss_subgraph(use_weights=) deprecation (#4389) This deprecated parameter appears to be a no-op but can raise the warning through `k_truss` with no addressable action from a user. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4389 --- python/cugraph/cugraph/community/ktruss_subgraph.py | 9 ++++----- .../cugraph/tests/community/test_k_truss_subgraph.py | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.py b/python/cugraph/cugraph/community/ktruss_subgraph.py index 15a10007610..1799c50252f 100644 --- a/python/cugraph/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/cugraph/community/ktruss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -95,11 +95,11 @@ def k_truss( G, isNx = ensure_cugraph_obj_for_nx(G) if isNx is True: - k_sub = ktruss_subgraph(G, k) + k_sub = ktruss_subgraph(G, k, use_weights=False) S = cugraph_to_nx(k_sub) return S else: - return ktruss_subgraph(G, k) + return ktruss_subgraph(G, k, use_weights=False) # FIXME: merge this function with k_truss @@ -174,8 +174,7 @@ def ktruss_subgraph( -------- >>> from cugraph.datasets import karate >>> G = karate.get_graph(download=True) - >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) - + >>> k_subgraph = cugraph.ktruss_subgraph(G, 3, use_weights=False) """ _ensure_compatible_cuda_version() diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py index c1f8f4c3546..063d7fc735f 100644 --- a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -98,7 +98,7 @@ def test_ktruss_subgraph_Graph(_, nx_ground_truth): k = 5 G = polbooks.get_graph(download=True, create_using=cugraph.Graph(directed=False)) - k_subgraph = cugraph.ktruss_subgraph(G, k) + k_subgraph = cugraph.ktruss_subgraph(G, k, use_weights=False) compare_k_truss(k_subgraph, k, nx_ground_truth) From 82d2a56a0432b7ce5d049a137982294b266e374a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:19:37 -1000 Subject: [PATCH 05/13] Remove unused benchmark fixture in test_dask_mg_random_walks (#4391) Removes an easy `PytestBenchmarkWarning` from the tests Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4391 --- python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py index 29c15a7d7c6..2db3c6f5907 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py @@ -202,7 +202,7 @@ def input_graph(request): @pytest.mark.mg @pytest.mark.cugraph_ops -def test_dask_mg_random_walks(dask_client, benchmark, input_graph): +def test_dask_mg_random_walks(dask_client, input_graph): path_data, seeds, max_depth = calc_random_walks(input_graph) df_G = input_graph.input_df.compute().reset_index(drop=True) check_random_walks(input_graph, path_data, seeds, max_depth, df_G) From d18b66209986c30a83afe6428affbcaef5c8b66a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:20:47 -1000 Subject: [PATCH 06/13] Adjust deprecated cugraph.subgraph usage in Python tests (#4386) Broken off from https://github.com/rapidsai/cugraph/pull/4271 Appears this was deprecated in favor of `induced_subgraph` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4386 --- .../community/test_subgraph_extraction.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py index 8abab3179fe..a4a68a130a3 100644 --- a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -51,7 +51,7 @@ def cugraph_call(M, verts, directed=True): G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="weight") cu_verts = cudf.Series(verts) - return cugraph.subgraph(G, cu_verts) + return cugraph.induced_subgraph(G, cu_verts) def nx_call(M, verts, directed=True): @@ -74,7 +74,7 @@ def test_subgraph_extraction_DiGraph(graph_file): verts[0] = 0 verts[1] = 1 verts[2] = 17 - cu_sg = cugraph_call(M, verts, True) + cu_sg = cugraph_call(M, verts, True)[0] nx_sg = nx_call(M, verts, True) assert compare_edges(cu_sg, nx_sg) @@ -88,7 +88,7 @@ def test_subgraph_extraction_Graph(graph_file): verts[0] = 0 verts[1] = 1 verts[2] = 17 - cu_sg = cugraph_call(M, verts, False) + cu_sg = cugraph_call(M, verts, False)[0] nx_sg = nx_call(M, verts, False) assert compare_edges(cu_sg, nx_sg) @@ -116,7 +116,7 @@ def test_subgraph_extraction_Graph_nx(graph_file): nx_sub = nx.subgraph(G, verts) cu_verts = cudf.Series(verts) - cu_sub = cugraph.subgraph(G, cu_verts) + cu_sub = cugraph.induced_subgraph(G, cu_verts)[0] for (u, v) in cu_sub.edges(): assert nx_sub.has_edge(u, v) @@ -147,19 +147,19 @@ def test_subgraph_extraction_multi_column(graph_file): verts_G1["v_0"] = verts verts_G1["v_1"] = verts + 1000 - sG1 = cugraph.subgraph(G1, verts_G1) + sG1 = cugraph.induced_subgraph(G1, verts_G1) G2 = cugraph.Graph() G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0", edge_attr="weight") - sG2 = cugraph.subgraph(G2, verts) + sG2 = cugraph.induced_subgraph(G2, verts) # FIXME: Replace with multi-column view_edge_list() - edgelist_df = sG1.edgelist.edgelist_df - edgelist_df_res = sG1.unrenumber(edgelist_df, "src") - edgelist_df_res = sG1.unrenumber(edgelist_df_res, "dst") + edgelist_df = sG1[0].edgelist.edgelist_df + edgelist_df_res = sG1[0].unrenumber(edgelist_df, "src") + edgelist_df_res = sG1[0].unrenumber(edgelist_df_res, "dst") for i in range(len(edgelist_df_res)): - assert sG2.has_edge( + assert sG2[0].has_edge( edgelist_df_res["0_src"].iloc[i], edgelist_df_res["0_dst"].iloc[i] ) @@ -180,7 +180,7 @@ def test_subgraph_extraction_graph_not_renumbered(): G.from_cudf_edgelist( gdf, source="src", destination="dst", edge_attr="wgt", renumber=False ) - Sg = cugraph.subgraph(G, sverts) + Sg = cugraph.induced_subgraph(G, sverts) assert Sg.number_of_vertices() == 3 assert Sg.number_of_edges() == 3 From 74128e88313b41304efbcf43f815a9caea1956cb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:22:03 -1000 Subject: [PATCH 07/13] Address do_expensive_check FutureWarnings in Python tests (#4390) Looks like this parameter has no effect in `sorensen` and `overlap` so it should test equivalent behavior to set to `False` and avoid the `FutureWarning` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4390 --- python/cugraph/cugraph/tests/link_prediction/test_overlap.py | 4 +++- python/cugraph/cugraph/tests/link_prediction/test_sorensen.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py index 11ef0047b63..4b00330b6c9 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py @@ -182,7 +182,9 @@ def test_directed_graph_check(graph_file, use_weight): vertex_pair = vertex_pair[:5] with pytest.raises(ValueError): - cugraph.overlap(G1, vertex_pair, use_weight) + cugraph.overlap( + G1, vertex_pair, do_expensive_check=False, use_weight=use_weight + ) @pytest.mark.sg diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py index 8806f135302..6345187a376 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py @@ -219,7 +219,9 @@ def test_directed_graph_check(read_csv, use_weight): vertex_pair = vertex_pair[:5] with pytest.raises(ValueError): - cugraph.sorensen(G1, vertex_pair, use_weight) + cugraph.sorensen( + G1, vertex_pair, do_expensive_check=False, use_weight=use_weight + ) @pytest.mark.sg From 08e7c9185070b14a6c135e029a91a42ea070fea8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:23:06 -1000 Subject: [PATCH 08/13] Address PendingDeprecationWarning from _from_edgelist (#4388) `__from_edgelist`'s default is causing a `PendingDeprecationWarning` from having a default of `legacy_renum_only=True`. It appears that this parameter has no effect so it should be OK to change the default so that this warning doesn't appear Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4388 --- .../cugraph/structure/graph_implementation/simpleGraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 99934e02b10..c90607f9bf6 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -131,7 +131,7 @@ def __from_edgelist( edge_id=None, edge_type=None, renumber=True, - legacy_renum_only=True, + legacy_renum_only=False, store_transposed=False, ): if legacy_renum_only: From 7d1c3b2886e223596a2b078b8593b3cfe01e0f63 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:24:05 -1000 Subject: [PATCH 09/13] Replace deprecated cupy.sparse with cupyx.scipy.sparse (#4381) Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4381 --- python/cugraph/cugraph/tests/generators/test_rmat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/tests/generators/test_rmat.py b/python/cugraph/cugraph/tests/generators/test_rmat.py index 9b8353a4ca5..1cee0461686 100644 --- a/python/cugraph/cugraph/tests/generators/test_rmat.py +++ b/python/cugraph/cugraph/tests/generators/test_rmat.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,7 +17,7 @@ import cudf import cugraph from cugraph.generators import rmat -from cupy.sparse import coo_matrix, triu, tril +from cupyx.scipy.sparse import coo_matrix, triu, tril import numpy as np import cupy as cp From dbd558f6fe73fa09318bfde349f953f8e518d3b0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 11 May 2024 05:33:09 -1000 Subject: [PATCH 10/13] Skip/adjust doctests for deprecations (#4380) xref https://github.com/rapidsai/cugraph/pull/4271 For methods/parameters that are deprecated, ensures the doctests will pass if `FutureWarning` or `DeprecationWarning` are made as errors Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4380 --- python/cugraph/cugraph/community/egonet.py | 4 +--- python/cugraph/cugraph/community/subgraph_extraction.py | 5 ++--- python/cugraph/cugraph/structure/symmetrize.py | 6 ++---- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/python/cugraph/cugraph/community/egonet.py b/python/cugraph/cugraph/community/egonet.py index b7341ca3bae..56ae8ce70cc 100644 --- a/python/cugraph/cugraph/community/egonet.py +++ b/python/cugraph/cugraph/community/egonet.py @@ -199,9 +199,7 @@ def batched_ego_graphs(G, seeds, radius=1, center=True, undirected=None, distanc -------- >>> from cugraph.datasets import karate >>> G = karate.get_graph(download=True) - >>> b_ego_graph, offsets = cugraph.batched_ego_graphs(G, seeds=[1,5], - ... radius=2) - + >>> cugraph.batched_ego_graphs(G, seeds=[1,5], radius=2) # doctest: +SKIP """ warning_msg = "This function is deprecated. Batched support for multiple vertices \ will be added to `ego_graph`" diff --git a/python/cugraph/cugraph/community/subgraph_extraction.py b/python/cugraph/cugraph/community/subgraph_extraction.py index 77b28d4daff..43169051be4 100644 --- a/python/cugraph/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/cugraph/community/subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -64,8 +64,7 @@ def subgraph( >>> verts[1] = 1 >>> verts[2] = 2 >>> sverts = cudf.Series(verts) - >>> Sg = cugraph.subgraph(G, sverts) - + >>> Sg = cugraph.subgraph(G, sverts) # doctest: +SKIP """ warning_msg = ( diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 28e2932d17a..3e46d81b6ff 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -73,8 +73,7 @@ def symmetrize_df( >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', ... dtype=['int32', 'int32', 'float32'], header=None) - >>> sym_df = symmetrize_df(M, '0', '1') - + >>> sym_df = symmetrize_df(M, '0', '1', multi=True) """ if not isinstance(src_name, list): src_name = [src_name] @@ -256,8 +255,7 @@ def symmetrize( >>> df['sources'] = cudf.Series(M['0']) >>> df['destinations'] = cudf.Series(M['1']) >>> df['values'] = cudf.Series(M['2']) - >>> src, dst, val = symmetrize(df, 'sources', 'destinations', 'values') - + >>> src, dst, val = symmetrize(df, 'sources', 'destinations', 'values', multi=True) """ # FIXME: Redundant check that should be done at the graph creation From b2b44b0f4dcd1a7831738b12081dd95404bc13e8 Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Mon, 13 May 2024 13:26:33 +0200 Subject: [PATCH 11/13] Update collect_comm to handle value of tuple type (#4410) Update collect_comm to handle value of tuple type Authors: - Naim (https://github.com/naimnv) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/4410 --- cpp/src/utilities/collect_comm.cuh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/cpp/src/utilities/collect_comm.cuh b/cpp/src/utilities/collect_comm.cuh index 4bc2150588e..2197409fe26 100644 --- a/cpp/src/utilities/collect_comm.cuh +++ b/cpp/src/utilities/collect_comm.cuh @@ -100,7 +100,10 @@ collect_values_for_keys(raft::handle_t const& handle, auto rx_values_for_unique_keys = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(rx_values_for_unique_keys, std::ignore) = - shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, handle.get_stream()); + shuffle_values(comm, + get_dataframe_buffer_begin(values_for_rx_unique_keys), + rx_value_counts, + handle.get_stream()); values_for_unique_keys = std::move(rx_values_for_unique_keys); } @@ -136,9 +139,9 @@ collect_values_for_keys(raft::handle_t const& handle, handle.get_stream()); unique_keys.resize(0, handle.get_stream()); - values_for_unique_keys.resize(0, handle.get_stream()); + resize_dataframe_buffer(values_for_unique_keys, 0, handle.get_stream()); unique_keys.shrink_to_fit(handle.get_stream()); - values_for_unique_keys.shrink_to_fit(handle.get_stream()); + shrink_to_fit_dataframe_buffer(values_for_unique_keys, handle.get_stream()); } auto unique_key_value_store_view = unique_key_value_store.view(); @@ -248,15 +251,15 @@ collect_values_for_unique_int_vertices(raft::handle_t const& handle, thrust::transform(handle.get_thrust_policy(), rx_int_vertices.begin(), rx_int_vertices.end(), - value_buffer.begin(), + get_dataframe_buffer_begin(value_buffer), [local_value_first, local_int_vertex_first] __device__(auto v) { return local_value_first[v - local_int_vertex_first]; }); // 3: Shuffle results back to original GPU - std::tie(value_buffer, std::ignore) = - shuffle_values(comm, value_buffer.begin(), rx_int_vertex_counts, handle.get_stream()); + std::tie(value_buffer, std::ignore) = shuffle_values( + comm, get_dataframe_buffer_begin(value_buffer), rx_int_vertex_counts, handle.get_stream()); return std::make_tuple(std::move(collect_unique_int_vertices), std::move(value_buffer)); } @@ -305,7 +308,7 @@ collect_values_for_int_vertices( thrust::transform(handle.get_thrust_policy(), collect_vertex_first, collect_vertex_last, - value_buffer.begin(), + get_dataframe_buffer_begin(value_buffer), [device_view] __device__(auto v) { return device_view.find(v); }); return value_buffer; From 79acec9501431582b7d0d4915307b40a705b1fd0 Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Mon, 13 May 2024 13:27:02 +0200 Subject: [PATCH 12/13] Update decompress_to_edgelist to handle edge types (#4397) Update decompress_to_edgelist to handle edge types Authors: - Naim (https://github.com/naimnv) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4397 --- .../detail/decompress_edge_partition.cuh | 25 ++- cpp/include/cugraph/graph_functions.hpp | 9 +- cpp/src/c_api/betweenness_centrality.cpp | 5 +- cpp/src/community/k_truss_impl.cuh | 28 +-- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 3 + cpp/src/structure/coarsen_graph_impl.cuh | 4 +- .../structure/decompress_to_edgelist_impl.cuh | 165 ++++++++++++++---- .../structure/decompress_to_edgelist_mg.cu | 64 ++++--- .../structure/decompress_to_edgelist_sg.cu | 64 ++++--- cpp/src/structure/symmetrize_graph_impl.cuh | 42 +++-- cpp/src/structure/transpose_graph_impl.cuh | 42 +++-- .../transpose_graph_storage_impl.cuh | 42 +++-- .../eigenvector_centrality_test.cpp | 3 +- cpp/tests/community/egonet_validate.cu | 14 +- cpp/tests/cores/k_core_validate.cu | 18 +- cpp/tests/prims/mg_transform_e.cu | 3 +- cpp/tests/sampling/random_walks_check.cuh | 3 +- cpp/tests/structure/mg_symmetrize_test.cpp | 32 ++-- .../structure/mg_transpose_storage_test.cpp | 36 ++-- cpp/tests/structure/mg_transpose_test.cpp | 32 ++-- cpp/tests/structure/symmetrize_test.cpp | 6 +- .../structure/transpose_storage_test.cpp | 5 +- cpp/tests/structure/transpose_test.cpp | 7 +- .../utilities/conversion_utilities_impl.cuh | 12 +- cpp/tests/utilities/debug_utilities_impl.hpp | 6 +- 25 files changed, 452 insertions(+), 218 deletions(-) diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index 6b974a326dd..c94f456f215 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -209,7 +209,11 @@ void decompress_edge_partition_to_fill_edgelist_majors( } } -template +template void decompress_edge_partition_to_edgelist( raft::handle_t const& handle, edge_partition_device_view_t edge_partition, @@ -217,12 +221,15 @@ void decompress_edge_partition_to_edgelist( edge_partition_weight_view, std::optional> edge_partition_id_view, + std::optional> + edge_partition_type_view, std::optional> edge_partition_mask_view, raft::device_span edgelist_majors /* [OUT] */, raft::device_span edgelist_minors /* [OUT] */, std::optional> edgelist_weights /* [OUT] */, std::optional> edgelist_ids /* [OUT] */, + std::optional> edgelist_types /* [OUT] */, std::optional> const& segment_offsets) { auto number_of_edges = edge_partition.number_of_edges(); @@ -271,6 +278,22 @@ void decompress_edge_partition_to_edgelist( (*edgelist_ids).begin()); } } + + if (edge_partition_type_view) { + assert(edgelist_types.has_value()); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + (*edge_partition_type_view).value_first(), + (*edge_partition_type_view).value_first() + number_of_edges, + (*edge_partition_mask_view).value_first(), + (*edgelist_types).begin()); + } else { + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_type_view).value_first(), + (*edge_partition_type_view).value_first() + number_of_edges, + (*edgelist_types).begin()); + } + } } } // namespace detail diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6d4470e8251..79ff576571e 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -344,6 +344,7 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam edge_type_t Type of edge types. Needs to be an integral type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if * true) as major indices in storing edges using a 2D sparse matrix. transposed. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) @@ -351,8 +352,9 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object of the graph to be decompressed. - * @param edge_id_view Optional view object holding edge ids for @p graph_view. * @param edge_weight_view Optional view object holding edge weights for @p graph_view. + * @param edge_id_view Optional view object holding edge ids for @p graph_view. + * @param edge_type_view Optional view object holding edge types for @p graph_view. * @param renumber_map If valid, return the renumbered edge list based on the provided @p * renumber_map * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). @@ -363,17 +365,20 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check = false); diff --git a/cpp/src/c_api/betweenness_centrality.cpp b/cpp/src/c_api/betweenness_centrality.cpp index 577826fe097..a3f7252891c 100644 --- a/cpp/src/c_api/betweenness_centrality.cpp +++ b/cpp/src/c_api/betweenness_centrality.cpp @@ -234,12 +234,13 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun normalized_, do_expensive_check_); - auto [src_ids, dst_ids, output_centralities, output_edge_ids] = - cugraph::decompress_to_edgelist( + auto [src_ids, dst_ids, output_centralities, output_edge_ids, output_edge_types] = + cugraph::decompress_to_edgelist( handle_, graph_view, std::make_optional(centralities.view()), (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + std::nullopt, (number_map != nullptr) ? std::make_optional(raft::device_span{ number_map->data(), number_map->size()}) : std::nullopt); diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 3db9fd70de2..7f96312703d 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -671,12 +671,14 @@ k_truss(raft::handle_t const& handle, edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore) = decompress_to_edgelist( - handle, - cur_graph_view, - edge_weight_view, - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + cur_graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); auto num_triangles = edge_triangle_count( handle, @@ -894,12 +896,14 @@ k_truss(raft::handle_t const& handle, num_triangles.resize(num_edges_with_triangles, handle.get_stream()); } - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore) = decompress_to_edgelist( - handle, - cur_graph_view, - edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + cur_graph_view, + edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index 244586e6d9e..c938b10fbbb 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -309,16 +309,19 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( detail::decompress_edge_partition_to_edgelist( handle, edge_partition, std::nullopt, std::nullopt, + std::nullopt, edge_partition_e_mask, raft::device_span(majors.data(), majors.size()), raft::device_span(minors.data(), minors.size()), std::nullopt, std::nullopt, + std::nullopt, segment_offsets); auto vertex_pair_first = diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index f83cd752285..0689dc4a53a 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -171,11 +171,12 @@ decompress_edge_partition_to_relabeled_and_grouped_and_coarsened_edgelist( ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition, edge_partition_weight_view, std::nullopt, + std::nullopt, edge_partition_e_mask, raft::device_span(edgelist_majors.data(), edgelist_majors.size()), raft::device_span(edgelist_minors.data(), edgelist_minors.size()), @@ -183,6 +184,7 @@ decompress_edge_partition_to_relabeled_and_grouped_and_coarsened_edgelist( (*edgelist_weights).size()) : std::nullopt, std::nullopt, + std::nullopt, segment_offsets); auto pair_first = diff --git a/cpp/src/structure/decompress_to_edgelist_impl.cuh b/cpp/src/structure/decompress_to_edgelist_impl.cuh index 55230c0a05a..d94040493e2 100644 --- a/cpp/src/structure/decompress_to_edgelist_impl.cuh +++ b/cpp/src/structure/decompress_to_edgelist_impl.cuh @@ -49,18 +49,21 @@ namespace { template std::enable_if_t, rmm::device_uvector, std::optional>, - std::optional>>> + std::optional>, + std::optional>>> decompress_to_edgelist_impl( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check) { @@ -99,10 +102,13 @@ decompress_to_edgelist_impl( auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; + auto edgelist_types = edge_type_view ? std::make_optional>( + edgelist_majors.size(), handle.get_stream()) + : std::nullopt; size_t cur_size{0}; for (size_t i = 0; i < edgelist_edge_counts.size(); ++i) { - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( graph_view.local_edge_partition_view(i)), @@ -115,6 +121,11 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_id_view), i) : std::nullopt, + edge_type_view + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + (*edge_type_view), i) + : std::nullopt, graph_view.has_edge_mask() ? std::make_optional< detail::edge_partition_edge_property_device_view_t>( @@ -128,6 +139,9 @@ decompress_to_edgelist_impl( edgelist_ids ? std::make_optional>( (*edgelist_ids).data() + cur_size, edgelist_edge_counts[i]) : std::nullopt, + edgelist_types ? std::make_optional>( + (*edgelist_types).data() + cur_size, edgelist_edge_counts[i]) + : std::nullopt, graph_view.local_edge_partition_segment_offsets(i)); cur_size += edgelist_edge_counts[i]; } @@ -153,36 +167,95 @@ decompress_to_edgelist_impl( for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { major_ptrs[i] = edgelist_majors.data() + cur_size; minor_ptrs[i] = edgelist_minors.data() + cur_size; + if (edgelist_weights) { if (edgelist_ids) { - thrust::sort_by_key( - handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], - (*edgelist_ids).data() + cur_size, - (*edgelist_weights).data() + cur_size))); + if (edgelist_types) { + auto zip_itr = + thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], + (*edgelist_weights).data() + cur_size, + (*edgelist_ids).data() + cur_size, + (*edgelist_types).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + + } else { + auto zip_itr = + thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], + (*edgelist_weights).data() + cur_size, + (*edgelist_ids).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - thrust::make_zip_iterator(thrust::make_tuple( - major_ptrs[i], (*edgelist_weights).data() + cur_size))); + if (edgelist_types) { + auto zip_itr = + thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], + (*edgelist_weights).data() + cur_size, + (*edgelist_types).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + + } else { + auto zip_itr = thrust::make_zip_iterator( + thrust::make_tuple(major_ptrs[i], (*edgelist_weights).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + } } } else { if (edgelist_ids) { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - thrust::make_zip_iterator(thrust::make_tuple( - major_ptrs[i], (*edgelist_ids).data() + cur_size))); + if (edgelist_types) { + auto zip_itr = + thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], + (*edgelist_ids).data() + cur_size, + (*edgelist_types).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + + } else { + auto zip_itr = thrust::make_zip_iterator( + thrust::make_tuple(major_ptrs[i], (*edgelist_ids).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - major_ptrs[i]); + if (edgelist_types) { + auto zip_itr = thrust::make_zip_iterator( + thrust::make_tuple(major_ptrs[i], (*edgelist_types).data() + cur_size)); + + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + zip_itr); + + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + major_ptrs[i]); + } } } + rmm::device_uvector d_segment_offsets(d_thresholds.size(), handle.get_stream()); thrust::lower_bound(handle.get_thrust_policy(), minor_ptrs[i], @@ -214,24 +287,28 @@ decompress_to_edgelist_impl( return std::make_tuple(store_transposed ? std::move(edgelist_minors) : std::move(edgelist_majors), store_transposed ? std::move(edgelist_majors) : std::move(edgelist_minors), std::move(edgelist_weights), - std::move(edgelist_ids)); + std::move(edgelist_ids), + std::move(edgelist_types)); } template std::enable_if_t, rmm::device_uvector, std::optional>, - std::optional>>> + std::optional>, + std::optional>>> decompress_to_edgelist_impl( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check) { @@ -258,7 +335,12 @@ decompress_to_edgelist_impl( auto edgelist_ids = edge_id_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; - detail::decompress_edge_partition_to_edgelist( + + auto edgelist_types = edge_type_view ? std::make_optional>( + edgelist_majors.size(), handle.get_stream()) + : std::nullopt; + + detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( graph_view.local_edge_partition_view()), @@ -271,6 +353,11 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_id_view), 0) : std::nullopt, + edge_type_view + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + (*edge_type_view), 0) + : std::nullopt, graph_view.has_edge_mask() ? std::make_optional< detail::edge_partition_edge_property_device_view_t>( @@ -284,6 +371,10 @@ decompress_to_edgelist_impl( edgelist_ids ? std::make_optional>((*edgelist_ids).data(), (*edgelist_ids).size()) : std::nullopt, + edgelist_types ? std::make_optional>((*edgelist_types).data(), + (*edgelist_types).size()) + : std::nullopt, + graph_view.local_edge_partition_segment_offsets()); if (renumber_map) { @@ -299,7 +390,8 @@ decompress_to_edgelist_impl( return std::make_tuple(store_transposed ? std::move(edgelist_minors) : std::move(edgelist_majors), store_transposed ? std::move(edgelist_majors) : std::move(edgelist_minors), std::move(edgelist_weights), - std::move(edgelist_ids)); + std::move(edgelist_ids), + std::move(edgelist_types)); } } // namespace @@ -307,22 +399,35 @@ decompress_to_edgelist_impl( template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check) { - return decompress_to_edgelist_impl( - handle, graph_view, edge_weight_view, edge_id_view, renumber_map, do_expensive_check); + return decompress_to_edgelist_impl(handle, + graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + renumber_map, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/structure/decompress_to_edgelist_mg.cu b/cpp/src/structure/decompress_to_edgelist_mg.cu index 8321477d0a0..f8177d0417d 100644 --- a/cpp/src/structure/decompress_to_edgelist_mg.cu +++ b/cpp/src/structure/decompress_to_edgelist_mg.cu @@ -22,144 +22,168 @@ namespace cugraph { template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); diff --git a/cpp/src/structure/decompress_to_edgelist_sg.cu b/cpp/src/structure/decompress_to_edgelist_sg.cu index 09f42750f50..f2c1e07fde6 100644 --- a/cpp/src/structure/decompress_to_edgelist_sg.cu +++ b/cpp/src/structure/decompress_to_edgelist_sg.cu @@ -22,144 +22,168 @@ namespace cugraph { template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, + std::optional>, std::optional>> -decompress_to_edgelist( +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, std::optional>, - std::optional>> -decompress_to_edgelist( + std::optional>, + std::optional>> +decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, std::optional> edge_id_view, + std::optional> edge_type_view, std::optional> renumber_map, bool do_expensive_check); diff --git a/cpp/src/structure/symmetrize_graph_impl.cuh b/cpp/src/structure/symmetrize_graph_impl.cuh index baddc5f1da1..ae8f2b0a608 100644 --- a/cpp/src/structure/symmetrize_graph_impl.cuh +++ b/cpp/src/structure/symmetrize_graph_impl.cuh @@ -78,15 +78,17 @@ symmetrize_graph_impl( rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_weights{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - edge_weights - ? std::optional>{(*edge_weights).view()} - : std::nullopt, - std::optional>{std::nullopt}, - std::make_optional>((*renumber_map).data(), - (*renumber_map).size())); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + graph_view, + edge_weights + ? std::optional>{(*edge_weights).view()} + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*renumber_map).data(), + (*renumber_map).size())); graph = graph_t(handle); std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights) = @@ -168,16 +170,18 @@ symmetrize_graph_impl( rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_weights{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - edge_weights - ? std::optional>{(*edge_weights).view()} - : std::nullopt, - std::optional>{std::nullopt}, - renumber_map ? std::make_optional>((*renumber_map).data(), - (*renumber_map).size()) - : std::nullopt); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + graph_view, + edge_weights + ? std::optional>{(*edge_weights).view()} + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + renumber_map ? std::make_optional>((*renumber_map).data(), + (*renumber_map).size()) + : std::nullopt); graph = graph_t(handle); std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights) = diff --git a/cpp/src/structure/transpose_graph_impl.cuh b/cpp/src/structure/transpose_graph_impl.cuh index 710d222ad61..a1eef536c43 100644 --- a/cpp/src/structure/transpose_graph_impl.cuh +++ b/cpp/src/structure/transpose_graph_impl.cuh @@ -79,15 +79,17 @@ transpose_graph_impl( rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_weights{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - edge_weights - ? std::optional>{(*edge_weights).view()} - : std::nullopt, - std::optional>{std::nullopt}, - std::make_optional>((*renumber_map).data(), - (*renumber_map).size())); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + graph_view, + edge_weights + ? std::optional>{(*edge_weights).view()} + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*renumber_map).data(), + (*renumber_map).size())); graph = graph_t(handle); std::tie(store_transposed ? edgelist_srcs : edgelist_dsts, @@ -175,16 +177,18 @@ transpose_graph_impl( rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_weights{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - edge_weights - ? std::optional>{(*edge_weights).view()} - : std::nullopt, - std::optional>{std::nullopt}, - renumber_map ? std::make_optional>((*renumber_map).data(), - (*renumber_map).size()) - : std::nullopt); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + graph_view, + edge_weights + ? std::optional>{(*edge_weights).view()} + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + renumber_map ? std::make_optional>((*renumber_map).data(), + (*renumber_map).size()) + : std::nullopt); graph = graph_t(handle); auto vertices = renumber ? std::move(renumber_map) : std::make_optional>(number_of_vertices, diff --git a/cpp/src/structure/transpose_graph_storage_impl.cuh b/cpp/src/structure/transpose_graph_storage_impl.cuh index 53285b47d3a..f8e479f28ff 100644 --- a/cpp/src/structure/transpose_graph_storage_impl.cuh +++ b/cpp/src/structure/transpose_graph_storage_impl.cuh @@ -79,15 +79,17 @@ transpose_graph_storage_impl( rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_weights{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - edge_weights - ? std::optional>{(*edge_weights).view()} - : std::nullopt, - std::optional>{std::nullopt}, - std::make_optional>((*renumber_map).data(), - (*renumber_map).size())); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + graph_view, + edge_weights + ? std::optional>{(*edge_weights).view()} + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*renumber_map).data(), + (*renumber_map).size())); graph = graph_t(handle); std::tie(!store_transposed ? edgelist_dsts : edgelist_srcs, @@ -180,16 +182,18 @@ transpose_graph_storage_impl( rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_weights{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - edge_weights - ? std::optional>{(*edge_weights).view()} - : std::nullopt, - std::optional>{std::nullopt}, - renumber_map ? std::make_optional>((*renumber_map).data(), - (*renumber_map).size()) - : std::nullopt); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + graph_view, + edge_weights + ? std::optional>{(*edge_weights).view()} + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + renumber_map ? std::make_optional>((*renumber_map).data(), + (*renumber_map).size()) + : std::nullopt); graph = graph_t(handle); auto vertices = renumber ? std::move(renumber_map) : std::make_optional>(number_of_vertices, diff --git a/cpp/tests/centrality/eigenvector_centrality_test.cpp b/cpp/tests/centrality/eigenvector_centrality_test.cpp index c80c762d382..ab87cccd0e3 100644 --- a/cpp/tests/centrality/eigenvector_centrality_test.cpp +++ b/cpp/tests/centrality/eigenvector_centrality_test.cpp @@ -181,11 +181,12 @@ class Tests_EigenvectorCentrality rmm::device_uvector src_v(0, handle.get_stream()); std::optional> opt_wgt_v{std::nullopt}; - std::tie(dst_v, src_v, opt_wgt_v, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(dst_v, src_v, opt_wgt_v, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); auto h_src = cugraph::test::to_host(handle, src_v); diff --git a/cpp/tests/community/egonet_validate.cu b/cpp/tests/community/egonet_validate.cu index 9830deb8e1b..541baee2be9 100644 --- a/cpp/tests/community/egonet_validate.cu +++ b/cpp/tests/community/egonet_validate.cu @@ -48,12 +48,14 @@ egonet_reference( rmm::device_uvector d_coo_dst(0, handle.get_stream()); std::optional> d_coo_wgt{std::nullopt}; - std::tie(d_coo_src, d_coo_dst, d_coo_wgt, std::ignore) = cugraph::decompress_to_edgelist( - handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}); + std::tie(d_coo_src, d_coo_dst, d_coo_wgt, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); #else // FIXME: This should be faster (smaller list of edges to operate on), but uniform_nbr_sample // doesn't preserve multi-edges (which is probably a bug) diff --git a/cpp/tests/cores/k_core_validate.cu b/cpp/tests/cores/k_core_validate.cu index b264ed53540..53c97dd466b 100644 --- a/cpp/tests/cores/k_core_validate.cu +++ b/cpp/tests/cores/k_core_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,13 +65,15 @@ void check_correctness( rmm::device_uvector graph_dst(0, handle.get_stream()); std::optional> graph_wgt{std::nullopt}; - std::tie(graph_src, graph_dst, graph_wgt, std::ignore) = cugraph::decompress_to_edgelist( - handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); + std::tie(graph_src, graph_dst, graph_wgt, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); // Now we'll count how many edges should be in the subgraph auto expected_edge_count = diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index 92046bdd8f8..2b8d5d52905 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -127,11 +127,12 @@ class Tests_MGTransformE if (prims_usecase.use_edgelist) { rmm::device_uvector srcs(0, handle_->get_stream()); rmm::device_uvector dsts(0, handle_->get_stream()); - std::tie(srcs, dsts, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(store_transposed ? dsts.begin() : srcs.begin(), diff --git a/cpp/tests/sampling/random_walks_check.cuh b/cpp/tests/sampling/random_walks_check.cuh index 399bf991785..0fd73b5bba7 100644 --- a/cpp/tests/sampling/random_walks_check.cuh +++ b/cpp/tests/sampling/random_walks_check.cuh @@ -40,11 +40,12 @@ void random_walks_validate( rmm::device_uvector d_dst(0, handle.get_stream()); std::optional> d_wgt{std::nullopt}; - std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(d_src, d_dst, d_wgt, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); if constexpr (multi_gpu) { diff --git a/cpp/tests/structure/mg_symmetrize_test.cpp b/cpp/tests/structure/mg_symmetrize_test.cpp index 7eb387f3915..e607370f62a 100644 --- a/cpp/tests/structure/mg_symmetrize_test.cpp +++ b/cpp/tests/structure/mg_symmetrize_test.cpp @@ -128,14 +128,16 @@ class Tests_MGSymmetrize rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); std::optional> d_mg_weights{std::nullopt}; - std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( - *handle_, - mg_graph.view(), - mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - mg_renumber_map ? std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()) - : std::nullopt); + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, + mg_graph.view(), + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + mg_renumber_map ? std::make_optional>( + (*mg_renumber_map).data(), (*mg_renumber_map).size()) + : std::nullopt); // 4-2. aggregate MG results @@ -166,12 +168,14 @@ class Tests_MGSymmetrize rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); std::optional> d_sg_weights{std::nullopt}; - std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}); + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); // 4-5. compare diff --git a/cpp/tests/structure/mg_transpose_storage_test.cpp b/cpp/tests/structure/mg_transpose_storage_test.cpp index 4cbbe500dd8..c8b4f70f1e2 100644 --- a/cpp/tests/structure/mg_transpose_storage_test.cpp +++ b/cpp/tests/structure/mg_transpose_storage_test.cpp @@ -136,16 +136,18 @@ class Tests_MGTransposeStorage rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); std::optional> d_mg_weights{std::nullopt}; - std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( - *handle_, - mg_storage_transposed_graph.view(), - mg_storage_transposed_edge_weights - ? std::make_optional((*mg_storage_transposed_edge_weights).view()) - : std::nullopt, - std::optional>{std::nullopt}, - mg_renumber_map ? std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()) - : std::nullopt); + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, + mg_storage_transposed_graph.view(), + mg_storage_transposed_edge_weights + ? std::make_optional((*mg_storage_transposed_edge_weights).view()) + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + mg_renumber_map ? std::make_optional>( + (*mg_renumber_map).data(), (*mg_renumber_map).size()) + : std::nullopt); // 3-2. aggregate MG results @@ -165,12 +167,14 @@ class Tests_MGTransposeStorage rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); std::optional> d_sg_weights{std::nullopt}; - std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}); + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); // 3-4. compare diff --git a/cpp/tests/structure/mg_transpose_test.cpp b/cpp/tests/structure/mg_transpose_test.cpp index 80cdcae070a..4428f8430d5 100644 --- a/cpp/tests/structure/mg_transpose_test.cpp +++ b/cpp/tests/structure/mg_transpose_test.cpp @@ -126,14 +126,16 @@ class Tests_MGTranspose rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); std::optional> d_mg_weights{std::nullopt}; - std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( - *handle_, - mg_graph.view(), - mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - mg_renumber_map ? std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()) - : std::nullopt); + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, + mg_graph.view(), + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + mg_renumber_map ? std::make_optional>( + (*mg_renumber_map).data(), (*mg_renumber_map).size()) + : std::nullopt); // 4-2. aggregate MG results @@ -161,12 +163,14 @@ class Tests_MGTranspose rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); std::optional> d_sg_weights{std::nullopt}; - std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}); + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); // 4-5. compare diff --git a/cpp/tests/structure/symmetrize_test.cpp b/cpp/tests/structure/symmetrize_test.cpp index 383411ccf2b..a0d7118b098 100644 --- a/cpp/tests/structure/symmetrize_test.cpp +++ b/cpp/tests/structure/symmetrize_test.cpp @@ -214,12 +214,13 @@ class Tests_Symmetrize std::optional> d_org_weights{std::nullopt}; if (symmetrize_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, d_renumber_map_labels ? std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) @@ -249,12 +250,13 @@ class Tests_Symmetrize rmm::device_uvector d_symm_dsts(0, handle.get_stream()); std::optional> d_symm_weights{std::nullopt}; - std::tie(d_symm_srcs, d_symm_dsts, d_symm_weights, std::ignore) = + std::tie(d_symm_srcs, d_symm_dsts, d_symm_weights, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, d_renumber_map_labels ? std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/structure/transpose_storage_test.cpp b/cpp/tests/structure/transpose_storage_test.cpp index c4f2523727c..144cebd98d6 100644 --- a/cpp/tests/structure/transpose_storage_test.cpp +++ b/cpp/tests/structure/transpose_storage_test.cpp @@ -78,12 +78,13 @@ class Tests_TransposeStorage rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; if (transpose_storage_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, d_renumber_map_labels ? std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) @@ -118,6 +119,7 @@ class Tests_TransposeStorage std::tie(d_storage_transposed_srcs, d_storage_transposed_dsts, d_storage_transposed_weights, + std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, @@ -126,6 +128,7 @@ class Tests_TransposeStorage ? std::make_optional((*storage_transposed_edge_weights).view()) : std::nullopt, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, d_renumber_map_labels ? std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/structure/transpose_test.cpp b/cpp/tests/structure/transpose_test.cpp index c748926b749..eba99b52730 100644 --- a/cpp/tests/structure/transpose_test.cpp +++ b/cpp/tests/structure/transpose_test.cpp @@ -78,12 +78,13 @@ class Tests_Transpose rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; if (transpose_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, d_renumber_map_labels ? std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) @@ -109,12 +110,14 @@ class Tests_Transpose rmm::device_uvector d_transposed_dsts(0, handle.get_stream()); std::optional> d_transposed_weights{std::nullopt}; - std::tie(d_transposed_srcs, d_transposed_dsts, d_transposed_weights, std::ignore) = + std::tie( + d_transposed_srcs, d_transposed_dsts, d_transposed_weights, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, d_renumber_map_labels ? std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/utilities/conversion_utilities_impl.cuh b/cpp/tests/utilities/conversion_utilities_impl.cuh index fb2af023c03..6eb7357eedd 100644 --- a/cpp/tests/utilities/conversion_utilities_impl.cuh +++ b/cpp/tests/utilities/conversion_utilities_impl.cuh @@ -56,11 +56,12 @@ graph_to_host_compressed_sparse( rmm::device_uvector d_dst(0, handle.get_stream()); std::optional> d_wgt{std::nullopt}; - std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(d_src, d_dst, d_wgt, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, renumber_map); if constexpr (is_multi_gpu) { @@ -146,11 +147,12 @@ graph_to_host_coo( rmm::device_uvector d_dst(0, handle.get_stream()); std::optional> d_wgt{std::nullopt}; - std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(d_src, d_dst, d_wgt, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, renumber_map); if constexpr (is_multi_gpu) { @@ -206,11 +208,12 @@ graph_to_device_coo( rmm::device_uvector d_dst(0, handle.get_stream()); std::optional> d_wgt{std::nullopt}; - std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(d_src, d_dst, d_wgt, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, renumber_map); if constexpr (is_multi_gpu) { @@ -292,11 +295,12 @@ mg_graph_to_sg_graph( rmm::device_uvector d_dst(0, handle.get_stream()); std::optional> d_wgt{std::nullopt}; - std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(d_src, d_dst, d_wgt, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, renumber_map); d_src = cugraph::test::device_gatherv( diff --git a/cpp/tests/utilities/debug_utilities_impl.hpp b/cpp/tests/utilities/debug_utilities_impl.hpp index d4b9f356e96..d30c4b3ac85 100644 --- a/cpp/tests/utilities/debug_utilities_impl.hpp +++ b/cpp/tests/utilities/debug_utilities_impl.hpp @@ -31,9 +31,9 @@ void print_edges( std::optional> edge_weight_view, std::optional> renumber_map) { - auto [srcs, dsts, weights, edge_ids] = - cugraph::decompress_to_edgelist( - handle, graph_view, edge_weight_view, std::nullopt, renumber_map); + auto [srcs, dsts, weights, edge_ids, edge_types] = cugraph:: + decompress_to_edgelist( + handle, graph_view, edge_weight_view, std::nullopt, std::nullopt, renumber_map); raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); if (weights) { From 45371cbde5411a910130787337f9eccb7fd683c7 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Tue, 14 May 2024 10:18:44 -0400 Subject: [PATCH 13/13] [FEA] Support Seed Retention for Sampling with Renumbering (#4355) Exposes the ability to retain seeds even if they have no outgoing edges (and therefore are not sampled). Required to fix the current bug in cuGraph-PyG involving batch size and dropping seeds. Currently, this functionality can't be exposed through the MG Python API (#4358) but exposing it through the pylibcugraph API is sufficient to resolve this issue. This PR does expose it through the SG Python API. Authors: - Alex Barghi (https://github.com/alexbarghi-nv) - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4355 --- cpp/include/cugraph_c/sampling_algorithms.h | 12 ++++ cpp/src/c_api/uniform_neighbor_sampling.cpp | 60 ++++++++++++++----- cpp/tests/c_api/create_graph_test.c | 1 + .../c_api/mg_uniform_neighbor_sample_test.c | 4 ++ .../c_api/uniform_neighbor_sample_test.c | 2 + .../sampling/uniform_neighbor_sample.py | 15 ++++- .../sampling/test_uniform_neighbor_sample.py | 40 +++++++++++++ .../pylibcugraph/_cugraph_c/algorithms.pxd | 6 ++ .../_cugraph_c/sampling_algorithms.pxd | 3 +- .../pylibcugraph/uniform_neighbor_sample.pyx | 32 ++++++++++ 10 files changed, 158 insertions(+), 17 deletions(-) diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 5760d2098aa..859eaca7f3b 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -236,6 +236,15 @@ typedef enum cugraph_compression_type_t { cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t** options, cugraph_error_t** error); +/** + * @ingroup samplingC + * @brief Set flag to retain seeds (original sources) + * + * @param options - opaque pointer to the sampling options + * @param value - Boolean value to assign to the option + */ +void cugraph_sampling_set_retain_seeds(cugraph_sampling_options_t* options, bool_t value); + /** * @ingroup samplingC * @brief Set flag to renumber results @@ -335,6 +344,8 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options); * output. If specified then the all data from @p label_list[i] will be shuffled to rank @p. This * cannot be specified unless @p start_vertex_labels is also specified * label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks. + * @param [in] label_offsets Device array of the offsets for each label in the seed list. This + * parameter is only used with the retain_seeds option. * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm. * We only support fanout values of type INT32 * @param [in/out] rng_state State of the random number generator, updated with each call @@ -354,6 +365,7 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( const cugraph_type_erased_device_array_view_t* start_vertex_labels, const cugraph_type_erased_device_array_view_t* label_list, const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, cugraph_rng_state_t* rng_state, const cugraph_sampling_options_t* options, diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index 100e81a5bd2..45609fc0e01 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -40,6 +40,7 @@ struct cugraph_sampling_options_t { bool_t renumber_results_{FALSE}; cugraph_compression_type_t compression_type_{cugraph_compression_type_t::COO}; bool_t compress_per_hop_{FALSE}; + bool_t retain_seeds_{FALSE}; }; struct cugraph_sample_result_t { @@ -68,6 +69,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_labels_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_list_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_to_comm_rank_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; cugraph::c_api::cugraph_sampling_options_t options_{}; @@ -81,6 +83,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct cugraph_type_erased_device_array_view_t const* start_vertex_labels, cugraph_type_erased_device_array_view_t const* label_list, cugraph_type_erased_device_array_view_t const* label_to_comm_rank, + cugraph_type_erased_device_array_view_t const* label_offsets, cugraph_type_erased_host_array_view_t const* fan_out, cugraph_rng_state_t* rng_state, cugraph::c_api::cugraph_sampling_options_t options, @@ -99,6 +102,9 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct label_to_comm_rank_( reinterpret_cast( label_to_comm_rank)), + label_offsets_( + reinterpret_cast( + label_offsets)), fan_out_( reinterpret_cast(fan_out)), rng_state_(reinterpret_cast(rng_state)), @@ -267,8 +273,13 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct std::move(edge_id), std::move(edge_type), std::move(hop), - std::nullopt, - std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + label_offsets_->as_type(), label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -304,8 +315,13 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct std::move(edge_id), std::move(edge_type), std::move(hop), - std::nullopt, - std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + label_offsets_->as_type(), label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -402,6 +418,12 @@ extern "C" cugraph_error_code_t cugraph_sampling_options_create( return CUGRAPH_SUCCESS; } +extern "C" void cugraph_sampling_set_retain_seeds(cugraph_sampling_options_t* options, bool_t value) +{ + auto internal_pointer = reinterpret_cast(options); + internal_pointer->retain_seeds_ = value; +} + extern "C" void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, bool_t value) { @@ -871,6 +893,7 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( const cugraph_type_erased_device_array_view_t* start_vertex_labels, const cugraph_type_erased_device_array_view_t* label_list, const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, cugraph_rng_state_t* rng_state, const cugraph_sampling_options_t* options, @@ -878,6 +901,13 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( cugraph_sample_result_t** result, cugraph_error_t** error) { + auto options_cpp = *reinterpret_cast(options); + + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (label_offsets != nullptr), + CUGRAPH_INVALID_INPUT, + "must specify label_offsets if retain_seeds is true", + *error); + CAPI_EXPECTS((start_vertex_labels == nullptr) || (reinterpret_cast( start_vertex_labels) @@ -911,16 +941,16 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( "fan_out should be of type int", *error); - uniform_neighbor_sampling_functor functor{ - handle, - graph, - start_vertices, - start_vertex_labels, - label_list, - label_to_comm_rank, - fan_out, - rng_state, - *reinterpret_cast(options), - do_expensive_check}; + uniform_neighbor_sampling_functor functor{handle, + graph, + start_vertices, + start_vertex_labels, + label_list, + label_to_comm_rank, + label_offsets, + fan_out, + rng_state, + std::move(options_cpp), + do_expensive_check}; return cugraph::c_api::run_algorithm(graph, functor, result, error); } diff --git a/cpp/tests/c_api/create_graph_test.c b/cpp/tests/c_api/create_graph_test.c index 758624a89e9..41b8691e79c 100644 --- a/cpp/tests/c_api/create_graph_test.c +++ b/cpp/tests/c_api/create_graph_test.c @@ -268,6 +268,7 @@ int test_create_sg_graph_csr() NULL, NULL, NULL, + NULL, h_fan_out_view, rng_state, sampling_options, diff --git a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c index a32056a9f15..3d8fb02ed46 100644 --- a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c @@ -133,6 +133,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle d_start_labels_view, NULL, NULL, + NULL, h_fan_out_view, rng_state, sampling_options, @@ -565,6 +566,7 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle) d_label_view, NULL, NULL, + NULL, h_fan_out_view, rng_state, sampling_options, @@ -841,6 +843,7 @@ int test_uniform_neighbor_sample_alex_bug(const cugraph_resource_handle_t* handl d_start_labels_view, d_label_list_view, d_label_to_output_comm_rank_view, + NULL, h_fan_out_view, rng_state, sampling_options, @@ -1099,6 +1102,7 @@ int test_uniform_neighbor_sample_sort_by_hop(const cugraph_resource_handle_t* ha d_start_labels_view, d_label_list_view, d_label_to_output_comm_rank_view, + NULL, h_fan_out_view, rng_state, sampling_options, diff --git a/cpp/tests/c_api/uniform_neighbor_sample_test.c b/cpp/tests/c_api/uniform_neighbor_sample_test.c index 94f0f788354..451dbca51a7 100644 --- a/cpp/tests/c_api/uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/uniform_neighbor_sample_test.c @@ -140,6 +140,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle d_start_labels_view, NULL, NULL, + NULL, h_fan_out_view, rng_state, sampling_options, @@ -661,6 +662,7 @@ int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* ha d_start_labels_view, NULL, NULL, + NULL, h_fan_out_view, rng_state, sampling_options, diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py index 86b33594ed7..eafadfa4ff0 100644 --- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py @@ -71,6 +71,8 @@ def uniform_neighbor_sample( prior_sources_behavior: str = None, deduplicate_sources: bool = False, renumber: bool = False, + retain_seeds: bool = False, + label_offsets: Sequence = None, use_legacy_names: bool = True, # deprecated compress_per_hop: bool = False, compression: str = "COO", @@ -142,6 +144,15 @@ def uniform_neighbor_sample( will return the renumber map and renumber map offsets as an additional dataframe. + retain_seeds: bool, optional (default=False) + If True, will retain the original seeds (original source vertices) + in the output even if they do not have outgoing neighbors. + + label_offsets: integer sequence, optional (default=None) + Offsets of each label within the start vertex list. + Only used if retain_seeds is True. Required if retain_seeds + is True. + use_legacy_names: bool, optional (default=True) Whether to use the legacy column names (sources, destinations). If True, will use "sources" and "destinations" as the column names. @@ -342,13 +353,15 @@ def uniform_neighbor_sample( else None, h_fan_out=fanout_vals, with_replacement=with_replacement, - do_expensive_check=False, + do_expensive_check=True, with_edge_properties=with_edge_properties, random_state=random_state, prior_sources_behavior=prior_sources_behavior, deduplicate_sources=deduplicate_sources, return_hops=return_hops, renumber=renumber, + retain_seeds=retain_seeds, + label_offsets=label_offsets, compression=compression, compress_per_hop=compress_per_hop, return_dict=True, diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 560b80993d9..304ead6fea9 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -963,6 +963,46 @@ def test_uniform_neighbor_sample_csr_csc_local(hops, seed): assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])]) +def test_uniform_neighbor_sample_retain_seeds(): + src = cupy.array([0, 1, 2, 3, 4, 5], dtype="int64") + dst = cupy.array([2, 3, 1, 7, 5, 6], dtype="int64") + + seeds = cupy.array([6, 0, 1, 7], dtype="int64") + batch = cupy.array([0, 0, 1, 1], dtype="int32") + batch_offsets = cupy.array([0, 2, 4], dtype="int64") + + fanout = [2, 2] + + df = cudf.DataFrame({"src": src, "dst": dst}) + + G = cugraph.MultiGraph(directed=True) + G.from_cudf_edgelist(df, source="src", destination="dst") + + batch_df = cudf.DataFrame({"seeds": seeds, "batch": batch}) + batch_offsets_s = cudf.Series(batch_offsets, name="batch_offsets") + results, offsets, renumber_map = cugraph.uniform_neighbor_sample( + G, + batch_df, + fanout, + with_replacement=False, + with_edge_properties=True, + with_batch_ids=True, + random_state=62, + return_offsets=True, + label_offsets=batch_offsets_s, + return_hops=True, + prior_sources_behavior="exclude", + deduplicate_sources=True, + renumber=True, + retain_seeds=True, + compress_per_hop=False, + ) + + assert offsets.renumber_map_offsets.dropna().values_host.tolist() == [0, 4, 7] + assert renumber_map.renumber_map.values_host[[0, 1]].tolist() == [0, 6] + assert renumber_map.renumber_map.values_host[[4, 5]].tolist() == [1, 7] + + @pytest.mark.sg @pytest.mark.skip(reason="needs to be written!") def test_uniform_neighbor_sample_dcsr_dcsc_global(): diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index b0e7ffaf82d..4da7c4328fd 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -292,6 +292,12 @@ cdef extern from "cugraph_c/algorithms.h": bool_t value, ) + cdef void \ + cugraph_sampling_set_retain_seeds( + cugraph_sampling_options_t* options, + bool_t value, + ) + cdef void \ cugraph_sampling_set_with_replacement( cugraph_sampling_options_t* options, diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index c32b57f8621..dbd3ef4b7e1 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -50,6 +50,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": const cugraph_type_erased_device_array_view_t* start_vertex_labels, const cugraph_type_erased_device_array_view_t* label_list, const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, cugraph_rng_state_t* rng_state, const cugraph_sampling_options_t* options, diff --git a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx index b4145a80095..f002622f497 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx @@ -49,6 +49,7 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_sampling_set_renumber_results, cugraph_sampling_set_compress_per_hop, cugraph_sampling_set_compression_type, + cugraph_sampling_set_retain_seeds, ) from pylibcugraph._cugraph_c.sampling_algorithms cimport ( cugraph_uniform_neighbor_sample, @@ -89,10 +90,12 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, batch_id_list=None, label_list=None, label_to_output_comm_rank=None, + label_offsets=None, prior_sources_behavior=None, deduplicate_sources=False, return_hops=False, renumber=False, + retain_seeds=False, compression='COO', compress_per_hop=False, random_state=None, @@ -143,6 +146,9 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, worker that should hold results for that batch id. Defaults to NULL (does nothing) + label_offsets: list[int] (Optional) + Offsets of each label within the start vertex list. + prior_sources_behavior: str (Optional) Options are "carryover", and "exclude". Default will leave the source list as-is. @@ -160,6 +166,11 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, per-batch basis and return the renumber map and batch offsets in additional to the standard returns. + retain_seeds: bool (Optional) + If True, will retain the original seeds (original source vertices) + in the output even if they do not have outgoing neighbors. + Defaults to False. + compression: str (Optional) Options: COO (default), CSR, CSC, DCSR, DCSR Sets the compression format for the returned samples. @@ -210,6 +221,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(batch_id_list, "batch_id_list", True) assert_CAI_type(label_list, "label_list", True) assert_CAI_type(label_to_output_comm_rank, "label_to_output_comm_rank", True) + assert_CAI_type(label_offsets, "label_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") cdef cugraph_sample_result_t* result_ptr @@ -234,6 +246,11 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, cai_label_to_output_comm_rank_ptr = \ label_to_output_comm_rank.__cuda_array_interface__['data'][0] + cdef uintptr_t cai_label_offsets_ptr + if label_offsets is not None: + cai_label_offsets_ptr = \ + label_offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t ai_fan_out_ptr = \ h_fan_out.__array_interface__["data"][0] @@ -270,6 +287,17 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, get_c_type_from_numpy_type(label_to_output_comm_rank.dtype) ) + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL + if retain_seeds: + if label_offsets is None: + raise ValueError("Must provide label offsets if retain_seeds is True") + label_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_offsets_ptr, + len(label_offsets), + get_c_type_from_numpy_type(label_offsets.dtype) + ) + cdef cugraph_type_erased_host_array_view_t* fan_out_ptr = \ cugraph_type_erased_host_array_view_create( ai_fan_out_ptr, @@ -323,6 +351,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, cugraph_sampling_set_renumber_results(sampling_options, c_renumber) cugraph_sampling_set_compression_type(sampling_options, compression_behavior_e) cugraph_sampling_set_compress_per_hop(sampling_options, c_compress_per_hop) + cugraph_sampling_set_retain_seeds(sampling_options, retain_seeds) error_code = cugraph_uniform_neighbor_sample( c_resource_handle_ptr, @@ -331,6 +360,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, batch_id_ptr, label_list_ptr, label_to_output_comm_rank_ptr, + label_offsets_ptr, fan_out_ptr, rng_state_ptr, sampling_options, @@ -347,6 +377,8 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, cugraph_type_erased_host_array_view_free(fan_out_ptr) if batch_id_list is not None: cugraph_type_erased_device_array_view_free(batch_id_ptr) + if label_offsets is not None: + cugraph_type_erased_device_array_view_free(label_offsets_ptr) # Have the SamplingResult instance assume ownership of the result data. result = SamplingResult()