From 35068932a763f53e78a665699b9936e0b339a326 Mon Sep 17 00:00:00 2001 From: betochimas <97180625+betochimas@users.noreply.github.com> Date: Wed, 4 May 2022 21:41:32 -0700 Subject: [PATCH] Resolves maximum spanning tree bug when using Edgelist instead of Adjlist (#2256) This PR resolves #2251, where minimum spanning tree and maximum spanning tree had different behaviors when using edge list data versus adjacency list data, specifically that a call to compute the adjacency list had to be made before running maximum spanning tree because of how weights were calculated. After adding a check to verify that the adjacency list was computed, examples for both mst algorithms were uncommented. Authors: - https://github.com/betochimas Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/2256 --- .../tests/test_maximum_spanning_tree.py | 32 +++++++++++++++++-- .../tests/test_minimum_spanning_tree.py | 32 +++++++++++++++++-- .../cugraph/tree/minimum_spanning_tree.py | 17 +++++----- 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/python/cugraph/cugraph/tests/test_maximum_spanning_tree.py b/python/cugraph/cugraph/tests/test_maximum_spanning_tree.py index 311f28bd6f8..341fc1b26d6 100644 --- a/python/cugraph/cugraph/tests/test_maximum_spanning_tree.py +++ b/python/cugraph/cugraph/tests/test_maximum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -37,9 +37,25 @@ print("Networkx version : {} ".format(nx.__version__)) +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +def _get_param_args(param_name, param_values): + """ + Returns a tuple of (param_name, pytest.param list) which can be applied + as the args to pytest.mark.parametrize(). The pytest.param list also + contains param id string formed from the param name and values. + """ + return (param_name, + [pytest.param(v, id=f"{param_name}={v}") for v in param_values]) + + @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_maximum_spanning_tree_nx(graph_file): - gc.collect() # cugraph cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True) G = cugraph.Graph() @@ -64,6 +80,17 @@ def test_maximum_spanning_tree_nx(graph_file): utils.compare_mst(cugraph_mst, mst_nx) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) +@pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False])) +def test_maximum_spanning_tree_graph_repr_compat(graph_file, use_adjlist): + cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True) + G = cugraph.Graph() + G.from_cudf_edgelist(cuG, source="0", destination="1", edge_attr="2") + if use_adjlist: + G.view_adj_list() + cugraph.maximum_spanning_tree(G) + + DATASETS_SIZES = [ 100000, 1000000, @@ -75,7 +102,6 @@ def test_maximum_spanning_tree_nx(graph_file): @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_maximum_spanning_tree_nx(graph_size): - gc.collect() rmm.reinitialize(managed_memory=True) df = utils.random_edgelist( e=graph_size, diff --git a/python/cugraph/cugraph/tests/test_minimum_spanning_tree.py b/python/cugraph/cugraph/tests/test_minimum_spanning_tree.py 
index d1588507bce..97b5630eca0 100644 --- a/python/cugraph/cugraph/tests/test_minimum_spanning_tree.py +++ b/python/cugraph/cugraph/tests/test_minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -37,9 +37,25 @@ print("Networkx version : {} ".format(nx.__version__)) +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +def _get_param_args(param_name, param_values): + """ + Returns a tuple of (param_name, pytest.param list) which can be applied + as the args to pytest.mark.parametrize(). The pytest.param list also + contains param id string formed from the param name and values. 
+ """ + return (param_name, + [pytest.param(v, id=f"{param_name}={v}") for v in param_values]) + + @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_minimum_spanning_tree_nx(graph_file): - gc.collect() # cugraph cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True) G = cugraph.Graph() @@ -64,6 +80,17 @@ def test_minimum_spanning_tree_nx(graph_file): utils.compare_mst(cugraph_mst, mst_nx) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) +@pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False])) +def test_minimum_spanning_tree_graph_repr_compat(graph_file, use_adjlist): + cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True) + G = cugraph.Graph() + G.from_cudf_edgelist(cuG, source="0", destination="1", edge_attr="2") + if use_adjlist: + G.view_adj_list() + cugraph.minimum_spanning_tree(G) + + DATASETS_SIZES = [ 100000, 1000000, @@ -75,7 +102,6 @@ def test_minimum_spanning_tree_nx(graph_file): @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_minimum_spanning_tree_nx(graph_size): - gc.collect() rmm.reinitialize(managed_memory=True) df = utils.random_edgelist( e=graph_size, diff --git a/python/cugraph/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/cugraph/tree/minimum_spanning_tree.py index 21aa5da9510..8ad1af0f704 100644 --- a/python/cugraph/cugraph/tree/minimum_spanning_tree.py +++ b/python/cugraph/cugraph/tree/minimum_spanning_tree.py @@ -38,6 +38,9 @@ def _maximum_spanning_tree_subgraph(G): if G.is_directed(): raise ValueError("input graph must be undirected") + if not G.adjlist: + G.view_adj_list() + if G.adjlist.weights is not None: G.adjlist.weights = G.adjlist.weights.mul(-1) @@ -89,15 +92,13 @@ def minimum_spanning_tree( Examples -------- - >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter='\t', + >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter=' ', ... 
dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1') - >>> # cugraph.minimum_spanning_tree(G) + >>> G_mst = cugraph.minimum_spanning_tree(G) """ - # FIXME: Uncomment out the above example - G, isNx = ensure_cugraph_obj_for_nx(G) if isNx is True: @@ -112,7 +113,7 @@ def maximum_spanning_tree( ): """ Returns a maximum spanning tree (MST) or forest (MSF) on an undirected - graph + graph. Also computes the adjacency list if G does not have one. Parameters ---------- @@ -138,15 +139,13 @@ def maximum_spanning_tree( Examples -------- - >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter='\t', + >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter=' ', ... dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1') - >>> # cugraph.maximum_spanning_tree(G) + >>> G_mst = cugraph.maximum_spanning_tree(G) """ - # FIXME: Uncomment out the above (broken) example - G, isNx = ensure_cugraph_obj_for_nx(G) if isNx is True: