Resolves maximum spanning tree bug when using Edgelist instead of Adjlist (#2256)

This PR resolves #2251, where minimum spanning tree and maximum spanning tree behaved differently when given edge list data versus adjacency list data. Specifically, the adjacency list had to be computed explicitly before running maximum spanning tree, because maximum spanning tree negates the weights stored on the adjacency list. After adding a check that computes the adjacency list when it is missing, the docstring examples for both MST algorithms were uncommented. Authors: - https://github.com/betochimas Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: #2256
rapidsai · May 5, 2022 · 3506893 · 3506893
1 parent be2af8d
commit 3506893
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 15 deletions.
diff --git a/python/cugraph/cugraph/tests/test_maximum_spanning_tree.py b/python/cugraph/cugraph/tests/test_maximum_spanning_tree.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -37,9 +37,25 @@
 print("Networkx version : {} ".format(nx.__version__))
 
 
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
+def _get_param_args(param_name, param_values):
+    """
+    Returns a tuple of (<param_name>, <pytest.param list>) which can be applied
+    as the args to pytest.mark.parametrize(). The pytest.param list also
+    contains param id string formed from the param name and values.
+    """
+    return (param_name,
+            [pytest.param(v, id=f"{param_name}={v}") for v in param_values])
+
+
 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
 def test_maximum_spanning_tree_nx(graph_file):
-    gc.collect()
     # cugraph
     cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
     G = cugraph.Graph()
@@ -64,6 +80,17 @@ def test_maximum_spanning_tree_nx(graph_file):
     utils.compare_mst(cugraph_mst, mst_nx)
 
 
+@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
+@pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False]))
+def test_maximum_spanning_tree_graph_repr_compat(graph_file, use_adjlist):
+    cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
+    G = cugraph.Graph()
+    G.from_cudf_edgelist(cuG, source="0", destination="1", edge_attr="2")
+    if use_adjlist:
+        G.view_adj_list()
+    cugraph.maximum_spanning_tree(G)
+
+
 DATASETS_SIZES = [
     100000,
     1000000,
@@ -75,7 +102,6 @@ def test_maximum_spanning_tree_nx(graph_file):
 @pytest.mark.skip(reason="Skipping large tests")
 @pytest.mark.parametrize("graph_size", DATASETS_SIZES)
 def test_random_maximum_spanning_tree_nx(graph_size):
-    gc.collect()
     rmm.reinitialize(managed_memory=True)
     df = utils.random_edgelist(
         e=graph_size,

diff --git a/python/cugraph/cugraph/tests/test_minimum_spanning_tree.py b/python/cugraph/cugraph/tests/test_minimum_spanning_tree.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -37,9 +37,25 @@
 print("Networkx version : {} ".format(nx.__version__))
 
 
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
+def _get_param_args(param_name, param_values):
+    """
+    Returns a tuple of (<param_name>, <pytest.param list>) which can be applied
+    as the args to pytest.mark.parametrize(). The pytest.param list also
+    contains param id string formed from the param name and values.
+    """
+    return (param_name,
+            [pytest.param(v, id=f"{param_name}={v}") for v in param_values])
+
+
 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
 def test_minimum_spanning_tree_nx(graph_file):
-    gc.collect()
     # cugraph
     cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
     G = cugraph.Graph()
@@ -64,6 +80,17 @@ def test_minimum_spanning_tree_nx(graph_file):
     utils.compare_mst(cugraph_mst, mst_nx)
 
 
+@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
+@pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False]))
+def test_minimum_spanning_tree_graph_repr_compat(graph_file, use_adjlist):
+    cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
+    G = cugraph.Graph()
+    G.from_cudf_edgelist(cuG, source="0", destination="1", edge_attr="2")
+    if use_adjlist:
+        G.view_adj_list()
+    cugraph.minimum_spanning_tree(G)
+
+
 DATASETS_SIZES = [
     100000,
     1000000,
@@ -75,7 +102,6 @@ def test_minimum_spanning_tree_nx(graph_file):
 @pytest.mark.skip(reason="Skipping large tests")
 @pytest.mark.parametrize("graph_size", DATASETS_SIZES)
 def test_random_minimum_spanning_tree_nx(graph_size):
-    gc.collect()
     rmm.reinitialize(managed_memory=True)
     df = utils.random_edgelist(
         e=graph_size,

diff --git a/python/cugraph/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/cugraph/tree/minimum_spanning_tree.py
@@ -38,6 +38,9 @@ def _maximum_spanning_tree_subgraph(G):
     if G.is_directed():
         raise ValueError("input graph must be undirected")
 
+    if not G.adjlist:
+        G.view_adj_list()
+
     if G.adjlist.weights is not None:
         G.adjlist.weights = G.adjlist.weights.mul(-1)
 
@@ -89,15 +92,13 @@ def minimum_spanning_tree(
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter='\t',
+    >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter=' ',
     ...                   dtype=['int32', 'int32', 'float32'], header=None)
     >>> G = cugraph.Graph()
     >>> G.from_cudf_edgelist(M, source='0', destination='1')
-    >>> # cugraph.minimum_spanning_tree(G)
+    >>> G_mst = cugraph.minimum_spanning_tree(G)
 
     """
-    # FIXME: Uncomment out the above example
-
     G, isNx = ensure_cugraph_obj_for_nx(G)
 
     if isNx is True:
@@ -112,7 +113,7 @@ def maximum_spanning_tree(
 ):
     """
     Returns a maximum spanning tree (MST) or forest (MSF) on an undirected
-    graph
+    graph. Also computes the adjacency list if G does not have one.
 
     Parameters
     ----------
@@ -138,15 +139,13 @@ def maximum_spanning_tree(
 
     Examples
     --------
-    >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter='\t',
+    >>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter=' ',
     ...                   dtype=['int32', 'int32', 'float32'], header=None)
     >>> G = cugraph.Graph()
     >>> G.from_cudf_edgelist(M, source='0', destination='1')
-    >>> # cugraph.maximum_spanning_tree(G)
+    >>> G_mst = cugraph.maximum_spanning_tree(G)
 
     """
-    # FIXME: Uncomment out the above (broken) example
-
     G, isNx = ensure_cugraph_obj_for_nx(G)
 
     if isNx is True: