Skip to content

Commit

Permalink
Resolves maximum spanning tree bug when using Edgelist instead of Adj…
Browse files Browse the repository at this point in the history
…list (#2256)

This PR resolves #2251, where minimum spanning tree and maximum spanning tree behaved differently when given edge list data versus adjacency list data. Specifically, a call to compute the adjacency list had to be made before running maximum spanning tree, because of how the weights were calculated. After adding a check that computes the adjacency list when it is missing, the docstring examples for both MST algorithms were uncommented.

Authors:
  - https://github.com/betochimas

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #2256
  • Loading branch information
betochimas authored May 5, 2022
1 parent be2af8d commit 3506893
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 15 deletions.
32 changes: 29 additions & 3 deletions python/cugraph/cugraph/tests/test_maximum_spanning_tree.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -37,9 +37,25 @@
print("Networkx version : {} ".format(nx.__version__))


# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
    """Run a garbage-collection pass before each test function in this module."""
    gc.collect()


def _get_param_args(param_name, param_values):
    """
    Build the positional args for pytest.mark.parametrize().

    Returns a tuple of (<param_name>, <pytest.param list>). Each pytest.param
    in the list carries an id string of the form "<param_name>=<value>", so
    the generated test ids show which value was used.
    """
    return (param_name,
            [pytest.param(v, id=f"{param_name}={v}") for v in param_values])


@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
def test_maximum_spanning_tree_nx(graph_file):
gc.collect()
# cugraph
cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
G = cugraph.Graph()
Expand All @@ -64,6 +80,17 @@ def test_maximum_spanning_tree_nx(graph_file):
utils.compare_mst(cugraph_mst, mst_nx)


@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
@pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False]))
def test_maximum_spanning_tree_graph_repr_compat(graph_file, use_adjlist):
    """
    Smoke test: maximum_spanning_tree() must run on a graph built from an
    edge list whether or not view_adj_list() was called beforehand.
    """
    cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cuG, source="0", destination="1", edge_attr="2")
    if use_adjlist:
        # Exercise the path where the adjacency list view was already
        # requested before the algorithm runs.
        G.view_adj_list()
    # No result comparison here; the test passes if the call does not raise.
    cugraph.maximum_spanning_tree(G)


DATASETS_SIZES = [
100000,
1000000,
Expand All @@ -75,7 +102,6 @@ def test_maximum_spanning_tree_nx(graph_file):
@pytest.mark.skip(reason="Skipping large tests")
@pytest.mark.parametrize("graph_size", DATASETS_SIZES)
def test_random_maximum_spanning_tree_nx(graph_size):
gc.collect()
rmm.reinitialize(managed_memory=True)
df = utils.random_edgelist(
e=graph_size,
Expand Down
32 changes: 29 additions & 3 deletions python/cugraph/cugraph/tests/test_minimum_spanning_tree.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -37,9 +37,25 @@
print("Networkx version : {} ".format(nx.__version__))


# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
    """Run a garbage-collection pass before each test function in this module."""
    gc.collect()


def _get_param_args(param_name, param_values):
    """
    Build the positional args for pytest.mark.parametrize().

    Returns a tuple of (<param_name>, <pytest.param list>). Each pytest.param
    in the list carries an id string of the form "<param_name>=<value>", so
    the generated test ids show which value was used.
    """
    return (param_name,
            [pytest.param(v, id=f"{param_name}={v}") for v in param_values])


@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
def test_minimum_spanning_tree_nx(graph_file):
gc.collect()
# cugraph
cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
G = cugraph.Graph()
Expand All @@ -64,6 +80,17 @@ def test_minimum_spanning_tree_nx(graph_file):
utils.compare_mst(cugraph_mst, mst_nx)


@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS)
@pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False]))
def test_minimum_spanning_tree_graph_repr_compat(graph_file, use_adjlist):
    """
    Smoke test: minimum_spanning_tree() must run on a graph built from an
    edge list whether or not view_adj_list() was called beforehand.
    """
    cuG = utils.read_csv_file(graph_file, read_weights_in_sp=True)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cuG, source="0", destination="1", edge_attr="2")
    if use_adjlist:
        # Exercise the path where the adjacency list view was already
        # requested before the algorithm runs.
        G.view_adj_list()
    # No result comparison here; the test passes if the call does not raise.
    cugraph.minimum_spanning_tree(G)


DATASETS_SIZES = [
100000,
1000000,
Expand All @@ -75,7 +102,6 @@ def test_minimum_spanning_tree_nx(graph_file):
@pytest.mark.skip(reason="Skipping large tests")
@pytest.mark.parametrize("graph_size", DATASETS_SIZES)
def test_random_minimum_spanning_tree_nx(graph_size):
gc.collect()
rmm.reinitialize(managed_memory=True)
df = utils.random_edgelist(
e=graph_size,
Expand Down
17 changes: 8 additions & 9 deletions python/cugraph/cugraph/tree/minimum_spanning_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def _maximum_spanning_tree_subgraph(G):
if G.is_directed():
raise ValueError("input graph must be undirected")

if not G.adjlist:
G.view_adj_list()

if G.adjlist.weights is not None:
G.adjlist.weights = G.adjlist.weights.mul(-1)

Expand Down Expand Up @@ -89,15 +92,13 @@ def minimum_spanning_tree(
Examples
--------
>>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter='\t',
>>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter=' ',
... dtype=['int32', 'int32', 'float32'], header=None)
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(M, source='0', destination='1')
>>> # cugraph.minimum_spanning_tree(G)
>>> G_mst = cugraph.minimum_spanning_tree(G)
"""
# FIXME: Uncomment out the above example

G, isNx = ensure_cugraph_obj_for_nx(G)

if isNx is True:
Expand All @@ -112,7 +113,7 @@ def maximum_spanning_tree(
):
"""
Returns a maximum spanning tree (MST) or forest (MSF) on an undirected
graph
graph. Also computes the adjacency list if G does not have one.
Parameters
----------
Expand All @@ -138,15 +139,13 @@ def maximum_spanning_tree(
Examples
--------
>>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter='\t',
>>> M = cudf.read_csv(datasets_path / 'netscience.csv', delimiter=' ',
... dtype=['int32', 'int32', 'float32'], header=None)
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(M, source='0', destination='1')
>>> # cugraph.maximum_spanning_tree(G)
>>> G_mst = cugraph.maximum_spanning_tree(G)
"""
# FIXME: Uncomment out the above (broken) example

G, isNx = ensure_cugraph_obj_for_nx(G)

if isNx is True:
Expand Down

0 comments on commit 3506893

Please sign in to comment.