Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add louvain_communities to cugraph-nx #3803

Merged
merged 16 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/cugraph-nx/cugraph_nx/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import centrality
from . import centrality, community
from .centrality import *
13 changes: 13 additions & 0 deletions python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .louvain import *
52 changes: 52 additions & 0 deletions python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import pylibcugraph as plc

from cugraph_nx.convert import _to_undirected_graph
from cugraph_nx.utils import _groupby, networkx_algorithm, not_implemented_for

__all__ = ["louvain_communities"]


@not_implemented_for("directed")
@networkx_algorithm
def louvain_communities(
    G, weight="weight", resolution=1, threshold=0.0000001, seed=None
):
    """Find communities of ``G`` with Louvain, returned as a list of node-key sets.

    `threshold` and `seed` parameters are currently ignored.

    A graph without edges is not passed to PLC; every node becomes its own
    single-member community instead.
    """
    # NetworkX allows both directed and undirected, but cugraph only allows undirected.
    G = _to_undirected_graph(G, weight)

    has_edges = G.row_indices.size != 0
    if not has_edges:
        # TODO: PLC doesn't handle empty graphs gracefully!
        return [{node_key} for node_key in G._nodeiter_to_iter(range(len(G)))]

    # The modularity value reported by PLC is not part of the returned result.
    vertices, clusters, _modularity = plc.louvain(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        max_level=sys.maxsize,  # TODO: add this parameter to NetworkX
        resolution=resolution,
        # threshold=threshold,  # TODO: add this parameter to PLC
        do_expensive_check=False,
    )

    # Gather the vertex IDs of each cluster, then translate IDs to node keys.
    communities = []
    for member_ids in _groupby(clusters, vertices).values():
        communities.append(set(G._nodearray_to_list(member_ids)))
    return communities


@louvain_communities._can_run
def _(G, weight="weight", resolution=1, threshold=0.0000001, seed=None):
    """Return True only when ``G`` reports itself as undirected.

    Any exception raised while querying ``G`` is treated as "cannot run".
    """
    # NetworkX allows both directed and undirected, but cugraph only allows undirected.
    try:
        runnable = not G.is_directed()
    except Exception:  # pragma: no cover (safety)
        runnable = False
    return runnable
rlratzel marked this conversation as resolved.
Show resolved Hide resolved
13 changes: 12 additions & 1 deletion python/cugraph-nx/cugraph_nx/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import cugraph_nx as cnx

if TYPE_CHECKING:
from collections.abc import Iterator
from collections.abc import Iterable, Iterator

from cugraph_nx.typing import (
AttrKey,
Expand Down Expand Up @@ -532,6 +532,17 @@ def _get_plc_graph(
do_expensive_check=False,
)

def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]:
    """Translate an iterable of node IDs into an iterable of node keys.

    If ``self.id_to_key`` is None, ``node_ids`` is handed back unchanged.
    Otherwise a lazy ``map`` that looks up each ID in ``self.id_to_key``
    is returned.
    """
    lookup = self.id_to_key
    if lookup is None:
        return node_ids
    return map(lookup.__getitem__, node_ids)

def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]:
    """Convert an array of node IDs into a list of node keys.

    If ``self.key_to_id`` is None the IDs are returned directly as a list;
    otherwise each ID is translated through ``self._nodeiter_to_iter``.
    """
    needs_translation = self.key_to_id is not None
    id_list = node_ids.tolist()
    if needs_translation:
        return list(self._nodeiter_to_iter(id_list))
    return id_list

def _nodearrays_to_dict(
self, node_ids: cp.ndarray[IndexValue], values: cp.ndarray[NodeValue]
) -> dict[NodeKey, NodeValue]:
Expand Down
17 changes: 9 additions & 8 deletions python/cugraph-nx/cugraph_nx/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,18 @@ def key(testpath):
return (testname, frozenset({classname, filename}))
return (testname, frozenset({filename}))

string_attribute = "unable to handle string attributes"

skip = {
key("test_pajek.py:TestPajek.test_ignored_attribute"): string_attribute,
key(
"test_agraph.py:TestAGraph.test_no_warnings_raised"
): "pytest.warn(None) deprecated",
louvain_different = (
"Louvain may be different due to RNG or unsupported threshold parameter"
)
xfail = {
key("test_louvain.py:test_karate_club_partition"): louvain_different,
key("test_louvain.py:test_none_weight_param"): louvain_different,
key("test_louvain.py:test_multigraph"): louvain_different,
key("test_louvain.py:test_threshold"): louvain_different,
}
for item in items:
kset = set(item.keywords)
for (test_name, keywords), reason in skip.items():
for (test_name, keywords), reason in xfail.items():
if item.name == test_name and keywords.issubset(kset):
item.add_marker(pytest.mark.xfail(reason=reason))

Expand Down
35 changes: 35 additions & 0 deletions python/cugraph-nx/cugraph_nx/tests/test_match_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import inspect

import networkx as nx
Expand All @@ -26,8 +27,11 @@ def test_match_signature_and_names():
dispatchable_func = nx.utils.backends._registered_algorithms[name]
orig_func = dispatchable_func.orig_func
# Matching signatures?
# TODO: allow cnx functions to have additional keyword-only parameters
sig = inspect.signature(orig_func)
assert sig == inspect.signature(func)
if func.can_run is not cnx.utils.decorators._default_can_run:
assert sig == inspect.signature(func.can_run)
# Matching function names?
assert func.__name__ == dispatchable_func.__name__ == orig_func.__name__
# Matching dispatch names?
Expand All @@ -38,3 +42,34 @@ def test_match_signature_and_names():
== dispatchable_func.__module__
== orig_func.__module__
)
# Matching package layout (i.e., which modules have the function)?
cnx_path = func.__module__
name = func.__name__
while "." in cnx_path:
# This only walks up the module tree and does not check sibling modules
cnx_path, mod_name = cnx_path.rsplit(".", 1)
nx_path = cnx_path.replace("cugraph_nx", "networkx")
cnx_mod = importlib.import_module(cnx_path)
nx_mod = importlib.import_module(nx_path)
# Is the function present in the current module?
present_in_cnx = hasattr(cnx_mod, name)
present_in_nx = hasattr(nx_mod, name)
if present_in_cnx is not present_in_nx: # pragma: no cover (debug)
if present_in_cnx:
raise AssertionError(
f"{name} exists in {cnx_path}, but not in {nx_path}"
)
raise AssertionError(
f"{name} exists in {nx_path}, but not in {cnx_path}"
)
# Is the nested module present in the current module?
present_in_cnx = hasattr(cnx_mod, mod_name)
present_in_nx = hasattr(nx_mod, mod_name)
if present_in_cnx is not present_in_nx: # pragma: no cover (debug)
if present_in_cnx:
raise AssertionError(
f"{mod_name} exists in {cnx_path}, but not in {nx_path}"
)
raise AssertionError(
f"{mod_name} exists in {nx_path}, but not in {cnx_path}"
)
1 change: 1 addition & 0 deletions python/cugraph-nx/cugraph_nx/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .decorators import *
from .misc import *
2 changes: 2 additions & 0 deletions python/cugraph-nx/cugraph_nx/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def __new__(cls, func=None, *, name=None):
instance.name = func.__name__ if name is None else name
instance.can_run = _default_can_run
setattr(BackendInterface, instance.name, instance)
# Set methods so they are in __dict__
instance._can_run = instance._can_run
return instance

def _can_run(self, func):
Expand Down
45 changes: 45 additions & 0 deletions python/cugraph-nx/cugraph_nx/utils/misc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp

__all__ = ["_groupby"]


def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
    """Perform a groupby operation given an array of group IDs and array of values.

    Parameters
    ----------
    groups : cp.ndarray
        Array that holds the group IDs.
        Group IDs do not need to be consecutive or start from 0; each distinct
        ID becomes a key of the result.
    values : cp.ndarray
        Array of values to be grouped according to groups.
        Must be the same size as groups array.

    Returns
    -------
    dict with group IDs as keys and cp.ndarray as values.
    An empty ``groups`` array gives an empty dict.
    """
    if groups.size == 0:
        # No elements means no groups; avoid fabricating an empty group 0.
        return {}
    sort_indices = cp.argsort(groups)
    # Gather the sorted group IDs once and reuse them below.
    sorted_groups = groups[sort_indices]
    sorted_values = values[sort_indices]
    # Prepending a value that differs from the first ID makes position 0 count
    # as a boundary, so `left_bounds` holds the start index of every group.
    left_bounds = cp.nonzero(
        cp.diff(sorted_groups, prepend=sorted_groups[0] + 1)
    )[0]
    starts = left_bounds.tolist()
    # Each group ends where the next begins; the last ends at the array size.
    ends = [*starts[1:], groups.size]
    group_ids = sorted_groups[left_bounds].tolist()
    return {
        group_id: sorted_values[start:end]
        for group_id, start, end in zip(group_ids, starts, ends)
    }
2 changes: 1 addition & 1 deletion python/cugraph-nx/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ omit = []
ignore_errors = false
precision = 1
fail_under = 0
skip_covered = true
skip_covered = false # Nice to see fully covered files when running `run_nx_tests.sh`
skip_empty = true
exclude_lines = [
"pragma: no cover",
Expand Down
8 changes: 6 additions & 2 deletions python/cugraph-nx/run_nx_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,9 @@
#
# Coverage of `cugraph_nx.algorithms` is reported and is a good sanity check that algorithms run.

# NETWORKX_GRAPH_CONVERT=cugraph NETWORKX_BACKEND_TEST_EXHAUSTIVE=True pytest --pyargs networkx "$@"
NETWORKX_TEST_BACKEND=cugraph NETWORKX_TEST_FALLBACK_TO_NX=True pytest --pyargs networkx --cov=cugraph_nx/algorithms --cov-report term-missing --no-cov-on-fail "$@"
NETWORKX_GRAPH_CONVERT=cugraph NETWORKX_BACKEND_TEST_EXHAUSTIVE=True \
NETWORKX_TEST_BACKEND=cugraph NETWORKX_TEST_FALLBACK_TO_NX=True \
pytest --pyargs networkx \
--cov=cugraph_nx/algorithms \
--cov-report term-missing --no-cov-on-fail \
"$@"