Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nx-cugraph: add relabel_nodes and convert_node_labels_to_integers #4531

Merged
merged 9 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/nx-cugraph/.flake8
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

[flake8]
max-line-length = 88
inline-quotes = "
extend-ignore =
B020,
E203,
SIM105,
SIM401,
Expand Down
3 changes: 3 additions & 0 deletions python/nx-cugraph/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ Below is the list of algorithms that are currently supported in nx-cugraph.
<a href="https://networkx.org/documentation/stable/reference/convert.html#module-networkx.convert_matrix">convert_matrix</a>
├─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.convert_matrix.from_pandas_edgelist.html#networkx.convert_matrix.from_pandas_edgelist">from_pandas_edgelist</a>
└─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.convert_matrix.from_scipy_sparse_array.html#networkx.convert_matrix.from_scipy_sparse_array">from_scipy_sparse_array</a>
<a href="https://networkx.org/documentation/stable/reference/relabel.html#module-networkx.relabel">relabel</a>
├─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.relabel.convert_node_labels_to_integers.html#networkx.relabel.convert_node_labels_to_integers">convert_node_labels_to_integers</a>
└─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.relabel.relabel_nodes.html#networkx.relabel.relabel_nodes">relabel_nodes</a>
</pre>

To request nx-cugraph backend support for a NetworkX API that is not listed
Expand Down
2 changes: 2 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"complete_graph",
"complete_multipartite_graph",
"connected_components",
"convert_node_labels_to_integers",
"core_number",
"cubical_graph",
"cycle_graph",
Expand Down Expand Up @@ -124,6 +125,7 @@
"path_graph",
"petersen_graph",
"reciprocity",
"relabel_nodes",
"reverse",
"sedgewick_maze_graph",
"shortest_path",
Expand Down
6 changes: 3 additions & 3 deletions python/nx-cugraph/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,15 @@ repos:
- id: black
# - id: black-jupyter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.1
rev: v0.5.4
hooks:
- id: ruff
args: [--fix-only, --show-fixes] # --unsafe-fixes]
- repo: https://github.com/PyCQA/flake8
rev: 7.1.0
hooks:
- id: flake8
args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=SIM105'] # Why is this necessary?
args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=B020,SIM105'] # Why is this necessary?
additional_dependencies: &flake8_dependencies
# These versions need updated manually
- flake8==7.1.0
Expand All @@ -77,7 +77,7 @@ repos:
additional_dependencies: [tomli]
files: ^(nx_cugraph|docs)/
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.1
rev: v0.5.4
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
3 changes: 3 additions & 0 deletions python/nx-cugraph/nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
from . import convert_matrix
from .convert_matrix import *

from . import relabel
from .relabel import *

from . import generators
from .generators import *

Expand Down
5 changes: 5 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/operators/unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,10 @@ def reverse(G, copy=True):
if not G.is_directed():
raise nx.NetworkXError("Cannot reverse an undirected graph.")
if isinstance(G, nx.Graph):
if not copy:
raise RuntimeError(
"Using `copy=False` is invalid when using a NetworkX graph "
"as input to `nx_cugraph.reverse`"
)
G = nxcg.from_networkx(G, preserve_all_attrs=True)
return G.reverse(copy=copy)
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,41 @@ def _bfs(G, source, *, depth_limit=None, reverse=False):
return distances[mask], predecessors[mask], node_ids[mask]


@networkx_algorithm(is_incomplete=True, version_added="24.02", _plc="bfs")
def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbors=None):
"""`neighbors` and `sort_neighbors` parameters are not yet supported."""
if neighbors is not None:
raise NotImplementedError(
"neighbors argument in generic_bfs_edges is not currently supported"
)
if sort_neighbors is not None:
raise NotImplementedError(
"sort_neighbors argument in generic_bfs_edges is not currently supported"
)
return bfs_edges(G, source, depth_limit=depth_limit)


@generic_bfs_edges._can_run
def _(G, source, neighbors=None, depth_limit=None, sort_neighbors=None):
return neighbors is None and sort_neighbors is None
if nx.__version__[:3] <= "3.3":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

String comparison isn’t appropriate here (breaks with versions like 3.30). Try comparing version specifiers like this:

from packaging.version import parse

if parse(nx.__version__) <= parse("3.3"):
    …

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's an alternative solution: #4571


@networkx_algorithm(is_incomplete=True, version_added="24.02", _plc="bfs")
def generic_bfs_edges(
G, source, neighbors=None, depth_limit=None, sort_neighbors=None
):
"""`neighbors` and `sort_neighbors` parameters are not yet supported."""
if neighbors is not None:
raise NotImplementedError(
"neighbors argument in generic_bfs_edges is not currently supported"
)
if sort_neighbors is not None:
raise NotImplementedError(
"sort_neighbors argument in generic_bfs_edges is not supported"
)
return bfs_edges(G, source, depth_limit=depth_limit)

@generic_bfs_edges._can_run
def _(G, source, neighbors=None, depth_limit=None, sort_neighbors=None):
return neighbors is None and sort_neighbors is None

else:

@networkx_algorithm(is_incomplete=True, version_added="24.02", _plc="bfs")
def generic_bfs_edges(G, source, neighbors=None, depth_limit=None):
"""`neighbors` parameter is not yet supported."""
if neighbors is not None:
raise NotImplementedError(
"neighbors argument in generic_bfs_edges is not currently supported"
)
return bfs_edges(G, source, depth_limit=depth_limit)

@generic_bfs_edges._can_run
def _(G, source, neighbors=None, depth_limit=None):
return neighbors is None


@networkx_algorithm(is_incomplete=True, version_added="24.02", _plc="bfs")
Expand Down
42 changes: 36 additions & 6 deletions python/nx-cugraph/nx_cugraph/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,13 +408,21 @@ def func(it, edge_attr=edge_attr, dtype=dtype):
# Node values may be numpy or cupy arrays (useful for str, object, etc).
# Someday we'll let the user choose np or cp, and support edge values.
node_mask = np.fromiter(iter_mask, bool)
node_value = np.array(vals, dtype)
try:
node_value = cp.array(node_value)
node_value = np.array(vals, dtype)
except ValueError:
pass
# Handle e.g. list elements
if dtype is None or dtype == object:
node_value = np.fromiter(vals, object)
else:
raise
else:
node_mask = cp.array(node_mask)
try:
node_value = cp.array(node_value)
except ValueError:
pass
else:
node_mask = cp.array(node_mask)
node_values[node_attr] = node_value
node_masks[node_attr] = node_mask
# if vals.ndim > 1: ...
Expand All @@ -428,7 +436,12 @@ def func(it, edge_attr=edge_attr, dtype=dtype):
# Node values may be numpy or cupy arrays (useful for str, object, etc).
# Someday we'll let the user choose np or cp, and support edge values.
if dtype is None:
node_value = np.array(list(iter_values))
vals = list(iter_values)
try:
node_value = np.array(vals)
except ValueError:
# Handle e.g. list elements
node_value = np.fromiter(vals, object)
else:
node_value = np.fromiter(iter_values, dtype)
try:
Expand Down Expand Up @@ -474,6 +487,23 @@ def func(it, edge_attr=edge_attr, dtype=dtype):
return rv


def _to_tuples(ndim, L):
if ndim > 2:
L = list(map(_to_tuples.__get__(ndim - 1), L))
return list(map(tuple, L))


def _array_to_tuples(a):
"""Like ``a.tolist()``, but nested structures are tuples instead of lists.

This is only different from ``a.tolist()`` if ``a.ndim > 1``. It is used to
try to return tuples instead of lists for e.g. node values.
"""
if a.ndim > 1:
return _to_tuples(a.ndim, a.tolist())
return a.tolist()


def _iter_attr_dicts(
values: dict[AttrKey, any_ndarray[EdgeValue | NodeValue]],
masks: dict[AttrKey, any_ndarray[bool]],
Expand All @@ -482,7 +512,7 @@ def _iter_attr_dicts(
if full_attrs:
full_dicts = (
dict(zip(full_attrs, vals))
for vals in zip(*(values[attr].tolist() for attr in full_attrs))
for vals in zip(*(_array_to_tuples(values[attr]) for attr in full_attrs))
)
partial_attrs = list(values.keys() & masks.keys())
if partial_attrs:
Expand Down
95 changes: 95 additions & 0 deletions python/nx-cugraph/nx_cugraph/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ def key(testpath):
louvain_different = "Louvain may be different due to RNG"
no_string_dtype = "string edge values not currently supported"
sssp_path_different = "sssp may choose a different valid path"
no_object_dtype_for_edges = (
"Edges don't support object dtype (lists, strings, etc.)"
)
tuple_elements_preferred = "elements are tuples instead of lists"
nx_cugraph_in_test_setup = (
"nx-cugraph Graph is incompatible in test setup in nx versions < 3.3"
)

xfail = {
# This is removed while strongly_connected_components() is not
Expand All @@ -91,6 +98,81 @@ def key(testpath):
"test_cycles.py:TestMinimumCycleBasis."
"test_gh6787_and_edge_attribute_names"
): sssp_path_different,
key(
"test_graph_hashing.py:test_isomorphic_edge_attr"
): no_object_dtype_for_edges,
key(
"test_graph_hashing.py:test_isomorphic_edge_attr_and_node_attr"
): no_object_dtype_for_edges,
key(
"test_graph_hashing.py:test_isomorphic_edge_attr_subgraph_hash"
): no_object_dtype_for_edges,
key(
"test_graph_hashing.py:"
"test_isomorphic_edge_attr_and_node_attr_subgraph_hash"
): no_object_dtype_for_edges,
key(
"test_summarization.py:TestSNAPNoEdgeTypes.test_summary_graph"
): no_object_dtype_for_edges,
key(
"test_summarization.py:TestSNAPUndirected.test_summary_graph"
): no_object_dtype_for_edges,
key(
"test_summarization.py:TestSNAPDirected.test_summary_graph"
): no_object_dtype_for_edges,
key("test_gexf.py:TestGEXF.test_relabel"): no_object_dtype_for_edges,
key(
"test_gml.py:TestGraph.test_parse_gml_cytoscape_bug"
): no_object_dtype_for_edges,
key("test_gml.py:TestGraph.test_parse_gml"): no_object_dtype_for_edges,
key("test_gml.py:TestGraph.test_read_gml"): no_object_dtype_for_edges,
key("test_gml.py:TestGraph.test_data_types"): no_object_dtype_for_edges,
key(
"test_gml.py:TestPropertyLists.test_reading_graph_with_list_property"
): no_object_dtype_for_edges,
key(
"test_relabel.py:"
"test_relabel_preserve_node_order_partial_mapping_with_copy_false"
): "Node order is preserved when relabeling with partial mapping",
Comment on lines +133 to +136
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully grok why this test is checking that the node order changed. It seems like we'd want to keep the node order the same (and we do). See:

key(
"test_gml.py:"
"TestPropertyLists.test_reading_graph_with_single_element_list_property"
): tuple_elements_preferred,
key(
"test_relabel.py:"
"TestRelabel.test_relabel_multidigraph_inout_merge_nodes"
): no_string_dtype,
key(
"test_relabel.py:TestRelabel.test_relabel_multigraph_merge_inplace"
): no_string_dtype,
key(
"test_relabel.py:TestRelabel.test_relabel_multidigraph_merge_inplace"
): no_string_dtype,
key(
"test_relabel.py:TestRelabel.test_relabel_multidigraph_inout_copy"
): no_string_dtype,
key(
"test_relabel.py:TestRelabel.test_relabel_multigraph_merge_copy"
): no_string_dtype,
key(
"test_relabel.py:TestRelabel.test_relabel_multidigraph_merge_copy"
): no_string_dtype,
key(
"test_relabel.py:TestRelabel.test_relabel_multigraph_nonnumeric_key"
): no_string_dtype,
key("test_contraction.py:test_multigraph_path"): no_object_dtype_for_edges,
key(
"test_contraction.py:test_directed_multigraph_path"
): no_object_dtype_for_edges,
key(
"test_contraction.py:test_multigraph_blockmodel"
): no_object_dtype_for_edges,
key(
"test_summarization.py:TestSNAPUndirectedMulti.test_summary_graph"
): no_string_dtype,
key(
"test_summarization.py:TestSNAPDirectedMulti.test_summary_graph"
): no_string_dtype,
}

from packaging.version import parse
Expand Down Expand Up @@ -118,6 +200,19 @@ def key(testpath):
"test_strongly_connected.py:"
"TestStronglyConnected.test_connected_raise"
): "test is incompatible with pytest>=8",
# NetworkX 3.3 introduced logic around functions that return graphs
key(
"test_vf2pp_helpers.py:TestGraphTinoutUpdating.test_updating"
): nx_cugraph_in_test_setup,
key(
"test_vf2pp_helpers.py:TestGraphTinoutUpdating.test_restoring"
): nx_cugraph_in_test_setup,
key(
"test_vf2pp_helpers.py:TestDiGraphTinoutUpdating.test_updating"
): nx_cugraph_in_test_setup,
key(
"test_vf2pp_helpers.py:TestDiGraphTinoutUpdating.test_restoring"
): nx_cugraph_in_test_setup,
}
)

Expand Down
Loading
Loading