From 22e317cfadf17b078daedc3c1eac714681aa7c5d Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 30 Nov 2023 16:11:43 -0600 Subject: [PATCH 1/7] nx-cugraph: add `ancestors` and `descendants` --- python/nx-cugraph/_nx_cugraph/__init__.py | 2 + .../nx_cugraph/algorithms/__init__.py | 5 +- .../nx-cugraph/nx_cugraph/algorithms/core.py | 6 +- .../nx-cugraph/nx_cugraph/algorithms/dag.py | 61 +++++++++++++++++++ python/nx-cugraph/nx_cugraph/classes/graph.py | 16 +++++ 5 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/dag.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 1fd436bb845..b0ab9881660 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -29,6 +29,7 @@ # "description": "TODO", "functions": { # BEGIN: functions + "ancestors", "barbell_graph", "betweenness_centrality", "bull_graph", @@ -44,6 +45,7 @@ "davis_southern_women_graph", "degree_centrality", "desargues_graph", + "descendants", "diamond_graph", "dodecahedral_graph", "edge_betweenness_centrality", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 63841b15bd5..9f95354793a 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -15,13 +15,14 @@ centrality, community, components, - shortest_paths, link_analysis, + shortest_paths, ) from .bipartite import complete_bipartite_graph from .centrality import * from .components import * from .core import * +from .dag import * from .isolate import * -from .shortest_paths import * from .link_analysis import * +from .shortest_paths import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 2219388bc58..390598d070e 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -31,7 +31,11 @@ def k_truss(G, k): if is_nx := isinstance(G, nx.Graph): G = nxcg.from_networkx(G, preserve_all_attrs=True) if nxcg.number_of_selfloops(G) > 0: - raise nx.NetworkXError( + if nx.__version__[:3] <= "3.2": + exc_class = nx.NetworkXError + else: + exc_class = nx.NetworkXNotImplemented + raise exc_class( "Input graph has self loops which is not permitted; " "Consider using G.remove_edges_from(nx.selfloop_edges(G))." ) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/dag.py b/python/nx-cugraph/nx_cugraph/algorithms/dag.py new file mode 100644 index 00000000000..2014d4c3d27 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/dag.py @@ -0,0 +1,61 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import cupy as cp +import networkx as nx +import numpy as np +import pylibcugraph as plc + +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import index_dtype, networkx_algorithm + +__all__ = [ + "descendants", + "ancestors", +] + + +def _ancestors_and_descendants(G, source, *, is_ancestors): + G = _to_graph(G) + if source not in G: + hash(source) # To raise TypeError if appropriate + raise nx.NetworkXError(f"The node {source} is not in the graph.") + src_index = source if G.key_to_id is None else G.key_to_id[source] + distances, predecessors, node_ids = plc.bfs( + # XXX: why can't I pass arguments as keywords?! + plc.ResourceHandle(), + G._get_plc_graph(switch_indices=is_ancestors), + cp.array([src_index], dtype=index_dtype), + False, + -1, + False, + False, + # resource_handle=plc.ResourceHandle(), + # graph = G._get_plc_graph(switch_indices=is_ancestors), + # sources=cp.array([src_index], dtype=index_dtype), + # direction_optimizing=False, + # depth_limit=-1, + # compute_predecessors=False, + # do_expensive_check=False, + ) + mask = (distances != np.iinfo(distances.dtype).max) & (distances != 0) + return G._nodearray_to_set(node_ids[mask]) + + +@networkx_algorithm +def descendants(G, source): + return _ancestors_and_descendants(G, source, is_ancestors=False) + + +@networkx_algorithm +def ancestors(G, source): + return _ancestors_and_descendants(G, source, is_ancestors=True) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index e32f93d8bfe..8af7b585190 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -458,6 +458,22 @@ def has_edge(self, u: NodeKey, v: NodeKey) -> bool: return False return bool(((self.src_indices == u) & (self.dst_indices == v)).any()) + def _neighbors(self, n: NodeKey) -> cp.ndarray[NodeValue]: + if n not in self: + hash(n) # To raise TypeError if appropriate + raise nx.NetworkXError(f"The node {n} is not in the graph.") + if self.key_to_id is not None: + n = self.key_to_id[n] + nbrs = self.dst_indices[self.src_indices == n] + if self.is_multigraph(): + nbrs = cp.unique(nbrs) + return nbrs + + @networkx_api + def neighbors(self, n: NodeKey) -> Iterator[NodeKey]: + nbrs = self._neighbors(n) + return iter(self._nodeiter_to_iter(nbrs.tolist())) + @networkx_api def has_node(self, n: NodeKey) -> bool: return n in self From da1c3a1fa674fc73b69517f58632a645f232ddeb Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 30 Nov 2023 18:32:36 -0600 Subject: [PATCH 2/7] Add `bfs_successors` and `bfs_predecessors`; is there a bug in `plc.bfs`?! 
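
For example, on a small graph the new functions are expected to mirror
networkx (an illustrative sketch; it assumes nx-cugraph is installed with a
working GPU and that, like the other algorithms, these functions are
re-exported at the package top level):

    import networkx as nx
    import nx_cugraph as nxcg

    G = nx.path_graph(5)  # 0 - 1 - 2 - 3 - 4
    # Both functions accept a networkx graph and convert it internally.
    print(dict(nxcg.bfs_successors(G, 0)))    # e.g. {0: [1], 1: [2], 2: [3], 3: [4]}
    print(dict(nxcg.bfs_predecessors(G, 2)))  # e.g. {1: 2, 3: 2, 0: 1, 4: 3}

The iteration order within a BFS level may differ from pure networkx, but
the contents should match.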
--- python/nx-cugraph/_nx_cugraph/__init__.py | 4 + .../nx_cugraph/algorithms/__init__.py | 2 + .../nx-cugraph/nx_cugraph/algorithms/dag.py | 26 ++--- .../algorithms/traversal/__init__.py | 13 +++ .../traversal/breadth_first_search.py | 105 ++++++++++++++++++ python/nx-cugraph/nx_cugraph/classes/graph.py | 4 +- python/nx-cugraph/nx_cugraph/utils/misc.py | 20 +++- 7 files changed, 152 insertions(+), 22 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index b0ab9881660..a5e8bb7854d 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -32,6 +32,8 @@ "ancestors", "barbell_graph", "betweenness_centrality", + "bfs_predecessors", + "bfs_successors", "bull_graph", "caveman_graph", "chvatal_graph", @@ -101,6 +103,8 @@ "extra_docstrings": { # BEGIN: extra_docstrings "betweenness_centrality": "`weight` parameter is not yet supported.", + "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.", + "bfs_successors": "`sort_neighbors` parameter is not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 9f95354793a..d28a629fe63 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -17,6 +17,7 @@ components, link_analysis, shortest_paths, + traversal, ) from .bipartite import complete_bipartite_graph from .centrality import * @@ -26,3 +27,4 @@ from .isolate import * from .link_analysis import * from .shortest_paths import * +from .traversal import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/dag.py b/python/nx-cugraph/nx_cugraph/algorithms/dag.py index 2014d4c3d27..067cfed9101 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/dag.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/dag.py @@ -28,24 +28,18 @@ def _ancestors_and_descendants(G, source, *, is_ancestors): G = _to_graph(G) if source not in G: hash(source) # To raise TypeError if appropriate - raise nx.NetworkXError(f"The node {source} is not in the graph.") + raise nx.NetworkXError( + f"The node {source} is not in the {G.__class__.__name__.lower()}." + ) src_index = source if G.key_to_id is None else G.key_to_id[source] distances, predecessors, node_ids = plc.bfs( - # XXX: why can't I pass arguments as keywords?! 
- plc.ResourceHandle(), - G._get_plc_graph(switch_indices=is_ancestors), - cp.array([src_index], dtype=index_dtype), - False, - -1, - False, - False, - # resource_handle=plc.ResourceHandle(), - # graph = G._get_plc_graph(switch_indices=is_ancestors), - # sources=cp.array([src_index], dtype=index_dtype), - # direction_optimizing=False, - # depth_limit=-1, - # compute_predecessors=False, - # do_expensive_check=False, + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(switch_indices=is_ancestors), + sources=cp.array([src_index], dtype=index_dtype), + direction_optimizing=False, + depth_limit=-1, + compute_predecessors=False, + do_expensive_check=False, ) mask = (distances != np.iinfo(distances.dtype).max) & (distances != 0) return G._nodearray_to_set(node_ids[mask]) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py new file mode 100644 index 00000000000..1751cd46919 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .breadth_first_search import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py new file mode 100644 index 00000000000..11914d94faf --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -0,0 +1,105 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import networkx as nx +import pylibcugraph as plc + +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm + +__all__ = [ + "bfs_predecessors", + "bfs_successors", +] + + +@networkx_algorithm +def bfs_successors(G, source, depth_limit=None, sort_neighbors=None): + """`sort_neighbors` parameter is not yet supported.""" + G = _to_graph(G) + if source not in G: + hash(source) # To raise TypeError if appropriate + raise nx.NetworkXError( + f"The node {source} is not in the {G.__class__.__name__.lower()}." 
+ ) + if depth_limit is not None and depth_limit < 1: + yield (source, []) + return + + src_index = source if G.key_to_id is None else G.key_to_id[source] + distances, predecessors, node_ids = plc.bfs( + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + sources=cp.array([src_index], dtype=index_dtype), + direction_optimizing=False, + depth_limit=-1 if depth_limit is None else depth_limit, + compute_predecessors=True, + do_expensive_check=False, + ) + mask = predecessors >= 0 + distances = distances[mask] + predecessors = predecessors[mask] + node_ids = node_ids[mask] + groups = _groupby(distances, [predecessors, node_ids]) + id_to_key = G.id_to_key + for key in range(1, len(groups) + 1): + parent_ids, children_ids = groups[key] + parent_id = parent_ids[0].tolist() + parent = id_to_key[parent_id] if id_to_key is not None else parent_id + children = G._nodearray_to_list(children_ids) + yield (parent, children) + + +@bfs_successors._can_run +def _(G, source, depth_limit=None, sort_neighbors=None): + return sort_neighbors is None + + +@networkx_algorithm +def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): + """`sort_neighbors` parameter is not yet supported.""" + G = _to_graph(G) + if source not in G: + hash(source) # To raise TypeError if appropriate + raise nx.NetworkXError( + f"The node {source} is not in the {G.__class__.__name__.lower()}." + ) + if depth_limit is not None and depth_limit < 1: + return + + src_index = source if G.key_to_id is None else G.key_to_id[source] + distances, predecessors, node_ids = plc.bfs( + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + sources=cp.array([src_index], dtype=index_dtype), + direction_optimizing=False, + depth_limit=-1 if depth_limit is None else depth_limit, + compute_predecessors=True, + do_expensive_check=False, + ) + mask = predecessors >= 0 + distances = distances[mask] + predecessors = predecessors[mask] + node_ids = node_ids[mask] + groups = _groupby(distances, [predecessors, node_ids]) + for key in range(1, len(groups) + 1): + parent_ids, children_ids = groups[key] + yield from zip( + G._nodeiter_to_iter(children_ids.tolist()), + G._nodeiter_to_iter(parent_ids.tolist()), + ) + + +@bfs_predecessors._can_run +def _(G, source, depth_limit=None, sort_neighbors=None): + return sort_neighbors is None diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 8af7b585190..199e7af8407 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -461,7 +461,9 @@ def has_edge(self, u: NodeKey, v: NodeKey) -> bool: def _neighbors(self, n: NodeKey) -> cp.ndarray[NodeValue]: if n not in self: hash(n) # To raise TypeError if appropriate - raise nx.NetworkXError(f"The node {n} is not in the graph.") + raise nx.NetworkXError( + f"The node {n} is not in the {self.__class__.__name__.lower()}." 
+ ) if self.key_to_id is not None: n = self.key_to_id[n] nbrs = self.dst_indices[self.src_indices == n] diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py index e303375918d..f3cefea4d2c 100644 --- a/python/nx-cugraph/nx_cugraph/utils/misc.py +++ b/python/nx-cugraph/nx_cugraph/utils/misc.py @@ -58,7 +58,9 @@ def pairwise(it): def _groupby( - groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False + groups: cp.ndarray, + values: cp.ndarray | list[cp.ndarray], + groups_are_canonical: bool = False, ) -> dict[int, cp.ndarray]: """Perform a groupby operation given an array of group IDs and array of values. @@ -66,8 +68,8 @@ def _groupby( ---------- groups : cp.ndarray Array that holds the group IDs. - values : cp.ndarray - Array of values to be grouped according to groups. + values : cp.ndarray or list of cp.ndarray + Array or list of arrays of values to be grouped according to groups. Must be the same size as groups array. groups_are_canonical : bool, default False Whether the group IDs are consecutive integers beginning with 0. @@ -80,7 +82,10 @@ def _groupby( return {} sort_indices = cp.argsort(groups) sorted_groups = groups[sort_indices] - sorted_values = values[sort_indices] + if not isinstance(values, list): + sorted_values = values[sort_indices] + else: + sorted_values = [vals[sort_indices] for vals in values] prepend = 1 if groups_are_canonical else sorted_groups[0] + 1 left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0] boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size])) @@ -88,7 +93,12 @@ def _groupby( it = enumerate(boundaries) else: it = zip(sorted_groups[left_bounds].tolist(), boundaries) - return {group: sorted_values[start:end] for group, (start, end) in it} + if not isinstance(values, list): + return {group: sorted_values[start:end] for group, (start, end) in it} + return { + group: [sorted_vals[start:end] for sorted_vals in sorted_values] + for group, (start, end) in it + } def _seed_to_int(seed: int | Random | None) -> int: From 601f8b362d32ac154e378a0d7eaa64f44c1b30c6 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 1 Dec 2023 05:11:52 -0600 Subject: [PATCH 3/7] Add bfs_edges, bfs_tree, generic_bfs_edges --- python/nx-cugraph/_nx_cugraph/__init__.py | 6 + .../traversal/breadth_first_search.py | 154 +++++++++++++++++- .../nx-cugraph/nx_cugraph/convert_matrix.py | 1 + python/nx-cugraph/nx_cugraph/utils/misc.py | 52 ++++-- 4 files changed, 187 insertions(+), 26 deletions(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index a5e8bb7854d..5e0fb3cd543 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -32,8 +32,10 @@ "ancestors", "barbell_graph", "betweenness_centrality", + "bfs_edges", "bfs_predecessors", "bfs_successors", + "bfs_tree", "bull_graph", "caveman_graph", "chvatal_graph", @@ -57,6 +59,7 @@ "from_pandas_edgelist", "from_scipy_sparse_array", "frucht_graph", + "generic_bfs_edges", "heawood_graph", "hits", "house_graph", @@ -103,11 +106,14 @@ "extra_docstrings": { # BEGIN: extra_docstrings "betweenness_centrality": "`weight` parameter is not yet supported.", + "bfs_edges": "`sort_neighbors` parameter is not yet supported.", "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.", "bfs_successors": "`sort_neighbors` parameter is not yet supported.", + "bfs_tree": "`sort_neighbors` parameter is not yet supported.", 
"edge_betweenness_centrality": "`weight` parameter is not yet supported.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", + "generic_bfs_edges": "`neighbors` parameter is not yet supported.", "k_truss": ( "Currently raises `NotImplementedError` for graphs with more than one connected\n" "component when k >= 3. We expect to fix this soon." diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index 11914d94faf..ab4967a6d1e 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -10,19 +10,153 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from itertools import repeat + import cupy as cp import networkx as nx import pylibcugraph as plc +import nx_cugraph as nxcg from nx_cugraph.convert import _to_graph from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm __all__ = [ + "bfs_edges", + "bfs_tree", "bfs_predecessors", "bfs_successors", + "generic_bfs_edges", ] +@networkx_algorithm +def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): + """`neighbors` parameter is not yet supported.""" + return bfs_edges(source, depth_limit=depth_limit) + + +@generic_bfs_edges._can_run +def _(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): + return neighbors is None and sort_neighbors is None + + +@networkx_algorithm +def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): + """`sort_neighbors` parameter is not yet supported.""" + # DRY warning: see also bfs_predecessors and bfs_tree + G = _to_graph(G) + if source not in G: + hash(source) # To raise TypeError if appropriate + raise nx.NetworkXError( + f"The node {source} is not in the {G.__class__.__name__.lower()}." 
+ ) + if depth_limit is not None and depth_limit < 1: + return + + src_index = source if G.key_to_id is None else G.key_to_id[source] + distances, predecessors, node_ids = plc.bfs( + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(switch_indices=reverse), + sources=cp.array([src_index], dtype=index_dtype), + direction_optimizing=False, + depth_limit=-1 if depth_limit is None else depth_limit, + compute_predecessors=True, + do_expensive_check=False, + ) + mask = predecessors >= 0 + distances = distances[mask] + predecessors = predecessors[mask] + node_ids = node_ids[mask] + groups = _groupby([distances, predecessors], node_ids) + id_to_key = G.id_to_key + for key in sorted(groups): + children_ids = groups[key] + parent_id = key[1] + parent = id_to_key[parent_id] if id_to_key is not None else parent_id + yield from zip( + repeat(parent, children_ids.size), + G._nodeiter_to_iter(children_ids.tolist()), + ) + + +@bfs_edges._can_run +def _(G, source, reverse=False, depth_limit=None, sort_neighbors=None): + return sort_neighbors is None + + +@networkx_algorithm +def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): + """`sort_neighbors` parameter is not yet supported.""" + # DRY warning: see also bfs_edges and bfs_predecessors + G = _to_graph(G) + if source not in G: + hash(source) # To raise TypeError if appropriate + raise nx.NetworkXError( + f"The node {source} is not in the {G.__class__.__name__.lower()}." + ) + if depth_limit is not None and depth_limit < 1: + return nxcg.DiGraph.from_coo( + 1, + cp.array([], dtype=index_dtype), + cp.array([], dtype=index_dtype), + id_to_key=[source], + ) + + src_index = source if G.key_to_id is None else G.key_to_id[source] + distances, predecessors, node_ids = plc.bfs( + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(switch_indices=reverse), + sources=cp.array([src_index], dtype=index_dtype), + direction_optimizing=False, + depth_limit=-1 if depth_limit is None else depth_limit, + compute_predecessors=True, + do_expensive_check=False, + ) + mask = predecessors >= 0 + predecessors = predecessors[mask] + if predecessors.size == 0: + return nxcg.DiGraph.from_coo( + 1, + cp.array([], dtype=index_dtype), + cp.array([], dtype=index_dtype), + id_to_key=[source], + ) + node_ids = node_ids[mask] + # TODO: create renumbering helper function(s) + unique_node_ids = cp.unique(cp.hstack((predecessors, node_ids))) + # Renumber edges + # Option 1 + src_indices = cp.searchsorted(unique_node_ids, predecessors) + dst_indices = cp.searchsorted(unique_node_ids, node_ids) + # Option 2 + # mapper = cp.zeros(len(G), index_dtype) + # mapper[unique_node_ids] = cp.arange(unique_node_ids.size, dtype=mapper.dtype) + # src_indices = mapper[predecessors] + # dst_indices = mapper[node_ids] + # Renumber nodes + if (id_to_key := G.id_to_key) is not None: + key_to_id = { + id_to_key[old_index]: new_index + for new_index, old_index in enumerate(unique_node_ids.tolist()) + } + else: + key_to_id = { + old_index: new_index + for new_index, old_index in enumerate(unique_node_ids.tolist()) + } + return nxcg.DiGraph.from_coo( + unique_node_ids.size, + src_indices, + dst_indices, + key_to_id=key_to_id, + ) + + +@bfs_tree._can_run +def _(G, source, reverse=False, depth_limit=None, sort_neighbors=None): + return sort_neighbors is None + + @networkx_algorithm def bfs_successors(G, source, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" @@ -50,11 +184,11 @@ def bfs_successors(G, source, depth_limit=None, 
sort_neighbors=None): distances = distances[mask] predecessors = predecessors[mask] node_ids = node_ids[mask] - groups = _groupby(distances, [predecessors, node_ids]) + groups = _groupby([distances, predecessors], node_ids) id_to_key = G.id_to_key - for key in range(1, len(groups) + 1): - parent_ids, children_ids = groups[key] - parent_id = parent_ids[0].tolist() + for key in sorted(groups): + children_ids = groups[key] + parent_id = key[1] parent = id_to_key[parent_id] if id_to_key is not None else parent_id children = G._nodearray_to_list(children_ids) yield (parent, children) @@ -68,6 +202,7 @@ def _(G, source, depth_limit=None, sort_neighbors=None): @networkx_algorithm def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" + # DRY warning: see also bfs_edges and bfs_tree G = _to_graph(G) if source not in G: hash(source) # To raise TypeError if appropriate @@ -91,12 +226,15 @@ def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): distances = distances[mask] predecessors = predecessors[mask] node_ids = node_ids[mask] - groups = _groupby(distances, [predecessors, node_ids]) - for key in range(1, len(groups) + 1): - parent_ids, children_ids = groups[key] + groups = _groupby([distances, predecessors], node_ids) + id_to_key = G.id_to_key + for key in sorted(groups): + children_ids = groups[key] + parent_id = key[1] + parent = id_to_key[parent_id] if id_to_key is not None else parent_id yield from zip( G._nodeiter_to_iter(children_ids.tolist()), - G._nodeiter_to_iter(parent_ids.tolist()), + repeat(parent, children_ids.size), ) diff --git a/python/nx-cugraph/nx_cugraph/convert_matrix.py b/python/nx-cugraph/nx_cugraph/convert_matrix.py index 6c8b8fb4a1d..80ca0c2fa4b 100644 --- a/python/nx-cugraph/nx_cugraph/convert_matrix.py +++ b/python/nx-cugraph/nx_cugraph/convert_matrix.py @@ -36,6 +36,7 @@ def from_pandas_edgelist( graph_class, inplace = _create_using_class(create_using) src_array = df[source].to_numpy() dst_array = df[target].to_numpy() + # TODO: create renumbering helper function(s) # Renumber step 0: node keys nodes = np.unique(np.concatenate([src_array, dst_array])) N = nodes.size diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py index f3cefea4d2c..aa06d7fd29b 100644 --- a/python/nx-cugraph/nx_cugraph/utils/misc.py +++ b/python/nx-cugraph/nx_cugraph/utils/misc.py @@ -58,7 +58,7 @@ def pairwise(it): def _groupby( - groups: cp.ndarray, + groups: cp.ndarray | list[cp.ndarray], values: cp.ndarray | list[cp.ndarray], groups_are_canonical: bool = False, ) -> dict[int, cp.ndarray]: @@ -66,8 +66,8 @@ def _groupby( Parameters ---------- - groups : cp.ndarray - Array that holds the group IDs. + groups : cp.ndarray or list of cp.ndarray + Array or list of arrays that holds the group IDs. values : cp.ndarray or list of cp.ndarray Array or list of arrays of values to be grouped according to groups. Must be the same size as groups array. @@ -78,27 +78,43 @@ def _groupby( ------- dict with group IDs as keys and cp.ndarray as values. """ - if groups.size == 0: - return {} - sort_indices = cp.argsort(groups) - sorted_groups = groups[sort_indices] - if not isinstance(values, list): - sorted_values = values[sort_indices] + if isinstance(groups, list): + if groups_are_canonical: + raise ValueError( + "`groups_are_canonical=True` is not allowed when `groups` is a list." 
+ ) + if len(groups) == 0 or (size := groups[0].size) == 0: + return {} + sort_indices = cp.lexsort(cp.vstack(groups[::-1])) + sorted_groups = cp.vstack([group[sort_indices] for group in groups]) + prepend = sorted_groups[:, 0].max() + 1 + changed = cp.abs(cp.diff(sorted_groups, prepend=prepend)).sum(axis=0) + changed[0] = 1 + left_bounds = cp.nonzero(changed)[0] else: + if (size := groups.size) == 0: + return {} + sort_indices = cp.argsort(groups) + sorted_groups = groups[sort_indices] + prepend = 1 if groups_are_canonical else sorted_groups[0] + 1 + left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0] + if isinstance(values, list): sorted_values = [vals[sort_indices] for vals in values] - prepend = 1 if groups_are_canonical else sorted_groups[0] + 1 - left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0] - boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size])) + else: + sorted_values = values[sort_indices] + boundaries = pairwise(itertools.chain(left_bounds.tolist(), [size])) if groups_are_canonical: it = enumerate(boundaries) + elif isinstance(groups, list): + it = zip(map(tuple, sorted_groups.T[left_bounds].tolist()), boundaries) else: it = zip(sorted_groups[left_bounds].tolist(), boundaries) - if not isinstance(values, list): - return {group: sorted_values[start:end] for group, (start, end) in it} - return { - group: [sorted_vals[start:end] for sorted_vals in sorted_values] - for group, (start, end) in it - } + if isinstance(values, list): + return { + group: [sorted_vals[start:end] for sorted_vals in sorted_values] + for group, (start, end) in it + } + return {group: sorted_values[start:end] for group, (start, end) in it} def _seed_to_int(seed: int | Random | None) -> int: From d36af2638a43799aede0adcc6f39919c472d3e6b Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 5 Dec 2023 11:54:25 -0600 Subject: [PATCH 4/7] Add `descendants_at_distance` and `bfs_layers` This required skipping several tests that use algorithms that repeatedly call `bfs_layers`. Also, refactor BFS algorithms to reduce repetition. Also, update converting from networkx to handle non-dict edge data mappings. 
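
For example (an illustrative sketch; assumes nx-cugraph is installed with a
working GPU and that these functions are re-exported at the package top
level like the other algorithms):

    import networkx as nx
    import nx_cugraph as nxcg

    G = nx.path_graph(5)  # 0 - 1 - 2 - 3 - 4
    # Layers of nodes grouped by BFS depth from one or more sources.
    print([sorted(layer) for layer in nxcg.bfs_layers(G, [0, 4])])
    # e.g. [[0, 4], [1, 3], [2]]
    # Nodes exactly `distance` hops away from the source.
    print(nxcg.descendants_at_distance(G, 0, 2))
    # e.g. {2}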
--- python/nx-cugraph/_nx_cugraph/__init__.py | 2 + .../traversal/breadth_first_search.py | 165 ++++++++++-------- python/nx-cugraph/nx_cugraph/classes/graph.py | 5 + python/nx-cugraph/nx_cugraph/convert.py | 79 +++++---- python/nx-cugraph/nx_cugraph/interface.py | 33 ++++ 5 files changed, 178 insertions(+), 106 deletions(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 5e0fb3cd543..8f1992923c6 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -33,6 +33,7 @@ "barbell_graph", "betweenness_centrality", "bfs_edges", + "bfs_layers", "bfs_predecessors", "bfs_successors", "bfs_tree", @@ -50,6 +51,7 @@ "degree_centrality", "desargues_graph", "descendants", + "descendants_at_distance", "diamond_graph", "dodecahedral_graph", "edge_betweenness_centrality", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index ab4967a6d1e..2f7c8c7d982 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -14,6 +14,7 @@ import cupy as cp import networkx as nx +import numpy as np import pylibcugraph as plc import nx_cugraph as nxcg @@ -25,34 +26,23 @@ "bfs_tree", "bfs_predecessors", "bfs_successors", + "descendants_at_distance", + "bfs_layers", "generic_bfs_edges", ] -@networkx_algorithm -def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): - """`neighbors` parameter is not yet supported.""" - return bfs_edges(source, depth_limit=depth_limit) - - -@generic_bfs_edges._can_run -def _(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): - return neighbors is None and sort_neighbors is None - - -@networkx_algorithm -def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): - """`sort_neighbors` parameter is not yet supported.""" - # DRY warning: see also bfs_predecessors and bfs_tree +def _check_G_and_source(G, source): G = _to_graph(G) if source not in G: hash(source) # To raise TypeError if appropriate raise nx.NetworkXError( f"The node {source} is not in the {G.__class__.__name__.lower()}." 
) - if depth_limit is not None and depth_limit < 1: - return + return G + +def _bfs(G, source, *, depth_limit=None, reverse=False): src_index = source if G.key_to_id is None else G.key_to_id[source] distances, predecessors, node_ids = plc.bfs( handle=plc.ResourceHandle(), @@ -64,9 +54,30 @@ def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): do_expensive_check=False, ) mask = predecessors >= 0 - distances = distances[mask] - predecessors = predecessors[mask] - node_ids = node_ids[mask] + return distances[mask], predecessors[mask], node_ids[mask] + + +@networkx_algorithm +def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): + """`neighbors` parameter is not yet supported.""" + return bfs_edges(source, depth_limit=depth_limit) + + +@generic_bfs_edges._can_run +def _(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): + return neighbors is None and sort_neighbors is None + + +@networkx_algorithm +def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): + """`sort_neighbors` parameter is not yet supported.""" + G = _check_G_and_source(G, source) + if depth_limit is not None and depth_limit < 1: + return + distances, predecessors, node_ids = _bfs( + G, source, depth_limit=depth_limit, reverse=reverse + ) + # Using groupby like this is similar to bfs_predecessors groups = _groupby([distances, predecessors], node_ids) id_to_key = G.id_to_key for key in sorted(groups): @@ -87,13 +98,7 @@ def _(G, source, reverse=False, depth_limit=None, sort_neighbors=None): @networkx_algorithm def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" - # DRY warning: see also bfs_edges and bfs_predecessors - G = _to_graph(G) - if source not in G: - hash(source) # To raise TypeError if appropriate - raise nx.NetworkXError( - f"The node {source} is not in the {G.__class__.__name__.lower()}." 
- ) + G = _check_G_and_source(G, source) if depth_limit is not None and depth_limit < 1: return nxcg.DiGraph.from_coo( 1, @@ -102,18 +107,12 @@ def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): id_to_key=[source], ) - src_index = source if G.key_to_id is None else G.key_to_id[source] - distances, predecessors, node_ids = plc.bfs( - handle=plc.ResourceHandle(), - graph=G._get_plc_graph(switch_indices=reverse), - sources=cp.array([src_index], dtype=index_dtype), - direction_optimizing=False, - depth_limit=-1 if depth_limit is None else depth_limit, - compute_predecessors=True, - do_expensive_check=False, + distances, predecessors, node_ids = _bfs( + G, + source, + depth_limit=depth_limit, + reverse=reverse, ) - mask = predecessors >= 0 - predecessors = predecessors[mask] if predecessors.size == 0: return nxcg.DiGraph.from_coo( 1, @@ -121,7 +120,6 @@ def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): cp.array([], dtype=index_dtype), id_to_key=[source], ) - node_ids = node_ids[mask] # TODO: create renumbering helper function(s) unique_node_ids = cp.unique(cp.hstack((predecessors, node_ids))) # Renumber edges @@ -160,30 +158,12 @@ def _(G, source, reverse=False, depth_limit=None, sort_neighbors=None): @networkx_algorithm def bfs_successors(G, source, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" - G = _to_graph(G) - if source not in G: - hash(source) # To raise TypeError if appropriate - raise nx.NetworkXError( - f"The node {source} is not in the {G.__class__.__name__.lower()}." - ) + G = _check_G_and_source(G, source) if depth_limit is not None and depth_limit < 1: yield (source, []) return - src_index = source if G.key_to_id is None else G.key_to_id[source] - distances, predecessors, node_ids = plc.bfs( - handle=plc.ResourceHandle(), - graph=G._get_plc_graph(), - sources=cp.array([src_index], dtype=index_dtype), - direction_optimizing=False, - depth_limit=-1 if depth_limit is None else depth_limit, - compute_predecessors=True, - do_expensive_check=False, - ) - mask = predecessors >= 0 - distances = distances[mask] - predecessors = predecessors[mask] - node_ids = node_ids[mask] + distances, predecessors, node_ids = _bfs(G, source, depth_limit=depth_limit) groups = _groupby([distances, predecessors], node_ids) id_to_key = G.id_to_key for key in sorted(groups): @@ -200,32 +180,42 @@ def _(G, source, depth_limit=None, sort_neighbors=None): @networkx_algorithm -def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): - """`sort_neighbors` parameter is not yet supported.""" - # DRY warning: see also bfs_edges and bfs_tree +def bfs_layers(G, sources): G = _to_graph(G) - if source not in G: - hash(source) # To raise TypeError if appropriate - raise nx.NetworkXError( - f"The node {source} is not in the {G.__class__.__name__.lower()}." 
- ) - if depth_limit is not None and depth_limit < 1: - return - - src_index = source if G.key_to_id is None else G.key_to_id[source] + if sources in G: + sources = [sources] + else: + sources = set(sources) + if not all(source in G for source in sources): + node = next(source for source in sources if source not in G) + raise nx.NetworkXError(f"The node {node} is not in the graph.") + sources = list(sources) + source_ids = G._list_to_nodearray(sources) distances, predecessors, node_ids = plc.bfs( handle=plc.ResourceHandle(), graph=G._get_plc_graph(), - sources=cp.array([src_index], dtype=index_dtype), + sources=source_ids, direction_optimizing=False, - depth_limit=-1 if depth_limit is None else depth_limit, - compute_predecessors=True, + depth_limit=-1, + compute_predecessors=False, do_expensive_check=False, ) - mask = predecessors >= 0 + mask = distances != np.iinfo(distances.dtype).max distances = distances[mask] - predecessors = predecessors[mask] node_ids = node_ids[mask] + groups = _groupby(distances, node_ids) + return (G._nodearray_to_list(groups[key]) for key in range(len(groups))) + + +@networkx_algorithm +def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): + """`sort_neighbors` parameter is not yet supported.""" + G = _check_G_and_source(G, source) + if depth_limit is not None and depth_limit < 1: + return + + distances, predecessors, node_ids = _bfs(G, source, depth_limit=depth_limit) + # We include `predecessors` in the groupby for "nicer" iteration order groups = _groupby([distances, predecessors], node_ids) id_to_key = G.id_to_key for key in sorted(groups): @@ -241,3 +231,26 @@ def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): @bfs_predecessors._can_run def _(G, source, depth_limit=None, sort_neighbors=None): return sort_neighbors is None + + +@networkx_algorithm +def descendants_at_distance(G, source, distance): + G = _check_G_and_source(G, source) + if distance is None or distance < 0: + return set() + if distance == 0: + return {source} + + src_index = source if G.key_to_id is None else G.key_to_id[source] + distances, predecessors, node_ids = plc.bfs( + handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + sources=cp.array([src_index], dtype=index_dtype), + direction_optimizing=False, + depth_limit=distance, + compute_predecessors=False, + do_expensive_check=False, + ) + mask = distances == distance + node_ids = node_ids[mask] + return G._nodearray_to_set(node_ids) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 199e7af8407..cdd3f744f24 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -719,6 +719,11 @@ def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]: return node_ids.tolist() return list(self._nodeiter_to_iter(node_ids.tolist())) + def _list_to_nodearray(self, nodes: list[NodeKey]) -> cp.ndarray[IndexValue]: + if (key_to_id := self.key_to_id) is not None: + nodes = [key_to_id[node] for node in nodes] + return cp.array(nodes, dtype=index_dtype) + def _nodearray_to_set(self, node_ids: cp.ndarray[IndexValue]) -> set[NodeKey]: if self.key_to_id is None: return set(node_ids.tolist()) diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index 3c0814370d3..f265540a161 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -39,6 +39,24 @@ REQUIRED = ... 
+def _iterate_values(graph, adj, is_dicts, func): + # Using `dict.values` is faster and is the common case, but it doesn't always work + if is_dicts is not False: + it = concat(map(dict.values, adj.values())) + if graph is not None and graph.is_multigraph(): + it = concat(map(dict.values, it)) + try: + return func(it), True + except TypeError: + if is_dicts is True: + raise + # May not be regular dicts + it = concat(x.values() for x in adj.values()) + if graph is not None and graph.is_multigraph(): + it = concat(x.values() for x in it) + return func(it), False + + def from_networkx( graph: nx.Graph, edge_attrs: AttrKey | dict[AttrKey, EdgeValue | None] | None = None, @@ -152,6 +170,7 @@ def from_networkx( if isinstance(adj, nx.classes.coreviews.FilterAdjacency): adj = {k: dict(v) for k, v in adj.items()} + is_dicts = None N = len(adj) if ( not preserve_edge_attrs @@ -162,12 +181,9 @@ def from_networkx( # Either we weren't asked to preserve edge attributes, or there are no edges edge_attrs = None elif preserve_edge_attrs: - # Using comprehensions should be just as fast starting in Python 3.11 - it = concat(map(dict.values, adj.values())) - if graph.is_multigraph(): - it = concat(map(dict.values, it)) - # PERF: should we add `filter(None, ...)` to remove empty data dicts? - attr_sets = set(map(frozenset, it)) + attr_sets, is_dicts = _iterate_values( + graph, adj, is_dicts, lambda it: set(map(frozenset, it)) + ) attrs = frozenset.union(*attr_sets) edge_attrs = dict.fromkeys(attrs, REQUIRED) if len(attr_sets) > 1: @@ -207,10 +223,9 @@ def from_networkx( del edge_attrs[attr] # Else some edges have attribute (default already None) else: - it = concat(map(dict.values, adj.values())) - if graph.is_multigraph(): - it = concat(map(dict.values, it)) - attr_sets = set(map(required.intersection, it)) + attr_sets, is_dicts = _iterate_values( + graph, adj, is_dicts, lambda it: set(map(required.intersection, it)) + ) for attr in required - frozenset.union(*attr_sets): # No edges have these attributes del edge_attrs[attr] @@ -269,17 +284,19 @@ def from_networkx( dst_iter = map(key_to_id.__getitem__, dst_iter) if graph.is_multigraph(): dst_indices = np.fromiter(dst_iter, index_dtype) - num_multiedges = np.fromiter( - map(len, concat(map(dict.values, adj.values()))), index_dtype + num_multiedges, is_dicts = _iterate_values( + None, adj, is_dicts, lambda it: np.fromiter(map(len, it), index_dtype) ) # cp.repeat is slow to use here, so use numpy instead dst_indices = cp.array(np.repeat(dst_indices, num_multiedges)) # Determine edge keys and edge ids for multigraphs - edge_keys = list(concat(concat(map(dict.values, adj.values())))) - edge_indices = cp.fromiter( - concat(map(range, map(len, concat(map(dict.values, adj.values()))))), - index_dtype, - ) + if is_dicts: + edge_keys = list(concat(concat(map(dict.values, adj.values())))) + it = concat(map(dict.values, adj.values())) + else: + edge_keys = list(concat(concat(x.values() for x in adj.values()))) + it = concat(x.values() for x in adj.values()) + edge_indices = cp.fromiter(concat(map(range, map(len, it))), index_dtype) if edge_keys == edge_indices.tolist(): edge_keys = None # Prefer edge_indices else: @@ -323,19 +340,21 @@ def from_networkx( edge_masks[edge_attr] = cp.fromiter(iter_mask, bool) edge_values[edge_attr] = cp.array(vals, dtype) # if vals.ndim > 1: ... 
+ elif edge_default is REQUIRED: + if dtype is None: + + def func(it, edge_attr=edge_attr): + return cp.array(list(map(op.itemgetter(edge_attr), it))) + + else: + + def func(it, edge_attr=edge_attr, dtype=dtype): + return cp.fromiter(map(op.itemgetter(edge_attr), it), dtype) + + edge_value, is_dicts = _iterate_values(graph, adj, is_dicts, func) + edge_values[edge_attr] = edge_value else: - if edge_default is REQUIRED: - # Using comprehensions should be fast starting in Python 3.11 - # iter_values = ( - # edgedata[edge_attr] - # for rowdata in adj.values() - # for edgedata in rowdata.values() - # ) - it = concat(map(dict.values, adj.values())) - if graph.is_multigraph(): - it = concat(map(dict.values, it)) - iter_values = map(op.itemgetter(edge_attr), it) - elif graph.is_multigraph(): + if graph.is_multigraph(): iter_values = ( edgedata.get(edge_attr, edge_default) for rowdata in adj.values() @@ -352,7 +371,7 @@ def from_networkx( edge_values[edge_attr] = cp.array(list(iter_values)) else: edge_values[edge_attr] = cp.fromiter(iter_values, dtype) - # if vals.ndim > 1: ... + # if vals.ndim > 1: ... # cp.repeat is slow to use here, so use numpy instead src_indices = np.repeat( diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index be6b3596030..73eb70807ad 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -246,6 +246,39 @@ def key(testpath): key("test_tree_isomorphism.py:test_positive"): too_slow, key("test_tree_isomorphism.py:test_negative"): too_slow, key("test_efficiency.py:TestEfficiency.test_using_ego_graph"): maybe_oom, + # These repeatedly call `bfs_layers`, which converts the graph every call + key( + "test_vf2pp.py:TestGraphISOVF2pp.test_custom_graph2_different_labels" + ): too_slow, + key( + "test_vf2pp.py:TestGraphISOVF2pp.test_custom_graph3_same_labels" + ): too_slow, + key( + "test_vf2pp.py:TestGraphISOVF2pp.test_custom_graph3_different_labels" + ): too_slow, + key( + "test_vf2pp.py:TestGraphISOVF2pp.test_custom_graph4_same_labels" + ): too_slow, + key( + "test_vf2pp.py:TestGraphISOVF2pp." + "test_disconnected_graph_all_same_labels" + ): too_slow, + key( + "test_vf2pp.py:TestGraphISOVF2pp." + "test_disconnected_graph_all_different_labels" + ): too_slow, + key( + "test_vf2pp.py:TestGraphISOVF2pp." + "test_disconnected_graph_some_same_labels" + ): too_slow, + key( + "test_vf2pp.py:TestMultiGraphISOVF2pp." + "test_custom_multigraph3_same_labels" + ): too_slow, + key( + "test_vf2pp_helpers.py:TestNodeOrdering." 
+ "test_matching_order_all_branches" + ): too_slow, } for item in items: From 0daffccb4777020373daf201b405b034bba61a20 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 6 Dec 2023 13:13:18 -0600 Subject: [PATCH 5/7] Clean up based on review --- python/nx-cugraph/_nx_cugraph/__init__.py | 2 +- python/nx-cugraph/nx_cugraph/algorithms/core.py | 13 ++++++++----- .../algorithms/traversal/breadth_first_search.py | 12 +++--------- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 8f1992923c6..d02c9c3e940 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -115,7 +115,7 @@ "edge_betweenness_centrality": "`weight` parameter is not yet supported.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", - "generic_bfs_edges": "`neighbors` parameter is not yet supported.", + "generic_bfs_edges": "`neighbors` and `sort_neighbors` parameters are not yet supported.", "k_truss": ( "Currently raises `NotImplementedError` for graphs with more than one connected\n" "component when k >= 3. We expect to fix this soon." diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 390598d070e..c00df2d832f 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -15,7 +15,12 @@ import pylibcugraph as plc import nx_cugraph as nxcg -from nx_cugraph.utils import _get_int_dtype, networkx_algorithm, not_implemented_for +from nx_cugraph.utils import ( + _get_int_dtype, + index_dtype, + networkx_algorithm, + not_implemented_for, +) __all__ = ["k_truss"] @@ -81,10 +86,8 @@ def k_truss(G, k): edge_values = {key: val[edge_indices] for key, val in G.edge_values.items()} edge_masks = {key: val[edge_indices] for key, val in G.edge_masks.items()} # Renumber step 2: edge indices - mapper = cp.zeros(len(G), src_indices.dtype) - mapper[node_indices] = cp.arange(node_indices.size, dtype=mapper.dtype) - src_indices = mapper[src_indices] - dst_indices = mapper[dst_indices] + src_indices = cp.searchsorted(node_indices, src_indices).astype(index_dtype) + dst_indices = cp.searchsorted(node_indices, dst_indices).astype(index_dtype) # Renumber step 3: node values node_values = {key: val[node_indices] for key, val in G.node_values.items()} node_masks = {key: val[node_indices] for key, val in G.node_masks.items()} diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index 2f7c8c7d982..e2a7d46f462 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -59,7 +59,7 @@ def _bfs(G, source, *, depth_limit=None, reverse=False): @networkx_algorithm def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): - """`neighbors` parameter is not yet supported.""" + """`neighbors` and `sort_neighbors` parameters are not yet supported.""" return bfs_edges(source, depth_limit=depth_limit) @@ -123,14 +123,8 @@ def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): # TODO: create renumbering helper function(s) unique_node_ids = cp.unique(cp.hstack((predecessors, node_ids))) # Renumber edges - # Option 1 - 
src_indices = cp.searchsorted(unique_node_ids, predecessors) - dst_indices = cp.searchsorted(unique_node_ids, node_ids) - # Option 2 - # mapper = cp.zeros(len(G), index_dtype) - # mapper[unique_node_ids] = cp.arange(unique_node_ids.size, dtype=mapper.dtype) - # src_indices = mapper[predecessors] - # dst_indices = mapper[node_ids] + src_indices = cp.searchsorted(unique_node_ids, predecessors).astype(index_dtype) + dst_indices = cp.searchsorted(unique_node_ids, node_ids).astype(index_dtype) # Renumber nodes if (id_to_key := G.id_to_key) is not None: key_to_id = { From 04dcf6d085d5fc34f27dbf6225c4bf2f62429fa8 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 20 Dec 2023 07:08:33 -0600 Subject: [PATCH 6/7] Skip tests that may OOM, but also test them individually in CI --- ci/test_python.sh | 5 +++++ python/nx-cugraph/nx_cugraph/interface.py | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/ci/test_python.sh b/ci/test_python.sh index d6e92e8d1a5..6988b105029 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -111,6 +111,11 @@ popd rapids-logger "pytest networkx using nx-cugraph backend" pushd python/nx-cugraph ./run_nx_tests.sh +# Individually run tests that are skipped above b/c they may run out of memory +PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestDAG and test_antichains" +PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestMultiDiGraph_DAGLCA and test_all_pairs_lca_pairs_without_lca" +PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestDAGLCA and test_all_pairs_lca_pairs_without_lca" +PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestEfficiency and test_using_ego_graph" # run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0% # in case nx-cugraph failed to load but fallback mode allowed the run to pass. _coverage=$(coverage report|grep "^TOTAL") diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index 73eb70807ad..3f6449f571a 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -12,6 +12,7 @@ # limitations under the License. 
from __future__ import annotations +import os import sys import networkx as nx @@ -246,6 +247,15 @@ def key(testpath): key("test_tree_isomorphism.py:test_positive"): too_slow, key("test_tree_isomorphism.py:test_negative"): too_slow, key("test_efficiency.py:TestEfficiency.test_using_ego_graph"): maybe_oom, + key("test_dag.py:TestDAG.test_antichains"): maybe_oom, + key( + "test_lowest_common_ancestors.py:" + "TestDAGLCA.test_all_pairs_lca_pairs_without_lca" + ): maybe_oom, + key( + "test_lowest_common_ancestors.py:" + "TestMultiDiGraph_DAGLCA.test_all_pairs_lca_pairs_without_lca" + ): maybe_oom, # These repeatedly call `bfs_layers`, which converts the graph every call key( "test_vf2pp.py:TestGraphISOVF2pp.test_custom_graph2_different_labels" @@ -280,6 +290,8 @@ def key(testpath): "test_matching_order_all_branches" ): too_slow, } + if os.environ.get("PYTEST_NO_SKIP", False): + skip.clear() for item in items: kset = set(item.keywords) From 2742cf7bc291c4e511adc6143bfc083ef7160e0e Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 20 Dec 2023 11:43:46 -0600 Subject: [PATCH 7/7] oops fix --- ci/test_python.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 58e70e00912..500bc2f3467 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -112,10 +112,10 @@ rapids-logger "pytest networkx using nx-cugraph backend" pushd python/nx-cugraph ./run_nx_tests.sh # Individually run tests that are skipped above b/c they may run out of memory -PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestDAG and test_antichains" -PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestMultiDiGraph_DAGLCA and test_all_pairs_lca_pairs_without_lca" -PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestDAGLCA and test_all_pairs_lca_pairs_without_lca" -PYTEST_NO_SKIP=True ./run_nx_tests.sh -k --cov-append "TestEfficiency and test_using_ego_graph" +PYTEST_NO_SKIP=True ./run_nx_tests.sh --cov-append -k "TestDAG and test_antichains" +PYTEST_NO_SKIP=True ./run_nx_tests.sh --cov-append -k "TestMultiDiGraph_DAGLCA and test_all_pairs_lca_pairs_without_lca" +PYTEST_NO_SKIP=True ./run_nx_tests.sh --cov-append -k "TestDAGLCA and test_all_pairs_lca_pairs_without_lca" +PYTEST_NO_SKIP=True ./run_nx_tests.sh --cov-append -k "TestEfficiency and test_using_ego_graph" # run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0% # in case nx-cugraph failed to load but fallback mode allowed the run to pass. _coverage=$(coverage report|grep "^TOTAL")
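
As an aside on the renumbering pattern used in `bfs_tree` (patch 3) and
`k_truss` (patch 5) above: `cp.searchsorted` maps each surviving "old"
vertex ID to its position in the sorted array of unique IDs, which becomes
its new ID. A minimal standalone sketch (assumes cupy with a GPU; the
arrays are made up for illustration and are not taken from these commits):

    import cupy as cp

    # Sorted, unique "old" vertex IDs that remain after filtering,
    # as returned by cp.unique.
    unique_node_ids = cp.array([2, 5, 9], dtype=cp.int32)
    # Edge endpoints expressed in old IDs.
    src = cp.array([5, 2, 9, 5], dtype=cp.int32)
    # New IDs are each old ID's position within unique_node_ids.
    new_src = cp.searchsorted(unique_node_ids, src).astype(cp.int32)
    print(new_src)  # [1 0 2 1]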