Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lowest_common_ancestor algorithm #35

Merged
merged 11 commits into from
Feb 5, 2025
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ Below is the list of algorithms that are currently supported in nx-cugraph.
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank">pagerank</a>
<a href="https://networkx.org/documentation/stable/reference/algorithms/link_prediction.html#module-networkx.algorithms.link_prediction">link_prediction</a>
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_prediction.jaccard_coefficient.html#networkx.algorithms.link_prediction.jaccard_coefficient">jaccard_coefficient</a>
<a href="https://networkx.org/documentation/stable/reference/algorithms/lowest_common_ancestors.html#module-networkx.algorithms.lowest_common_ancestors">lowest_common_ancestors</a>
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.lowest_common_ancestors.lowest_common_ancestor.html#networkx.algorithms.lowest_common_ancestors.lowest_common_ancestor">lowest_common_ancestor</a>
<a href="https://networkx.org/documentation/stable/reference/algorithms/operators.html">operators</a>
└─ <a href="https://networkx.org/documentation/stable/reference/algorithms/operators.html#module-networkx.algorithms.operators.unary">unary</a>
├─ <a href="https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.operators.unary.complement.html#networkx.algorithms.operators.unary.complement">complement</a>
Expand Down
2 changes: 2 additions & 0 deletions _nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
"les_miserables_graph",
"lollipop_graph",
"louvain_communities",
"lowest_common_ancestor",
"moebius_kantor_graph",
"node_connected_component",
"null_graph",
Expand Down Expand Up @@ -186,6 +187,7 @@
"generic_bfs_edges": "`neighbors` parameter is not yet supported.",
"katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.",
"louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.",
"lowest_common_ancestor": "May not always raise NetworkXError for graphs that are not DAGs.",
"pagerank": "`dangling` parameter is not supported, but it is checked for validity.",
"shortest_path": "Negative weights are not yet supported.",
"shortest_path_length": "Negative weights are not yet supported.",
Expand Down
32 changes: 31 additions & 1 deletion benchmarks/pytest-based/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import random
from collections.abc import Mapping

import networkx as nx
Expand Down Expand Up @@ -902,6 +902,36 @@ def bench_ego_graph(benchmark, graph_obj, backend_wrapper):
assert type(result) is type(G)


def bench_lowest_common_ancestor(benchmark, graph_obj, backend_wrapper):
    """Benchmark ``nx.lowest_common_ancestor`` on a fixed pseudo-random node pair.

    ``lowest_common_ancestor`` needs a DAG, so when ``graph_obj`` is not one, a
    ``DiGraph`` is built from it that keeps only edges with ``src < dst``.
    """
    # Must be DAG
    if not nx.is_directed_acyclic_graph(graph_obj):
        new_graph_obj = nx.DiGraph()
        new_graph_obj.add_nodes_from(graph_obj.nodes(data=True))
        # Keeping only edges that point from a smaller to a larger node cannot
        # form a cycle, so the result is acyclic.
        new_graph_obj.add_edges_from(
            (src, dst, *rest)
            for src, dst, *rest in graph_obj.edges(data=True)
            if src < dst
        )
        new_graph_obj.graph.update(graph_obj.graph)
        # Both literals need the `f` prefix: adjacent string literals are
        # concatenated, but only f-prefixed pieces are interpolated.
        print(
            f"WARNING: graph was changed and now had {new_graph_obj.number_of_nodes()} "
            f"nodes and {new_graph_obj.number_of_edges()} edges."
        )
        graph_obj = new_graph_obj

    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
    # Fixed seed and sorted node order so every run benchmarks the same pair.
    r = random.Random(42)
    node1, node2 = r.sample(sorted(G), 2)
    result = benchmark.pedantic(
        target=backend_wrapper(nx.lowest_common_ancestor),
        args=(G, node1, node2),
        rounds=rounds,
        iterations=iterations,
        warmup_rounds=warmup_rounds,
    )
    # `None` is the default returned when the pair has no common ancestor.
    assert result is None or result in G


def bench_bipartite_BC_n1000_m3000_k100000(benchmark, backend_wrapper):
# Example how to run:
# $ pytest -sv -k "bench_bipartite_BC" \
Expand Down
2 changes: 2 additions & 0 deletions nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
components,
link_analysis,
link_prediction,
lowest_common_ancestors,
operators,
shortest_paths,
traversal,
Expand All @@ -32,6 +33,7 @@
from .isolate import *
from .link_analysis import *
from .link_prediction import *
from .lowest_common_ancestors import *
from .operators import *
from .reciprocity import *
from .shortest_paths import *
Expand Down
104 changes: 104 additions & 0 deletions nx_cugraph/algorithms/lowest_common_ancestors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cupy as cp
import networkx as nx
import numpy as np
import pylibcugraph as plc

from nx_cugraph.convert import _to_directed_graph
from nx_cugraph.utils import (
_groupby,
index_dtype,
networkx_algorithm,
not_implemented_for,
)

__all__ = ["lowest_common_ancestor"]


def _ancestor_ids(plc_graph, source_index):
    """Return the ids of all nodes reached by a BFS from ``source_index``.

    ``plc_graph`` is expected to be the graph built with
    ``switch_indices=True`` so that the BFS collects ancestors of the source
    (presumably by walking edges in reverse -- confirm against
    ``_get_plc_graph``). The source node itself is included in the result.
    """
    distances, _predecessors, node_ids = plc.bfs(
        handle=plc.ResourceHandle(),
        graph=plc_graph,
        sources=cp.array([source_index], index_dtype),
        direction_optimizing=False,  # True for undirected only
        depth_limit=-1,
        compute_predecessors=False,
        do_expensive_check=False,
    )
    # Nodes whose distance equals the dtype's max value are treated as unreached.
    return node_ids[distances != np.iinfo(distances.dtype).max]


# `not_implemented_for` is the decorator from NetworkX; it makes the function
# raise for undirected graphs. It is placed before `@networkx_algorithm`
# because NetworkX already checks and raises for undirected input before
# dispatching to backends:
# https://github.com/networkx/networkx/blob/9beaf7a0b59fe21775cd93862d9c7b28152a2d8c/networkx/algorithms/lowest_common_ancestors.py#L115-L117
# Using the same decorator here keeps direct calls such as
# `nxcg.lowest_common_ancestor(G)` behaving the same way.
@not_implemented_for("undirected")
@networkx_algorithm(is_incomplete=True, version_added="24.12", _plc="bfs")
def lowest_common_ancestor(G, node1, node2, default=None):
    """May not always raise NetworkXError for graphs that are not DAGs."""
    # NOTE: the docstring above is deliberately left unchanged; the same text
    # appears for "lowest_common_ancestor" in `_nx_cugraph/__init__.py`.
    #
    # Returns a lowest common ancestor of `node1` and `node2`, or `default`
    # when the two nodes share no ancestor.
    # Raises NetworkXPointlessConcept on a graph with no nodes, NodeNotFound
    # when either node is missing, and NetworkXError when the final walk
    # revisits a node (i.e. the graph has a cycle).
    G = _to_directed_graph(G)

    # TODO: add this check once `nxcg.is_directed_acyclic_graph` exists
    # (i.e. if/when PLC can help answer whether a graph is a DAG):
    #
    # if not nxcg.is_directed_acyclic_graph(G):
    #     raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
    #
    # Until then, for a graph that is not a DAG we may still give an answer or
    # we may raise as NetworkX does. Callers commonly know whether their graph
    # is a DAG (often by construction), so this was judged acceptable.

    if G._N == 0:
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
    # Report every missing node of the pair at once, as NetworkX does.
    missing = {node for node in (node1, node2) if node not in G}
    if missing:
        raise nx.NodeNotFound(
            f"Node(s) {missing} from pair {(node1, node2)} not in G."
        )

    node1_index = node1 if G.key_to_id is None else G.key_to_id[node1]
    node2_index = node2 if G.key_to_id is None else G.key_to_id[node2]
    if node1_index == node2_index:  # Handle trivial case
        return node1

    # One ancestor BFS per node over the same PLC graph.
    plc_graph = G._get_plc_graph(switch_indices=True)
    node_ids1 = _ancestor_ids(plc_graph, node1_index)
    node_ids2 = _ancestor_ids(plc_graph, node2_index)

    # Find all common ancestors
    common_ids = cp.intersect1d(node_ids1, node_ids2, assume_unique=True)
    if common_ids.size == 0:
        return default
    if common_ids.size == 1:
        # Only one; it must be the lowest common ancestor
        node_index = common_ids[0].tolist()
        return node_index if G.key_to_id is None else G.id_to_key[node_index]

    # Find nodes from `common_ids` that have no predecessors from `common_ids`.
    # TODO: create utility functions for getting neighbors, predecessors,
    # and successors of nodes, which may simplify this code.
    # Keep only edges that leave a common ancestor, ignoring self-loops.
    mask = cp.isin(G.src_indices, common_ids) & (G.src_indices != G.dst_indices)
    groups = _groupby(G.src_indices[mask], G.dst_indices[mask])
    # Walk along successors until we reach a lowest common ancestor
    node_index = next(iter(groups))  # Choose arbitrary element
    seen = set()
    while True:
        if node_index in seen:
            # Revisiting a node means the walk followed a cycle.
            raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
        lower_ancestors = cp.intersect1d(groups[node_index], common_ids)
        if lower_ancestors.size == 0:
            # No successor is itself a common ancestor: this one is lowest.
            break
        seen.add(node_index)
        node_index = lower_ancestors[0].tolist()  # Arbitrary element
    return node_index if G.key_to_id is None else G.id_to_key[node_index]