Skip to content

Commit

Permalink
Faster shortest_path (#18)
Browse files Browse the repository at this point in the history
Closes rapidsai/graph_dl#631

For larger graphs, nearly all the time is spent creating dicts of lists of paths. This PR delays creating the lists of paths by duck-typing the return object and returning a Mapping that computes paths as needed. There is virtually no performance penalty for this, and a huge performance gain by delaying computation.

Moved from here: rapidsai/cugraph#4739

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #18
  • Loading branch information
eriknw authored Nov 19, 2024
1 parent 3eb6afc commit a75e921
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 49 deletions.
4 changes: 3 additions & 1 deletion benchmarks/nx-cugraph/pytest-based/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections.abc import Mapping

import networkx as nx
import pandas as pd
import pytest
Expand Down Expand Up @@ -496,7 +498,7 @@ def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
iterations=iterations,
warmup_rounds=warmup_rounds,
)
assert type(result) is dict
assert isinstance(result, Mapping) # dict in nx, but we duck-type


def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
Expand Down
98 changes: 73 additions & 25 deletions nx_cugraph/algorithms/shortest_paths/unweighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import itertools

import cupy as cp
Expand All @@ -19,7 +20,7 @@

from nx_cugraph import _nxver
from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm
from nx_cugraph.utils import index_dtype, networkx_algorithm

__all__ = [
"bidirectional_shortest_path",
Expand Down Expand Up @@ -179,35 +180,82 @@ def _bfs(
elif not reverse_path:
paths.reverse()
else:
if return_type == "path":
distances = distances[mask]
groups = _groupby(distances, [predecessors[mask], node_ids])

# `pred_node_iter` does the equivalent as these nested for loops:
# for length in range(1, len(groups)):
# preds, nodes = groups[length]
# for pred, node in zip(preds.tolist(), nodes.tolist()):
if G.key_to_id is None:
pred_node_iter = concat(
zip(*(x.tolist() for x in groups[length]))
for length in range(1, len(groups))
)
else:
pred_node_iter = concat(
zip(*(G._nodeiter_to_iter(x.tolist()) for x in groups[length]))
for length in range(1, len(groups))
)
# Consider making utility functions for creating paths
paths = {source: [source]}
# Computing paths to all nodes can be expensive, so let's delay
# computation until needed using `PathMapping`.
key_iter = node_ids.tolist()
pred_iter = predecessors[mask].tolist()
if G.key_to_id is not None:
key_iter = G._nodeiter_to_iter(key_iter)
pred_iter = G._nodeiter_to_iter(pred_iter)
key_to_pred = dict(zip(key_iter, pred_iter))
key_to_pred[source] = None
if reverse_path:
for pred, node in pred_node_iter:
paths[node] = [node, *paths[pred]]
paths = ReversePathMapping({source: [source]}, key_to_pred)
else:
for pred, node in pred_node_iter:
paths[node] = [*paths[pred], node]
paths = PathMapping({source: [source]}, key_to_pred)
if return_type == "path":
return paths
if return_type == "length":
return lengths
# return_type == "length-path"
return lengths, paths


class PathMapping(collections.abc.Mapping):
    """Lazily compute the path for each node by following predecessors.

    Building a path for every node up front can be expensive, so a path is
    only constructed the first time its key is looked up; the result is then
    cached and reused for later lookups (including lookups of descendants).

    A path is built by appending the node to the path of its predecessor, so
    the path for each node contains the node itself at the end of the path.

    Parameters
    ----------
    data : dict
        Paths that are already known, keyed by node (e.g. ``{source:
        [source]}``). Newly computed paths are stored in this dict as well.
        Every predecessor chain must eventually reach a key of ``data``.
    key_to_pred : dict
        Maps each node to its predecessor. It defines the keys, the length,
        and the iteration order of this mapping.
    """

    def __init__(self, data, key_to_pred):
        self._data = data
        self._key_to_pred = key_to_pred

    def __getitem__(self, key):
        """Return the path for ``key``, computing and caching it if needed.

        Raises ``KeyError`` if ``key`` has no cached path and its chain of
        predecessors cannot be followed to a cached path.
        """
        data = self._data
        if key in data:
            return data[key]
        key_to_pred = self._key_to_pred
        # Walk up the predecessor chain, remembering every node that does not
        # have a cached path yet, until a node with a known path is reached.
        pending = [key]
        pred = key_to_pred[key]
        while pred not in data:
            pending.append(pred)
            pred = key_to_pred[pred]
        path = data[pred]
        # Unwind from the node closest to the known path back down to `key`,
        # caching each intermediate path so it is never recomputed.
        for node in reversed(pending):
            path = data[node] = [*path, node]  # Switched in ReversePathMapping
        return path

    def __contains__(self, key):
        """Return whether ``key`` has a path, without computing that path.

        The ``Mapping`` mixin implements ``in`` by calling ``__getitem__``,
        which would build and cache the whole path just to test membership.
        """
        return key in self._data or key in self._key_to_pred

    def __iter__(self):
        return iter(self._key_to_pred)

    def __len__(self):
        return len(self._key_to_pred)


class ReversePathMapping(collections.abc.Mapping):
    """Lazily compute the reversed path for each node by following predecessors.

    Building a path for every node up front can be expensive, so a path is
    only constructed the first time its key is looked up; the result is then
    cached and reused for later lookups (including lookups of descendants).

    A path is built by prepending the node to the path of its predecessor, so
    the path for each node contains the node itself at the beginning of the
    path.

    Parameters
    ----------
    data : dict
        Paths that are already known, keyed by node (e.g. ``{source:
        [source]}``). Newly computed paths are stored in this dict as well.
        Every predecessor chain must eventually reach a key of ``data``.
    key_to_pred : dict
        Maps each node to its predecessor. It defines the keys, the length,
        and the iteration order of this mapping.
    """

    def __init__(self, data, key_to_pred):
        self._data = data
        self._key_to_pred = key_to_pred

    def __getitem__(self, key):
        """Return the path for ``key``, computing and caching it if needed.

        Raises ``KeyError`` if ``key`` has no cached path and its chain of
        predecessors cannot be followed to a cached path.
        """
        data = self._data
        if key in data:
            return data[key]
        key_to_pred = self._key_to_pred
        # Walk up the predecessor chain, remembering every node that does not
        # have a cached path yet, until a node with a known path is reached.
        pending = [key]
        pred = key_to_pred[key]
        while pred not in data:
            pending.append(pred)
            pred = key_to_pred[pred]
        path = data[pred]
        # Unwind from the node closest to the known path back down to `key`,
        # caching each intermediate path so it is never recomputed.
        for node in reversed(pending):
            path = data[node] = [node, *path]  # Switched in PathMapping
        return path

    def __contains__(self, key):
        """Return whether ``key`` has a path, without computing that path.

        The ``Mapping`` mixin implements ``in`` by calling ``__getitem__``,
        which would build and cache the whole path just to test membership.
        """
        return key in self._data or key in self._key_to_pred

    def __iter__(self):
        return iter(self._key_to_pred)

    def __len__(self):
        return len(self._key_to_pred)
38 changes: 15 additions & 23 deletions nx_cugraph/algorithms/shortest_paths/weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,9 @@
import pylibcugraph as plc

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import (
_dtype_param,
_get_float_dtype,
_groupby,
networkx_algorithm,
)
from nx_cugraph.utils import _dtype_param, _get_float_dtype, networkx_algorithm

from .unweighted import _bfs
from .unweighted import PathMapping, ReversePathMapping, _bfs

__all__ = [
"dijkstra_path",
Expand Down Expand Up @@ -378,22 +373,19 @@ def _sssp(
elif not reverse_path:
paths.reverse()
else:
groups = _groupby(predecessors[mask], node_ids)
if (id_to_key := G.id_to_key) is not None:
groups = {id_to_key[k]: v for k, v in groups.items() if k >= 0}
paths = {source: [source]}
preds = [source]
while preds:
pred = preds.pop()
pred_path = paths[pred]
nodes = G._nodearray_to_list(groups[pred])
if reverse_path:
for node in nodes:
paths[node] = [node, *pred_path]
else:
for node in nodes:
paths[node] = [*pred_path, node]
preds.extend(nodes & groups.keys())
# Computing paths to all nodes can be expensive, so let's delay
# computation until needed using `PathMapping`.
key_iter = node_ids.tolist()
pred_iter = predecessors[mask].tolist()
if G.key_to_id is not None:
key_iter = G._nodeiter_to_iter(key_iter)
pred_iter = G._nodeiter_to_iter(pred_iter)
key_to_pred = dict(zip(key_iter, pred_iter))
key_to_pred[source] = None
if reverse_path:
paths = ReversePathMapping({source: [source]}, key_to_pred)
else:
paths = PathMapping({source: [source]}, key_to_pred)
if return_type == "path":
return paths
if return_type == "length":
Expand Down

0 comments on commit a75e921

Please sign in to comment.