diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf304be0b..b005d31ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: types_or: [markdown, yaml] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.1.14" + rev: "v0.1.15" hooks: - id: ruff args: [--fix] diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cf17b7e6..83daf8bba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,11 @@ ## 1.9.0 (Unreleased) +- add endpoint_attrs argument to simplification.simplify_graph function to flexibly relax strictness (#1117) - fix a bug in the features module's polygon handling (#1104) - update obsolete numpy random number generation (#1108) - update warning messages to note that deprecated code will be removed in v2.0.0 (#1111) +- deprecate strict argument in simplification.simplify_graph function in favor of new endpoint_attrs argument (#1117) - deprecate north, south, east, west arguments throughout package in favor of bbox tuple argument (#1112) - deprecate return_coords argument in graph.graph_from_address function (#1105) - deprecate return_hex argument in plot.get_colors function (#1109) diff --git a/osmnx/simplification.py b/osmnx/simplification.py index f6479a0cd..79fc4fa2c 100644 --- a/osmnx/simplification.py +++ b/osmnx/simplification.py @@ -15,18 +15,25 @@ from ._errors import GraphSimplificationError -def _is_endpoint(G, node, strict=True): +def _is_endpoint(G, node, endpoint_attrs): """ Determine if a node is a true endpoint of an edge. - Return True if the node is a "real" endpoint of an edge in the network, - otherwise False. OSM data includes lots of nodes that exist only as points - to help streets bend around curves. An end point is a node that either: - 1) is its own neighbor, ie, it self-loops. - 2) or, has no incoming edges or no outgoing edges, ie, all its incident - edges point inward or all its incident edges point outward. 
- 3) or, it does not have exactly two neighbors and degree of 2 or 4. - 4) or, if strict mode is false, if its edges have different OSM IDs. + Return True if the node is a "true" endpoint of an edge in the network, + otherwise False. OpenStreetMap data includes many nodes that exist only as + geometric vertices to allow ways to curve. A true edge endpoint is a node + that satisfies at least 1 of the following 4 rules: + + 1) It is its own neighbor (ie, it self-loops). + + 2) Or, it has no incoming edges or no outgoing edges (ie, all its incident + edges are inbound or all its incident edges are outbound). + + 3) Or, it does not have exactly two neighbors and degree of 2 or 4. + + 4) Or, if `endpoint_attrs` is not None, and its incident edges have + different values than each other for any of the edge attributes in + `endpoint_attrs`. Parameters ---------- @@ -34,9 +41,11 @@ def _is_endpoint(G, node, strict=True): input graph node : int the node to examine - strict : bool - if False, allow nodes to be end points even if they fail all other rules - but have edges with different OSM IDs + endpoint_attrs : iterable + An iterable of edge attribute names for relaxing the strictness of + endpoint determination. If not None, a node is an endpoint if its + incident edges have different values than each other for any of the + edge attributes in `endpoint_attrs`. Returns ------- @@ -46,36 +55,37 @@ def _is_endpoint(G, node, strict=True): n = len(neighbors) d = G.degree(node) - # rule 1 + # RULE 1 + # if the node appears in its list of neighbors, it self-loops: this is + # always an endpoint if node in neighbors: - # if the node appears in its list of neighbors, it self-loops - # this is always an endpoint. 
return True - # rule 2 + # RULE 2 + # if node has no incoming edges or no outgoing edges, it is an endpoint if G.out_degree(node) == 0 or G.in_degree(node) == 0: - # if node has no incoming edges or no outgoing edges, it is an endpoint return True - # rule 3 + # RULE 3 + # else, if it does NOT have 2 neighbors AND either 2 or 4 directed edges, + # it is an endpoint. either it has 1 or 3+ neighbors, in which case it is + # a dead-end or an intersection of multiple streets or it has 2 neighbors + # but 3 degree (indicating a change from oneway to twoway) or more than 4 + # degree (indicating a parallel edge) and thus is an endpoint if not ((n == 2) and (d in {2, 4})): # noqa: PLR2004 - # else, if it does NOT have 2 neighbors AND either 2 or 4 directed - # edges, it is an endpoint. either it has 1 or 3+ neighbors, in which - # case it is a dead-end or an intersection of multiple streets or it has - # 2 neighbors but 3 degree (indicating a change from oneway to twoway) - # or more than 4 degree (indicating a parallel edge) and thus is an - # endpoint return True - # rule 4 - if not strict: - # non-strict mode: do its incident edges have different OSM IDs? - # first collect all the OSM way IDs for incoming edges - # then collect all the OSM way IDs for outgoing edges - # if there is more than 1 OSM ID then it is an endpoint, otherwise not - incoming = [G.edges[u, node, k]["osmid"] for u in G.predecessors(node) for k in G[u][node]] - outgoing = [G.edges[node, v, k]["osmid"] for v in G.successors(node) for k in G[node][v]] - return len(set(incoming + outgoing)) > 1 + # RULE 4 + # non-strict mode: do its incident edges have different attr values? for + # each attribute to check, collect the attribute's values in all inbound + # and outbound edges. 
if there is more than 1 unique value then this + # node is an endpoint + if endpoint_attrs is not None: + for attr in endpoint_attrs: + in_values = {v for _, _, v in G.in_edges(node, data=attr, keys=False)} + out_values = {v for _, _, v in G.out_edges(node, data=attr, keys=False)} + if len(in_values | out_values) > 1: + return True # if none of the preceding rules passed, then it is not an endpoint return False @@ -151,7 +161,7 @@ def _build_path(G, endpoint, endpoint_successor, endpoints): return path -def _get_paths_to_simplify(G, strict=True): +def _get_paths_to_simplify(G, endpoint_attrs): """ Generate all the paths to be simplified between endpoint nodes. @@ -162,9 +172,11 @@ def _get_paths_to_simplify(G, strict=True): ---------- G : networkx.MultiDiGraph input graph - strict : bool - if False, allow nodes to be end points even if they fail all other rules - but have edges with different OSM IDs + endpoint_attrs : iterable + An iterable of edge attribute names for relaxing the strictness of + endpoint determination. If not None, a node is an endpoint if its + incident edges have different values than each other for any of the + edge attributes in `endpoint_attrs`. Yields ------ @@ -172,7 +184,7 @@ def _get_paths_to_simplify(G, strict=True): a generator of paths to simplify """ # first identify all the nodes that are endpoints - endpoints = {n for n in G.nodes if _is_endpoint(G, n, strict=strict)} + endpoints = {n for n in G.nodes if _is_endpoint(G, n, endpoint_attrs)} utils.log(f"Identified {len(endpoints):,} edge endpoints") # for each endpoint node, look at each of its successor nodes @@ -185,7 +197,7 @@ def _get_paths_to_simplify(G, strict=True): yield _build_path(G, endpoint, successor, endpoints) -def _remove_rings(G): +def _remove_rings(G, endpoint_attrs): """ Remove all self-contained rings from a graph. 
@@ -196,6 +208,11 @@ def _remove_rings(G): ---------- G : networkx.MultiDiGraph input graph + endpoint_attrs : iterable + An iterable of edge attribute names for relaxing the strictness of + endpoint determination. If not None, a node is an endpoint if its + incident edges have different values than each other for any of the + edge attributes in `endpoint_attrs`. Returns ------- @@ -204,35 +221,45 @@ def _remove_rings(G): """ nodes_in_rings = set() for wcc in nx.weakly_connected_components(G): - if not any(_is_endpoint(G, n) for n in wcc): + if not any(_is_endpoint(G, n, endpoint_attrs) for n in wcc): nodes_in_rings.update(wcc) G.remove_nodes_from(nodes_in_rings) return G -def simplify_graph(G, strict=True, remove_rings=True, track_merged=False): +def simplify_graph(G, strict=None, endpoint_attrs=None, remove_rings=True, track_merged=False): # noqa: C901 """ Simplify a graph's topology by removing interstitial nodes. - Simplifies graph topology by removing all nodes that are not intersections - or dead-ends. Create an edge directly between the end points that - encapsulate them, but retain the geometry of the original edges, saved as - a new `geometry` attribute on the new edge. Note that only simplified - edges receive a `geometry` attribute. Some of the resulting consolidated - edges may comprise multiple OSM ways, and if so, their multiple attribute - values are stored as a list. Optionally, the simplified edges can receive - a `merged_edges` attribute that contains a list of all the (u, v) node - pairs that were merged together. + This simplifies graph topology by removing all nodes that are not + intersections or dead-ends, by creating an edge directly between the end + points that encapsulate them while retaining the full geometry of the + original edges, saved as a new `geometry` attribute on the new edge. + + Note that only simplified edges receive a `geometry` attribute. 
Some of + the resulting consolidated edges may comprise multiple OSM ways, and if + so, their multiple attribute values are stored as a list. Optionally, the + simplified edges can receive a `merged_edges` attribute that contains a + list of all the (u, v) node pairs that were merged together. + + Use the `endpoint_attrs` parameter to relax simplification strictness. For + example, `endpoint_attrs=['osmid']` will retain every node whose incident + edges have different OSM IDs. This lets you keep nodes at elbow two-way + intersections (but be aware that sometimes individual blocks have multiple + OSM IDs within them too). You could also use this parameter to retain + nodes where sidewalks or bike lanes begin/end in the middle of a block. Parameters ---------- G : networkx.MultiDiGraph input graph strict : bool - if False, allow nodes to be end points even if they fail all other - rules but have incident edges with different OSM IDs. Lets you keep - nodes at elbow two-way intersections, but sometimes individual blocks - have multiple OSM IDs within them too. + deprecated, do not use + endpoint_attrs : iterable + An iterable of edge attribute names for relaxing the strictness of + endpoint determination. If not None, a node is an endpoint if its + incident edges have different values than each other for any of the + edge attributes in `endpoint_attrs`. remove_rings : bool if True, remove isolated self-contained rings that have no endpoints track_merged : bool @@ -245,6 +272,17 @@ def simplify_graph(G, strict=True, remove_rings=True, track_merged=False): topologically simplified graph, with a new `geometry` attribute on each simplified edge """ + if strict is not None: + msg = ( + "The `strict` parameter has been deprecated and will be removed in " + "the v2.0.0 release. Use the `endpoint_attrs` parameter instead to " + "relax simplification strictness. 
For example, `endpoint_attrs=None` " + "reproduces the old `strict=True` behavior and `endpoint_attrs=['osmid']` " + "reproduces the old `strict=False` behavior." + ) + # maintain old behavior if strict is passed during deprecation + endpoint_attrs = None if strict else ["osmid"] + if "simplified" in G.graph and G.graph["simplified"]: # pragma: no cover msg = "This graph has already been simplified, cannot simplify it again." raise GraphSimplificationError(msg) @@ -262,7 +300,7 @@ def simplify_graph(G, strict=True, remove_rings=True, track_merged=False): all_edges_to_add = [] # generate each path that needs to be simplified - for path in _get_paths_to_simplify(G, strict=strict): + for path in _get_paths_to_simplify(G, endpoint_attrs): # add the interstitial edges we're removing to a list so we can retain # their spatial geometry merged_edges = [] @@ -332,7 +370,7 @@ def simplify_graph(G, strict=True, remove_rings=True, track_merged=False): G.remove_nodes_from(set(all_nodes_to_remove)) if remove_rings: - G = _remove_rings(G) + G = _remove_rings(G, endpoint_attrs) # mark the graph as having been simplified G.graph["simplified"] = True @@ -523,8 +561,7 @@ def _consolidate_intersections_rebuild_graph(G, tolerance=10, reconnect_edges=Tr wccs = list(nx.weakly_connected_components(G.subgraph(nodes_subset.index))) if len(wccs) > 1: # if there are multiple components in this cluster - suffix = 0 - for wcc in wccs: + for suffix, wcc in enumerate(wccs): # set subcluster xy to the centroid of just these nodes idx = list(wcc) subcluster_centroid = node_points.loc[idx].unary_union.centroid @@ -532,7 +569,6 @@ def _consolidate_intersections_rebuild_graph(G, tolerance=10, reconnect_edges=Tr gdf.loc[idx, "y"] = subcluster_centroid.y # move to subcluster by appending suffix to cluster label gdf.loc[idx, "cluster"] = f"{cluster_label}-{suffix}" - suffix += 1 # give nodes unique integer IDs (subclusters with suffixes are strings) gdf["cluster"] = gdf["cluster"].factorize()[0]