From 44471f08b74fc07d52d08018c4eaa55d1b51b4e9 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 12 Jun 2024 14:21:16 +0200 Subject: [PATCH 01/16] replace toposort with graphlib Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index e39fda6a0a..9b452475e0 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -2,8 +2,7 @@ import logging from collections import defaultdict from typing import Dict, List, Set - -from toposort import CircularDependencyError, toposort_flatten +from graphlib import TopologicalSorter, CycleError from kedro_viz.models.flowchart import GraphNode @@ -34,8 +33,8 @@ def sort_layers( * Turn the final {node_id -> layers} into a {layer -> layers} to represent the layers' dependencies. Note: the key is a layer and the values are the parents of that layer, just because that's the format toposort requires. - * Feed this layers dictionary to ``toposort`` and return the sorted values. - * Raise CircularDependencyError if the layers cannot be sorted topologically, + * Feed this layers dictionary to ``graphlib.TopologicalSorter`` and return the sorted values. + * Raise CycleError if the layers cannot be sorted topologically, i.e. there are cycles among the layers. Args: @@ -47,7 +46,7 @@ def sort_layers( The list of layers sorted based on topological order. Raises: - CircularDependencyError: When the layers have cyclic dependencies. + CycleError: When the layers have cyclic dependencies. """ node_layers: Dict[str, Set[str]] = {} # map node_id to the layers that depend on it @@ -95,16 +94,16 @@ def find_child_layers(node_id: str) -> Set[str]: # add the node's layer as a parent layer for all child layers. # Even if a child layer is the same as the node's layer, i.e. a layer is marked - # as its own parent, toposort still works so we don't need to check for that explicitly. + # as its own parent, TopologicalSorter still works so we don't need to check for that explicitly. if node_layer is not None: for layer in child_layers: layer_dependencies[layer].add(node_layer) - # toposort the layer_dependencies to find the layer order. - # Note that for string, toposort_flatten will default to alphabetical order for tie-break. + # Use graphlib.TopologicalSorter to sort the layer dependencies. try: - return toposort_flatten(layer_dependencies) - except CircularDependencyError: + sorter = TopologicalSorter(layer_dependencies) + return list(sorter.static_order()) + except CycleError: logger.warning( "Layers visualisation is disabled as circular dependency detected among layers." ) From b8b48d54f3b1590af32c0371119cd3c7d78614d8 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 19 Jun 2024 13:29:24 +0100 Subject: [PATCH 02/16] Update requirements.txt Signed-off-by: Sajid Alam --- package/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package/requirements.txt b/package/requirements.txt index f40d3278de..af5271f410 100644 --- a/package/requirements.txt +++ b/package/requirements.txt @@ -15,5 +15,4 @@ sqlalchemy>=1.4, <3 strawberry-graphql>=0.192.0, <1.0 uvicorn[standard]~=0.29.0 watchgod~=0.8.2 -# [TODO: Need to drop toposort dependency in favor of in-built graphlib] -toposort>=1.5 # Needs to be at least 1.5 to be able to raise CircularDependencyError +graphlib_backport>=1.0.0; python_version < '3.9' From f42277b72ee34744ee8746352a9f873184035295 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 19 Jun 2024 13:46:40 +0100 Subject: [PATCH 03/16] Update layers.py Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 9b452475e0..78f1f72923 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -97,7 +97,18 @@ def find_child_layers(node_id: str) -> Set[str]: # as its own parent, TopologicalSorter still works so we don't need to check for that explicitly. if node_layer is not None: for layer in child_layers: - layer_dependencies[layer].add(node_layer) + if layer != node_layer: + layer_dependencies[layer].add(node_layer) + + all_layers = set() + for node in nodes.values(): + layer = getattr(node, "layer", None) + if layer: + all_layers.add(layer) + + for layer in all_layers: + if layer not in layer_dependencies: + layer_dependencies[layer] = set() # Use graphlib.TopologicalSorter to sort the layer dependencies. try: From 66a160d2eb4c70db5c9e86f8250d53c46473c862 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 19 Jun 2024 14:59:29 +0100 Subject: [PATCH 04/16] Update layers.py Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 78f1f72923..754b2bb205 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -113,7 +113,8 @@ def find_child_layers(node_id: str) -> Set[str]: # Use graphlib.TopologicalSorter to sort the layer dependencies. try: sorter = TopologicalSorter(layer_dependencies) - return list(sorter.static_order()) + sorted_layers = list(sorter.static_order()) + return sorted_layers except CycleError: logger.warning( "Layers visualisation is disabled as circular dependency detected among layers." From f6c1399ffefc85d19a04f2790c001f539816ec09 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 24 Jun 2024 13:39:34 +0100 Subject: [PATCH 05/16] Update layers.py Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 754b2bb205..2a72d23114 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -32,7 +32,7 @@ def sort_layers( that have already been visited. * Turn the final {node_id -> layers} into a {layer -> layers} to represent the layers' dependencies. Note: the key is a layer and the values are the parents of that layer, - just because that's the format toposort requires. + just because that's the format TopologicalSorter requires. * Feed this layers dictionary to ``graphlib.TopologicalSorter`` and return the sorted values. * Raise CycleError if the layers cannot be sorted topologically, i.e. there are cycles among the layers. @@ -97,18 +97,7 @@ def find_child_layers(node_id: str) -> Set[str]: # as its own parent, TopologicalSorter still works so we don't need to check for that explicitly. if node_layer is not None: for layer in child_layers: - if layer != node_layer: - layer_dependencies[layer].add(node_layer) - - all_layers = set() - for node in nodes.values(): - layer = getattr(node, "layer", None) - if layer: - all_layers.add(layer) - - for layer in all_layers: - if layer not in layer_dependencies: - layer_dependencies[layer] = set() + layer_dependencies[layer].add(node_layer) # Use graphlib.TopologicalSorter to sort the layer dependencies. try: From 116186324dd066e9accbc56fc63e11eb2bad3b76 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 24 Jun 2024 13:54:40 +0100 Subject: [PATCH 06/16] Update test_layers.py Signed-off-by: Sajid Alam --- package/tests/test_services/test_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index b3177a551d..c843b5d2d2 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -188,8 +188,8 @@ def test_sort_layers_should_return_empty_list_on_cyclic_layers(mocker): ) for node_id, node_dict in data.items() } - node_dependencies = {"node_1": {"node_2"}, "node_2": {"node_3"}, "node_3": set()} + node_dependencies = {"node_1": {"node_2"}, "node_2": {"node_3"}, "node_3": {"node_1"}} assert not sort_layers(nodes, node_dependencies) mocked_warning.assert_called_once_with( - "Layers visualisation is disabled as circular dependency detected among layers.", + "Layers visualisation is disabled as circular dependency detected among layers." ) From 43210a9451a73b6f5a24cfa1927d37001254fbdd Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 24 Jun 2024 14:05:02 +0100 Subject: [PATCH 07/16] Update layers.py Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 2a72d23114..ae8fdec2b6 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -97,7 +97,8 @@ def find_child_layers(node_id: str) -> Set[str]: # as its own parent, TopologicalSorter still works so we don't need to check for that explicitly. if node_layer is not None: for layer in child_layers: - layer_dependencies[layer].add(node_layer) + if layer != node_layer: + layer_dependencies[layer].add(node_layer) # Use graphlib.TopologicalSorter to sort the layer dependencies. try: From 28de53289ffc1429d733e6afcf98c12a3b7384df Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 24 Jun 2024 14:17:19 +0100 Subject: [PATCH 08/16] Update layers.py Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index ae8fdec2b6..b2378594d2 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -89,17 +89,22 @@ def find_child_layers(node_id: str) -> Set[str]: # compute the layer dependencies dictionary based on the node_layers dependencies, # represented as {layer -> set(parent_layers)} layer_dependencies = defaultdict(set) + all_layers = set() # keep track of all layers encountered for node_id, child_layers in node_layers.items(): node_layer = getattr(nodes[node_id], "layer", None) - - # add the node's layer as a parent layer for all child layers. - # Even if a child layer is the same as the node's layer, i.e. a layer is marked - # as its own parent, TopologicalSorter still works so we don't need to check for that explicitly. if node_layer is not None: + all_layers.add(node_layer) for layer in child_layers: + all_layers.add(layer) + # Avoid adding the node's layer as a parent of itself if layer != node_layer: layer_dependencies[layer].add(node_layer) + # Add empty dependencies for all layers to ensure they appear in the sorting + for layer in all_layers: + if layer not in layer_dependencies: + layer_dependencies[layer] = set() + # Use graphlib.TopologicalSorter to sort the layer dependencies. try: sorter = TopologicalSorter(layer_dependencies) From 9e400b5c143e68e2c13ea23186ff2cf163302b27 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 26 Jun 2024 11:24:22 +0100 Subject: [PATCH 09/16] Update layers.py Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index b2378594d2..a9b1949c10 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -83,7 +83,7 @@ def find_child_layers(node_id: str) -> Set[str]: return node_layers[node_id] # populate node_layers dependencies - for node_id in nodes: + for node_id in sorted(nodes): # Sort nodes find_child_layers(node_id) # compute the layer dependencies dictionary based on the node_layers dependencies, From 9dc980e25a535e9837ca642e9e7f5b72e92d21ec Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 26 Jun 2024 11:33:16 +0100 Subject: [PATCH 10/16] lint Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 2 +- package/requirements.txt | 1 - package/tests/test_services/test_layers.py | 6 +++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index a9b1949c10..816df50f7b 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -1,8 +1,8 @@ """`kedro_viz.services.layers` defines layers-related logic.""" import logging from collections import defaultdict +from graphlib import CycleError, TopologicalSorter from typing import Dict, List, Set -from graphlib import TopologicalSorter, CycleError from kedro_viz.models.flowchart import GraphNode diff --git a/package/requirements.txt b/package/requirements.txt index d15bd69efd..492b16489b 100644 --- a/package/requirements.txt +++ b/package/requirements.txt @@ -15,4 +15,3 @@ sqlalchemy>=1.4, <3 strawberry-graphql>=0.192.0, <1.0 uvicorn[standard]~=0.30.0 watchgod~=0.8.2 -graphlib_backport>=1.0.0; python_version < '3.9' diff --git a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index c843b5d2d2..50c7f05d1a 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -188,7 +188,11 @@ def test_sort_layers_should_return_empty_list_on_cyclic_layers(mocker): ) for node_id, node_dict in data.items() } - node_dependencies = {"node_1": {"node_2"}, "node_2": {"node_3"}, "node_3": {"node_1"}} + node_dependencies = { + "node_1": {"node_2"}, + "node_2": {"node_3"}, + "node_3": {"node_1"}, + } assert not sort_layers(nodes, node_dependencies) mocked_warning.assert_called_once_with( "Layers visualisation is disabled as circular dependency detected among layers." From 17ec88a93f926386f796ae097ddb8422fa446d18 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 26 Jun 2024 11:56:24 +0100 Subject: [PATCH 11/16] add ordering Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 2 +- package/tests/test_services/test_layers.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 816df50f7b..0756e8871f 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -33,7 +33,7 @@ def sort_layers( * Turn the final {node_id -> layers} into a {layer -> layers} to represent the layers' dependencies. Note: the key is a layer and the values are the parents of that layer, just because that's the format TopologicalSorter requires. - * Feed this layers dictionary to ``graphlib.TopologicalSorter`` and return the sorted values. + * Takes layers dictionary to ``graphlib.TopologicalSorter`` and return the sorted values. * Raise CycleError if the layers cannot be sorted topologically, i.e. there are cycles among the layers. diff --git a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index 50c7f05d1a..1c9b80ba62 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -167,7 +167,8 @@ def test_sort_layers(graph_schema, nodes, node_dependencies, expected): ) for node_id, node_dict in nodes.items() } - assert sort_layers(nodes, node_dependencies) == expected, graph_schema + sorted_layers = sort_layers(nodes, node_dependencies) + assert sorted(sorted_layers) == sorted(expected), graph_schema def test_sort_layers_should_return_empty_list_on_cyclic_layers(mocker): From 232667c0a044fbbcc5a3cc40ca9ac702a295fd5a Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 26 Jun 2024 12:29:18 +0100 Subject: [PATCH 12/16] Update test_responses.py Signed-off-by: Sajid Alam --- package/tests/test_api/test_rest/test_responses.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/package/tests/test_api/test_rest/test_responses.py b/package/tests/test_api/test_rest/test_responses.py index da46a7a5f3..8528ee6d90 100644 --- a/package/tests/test_api/test_rest/test_responses.py +++ b/package/tests/test_api/test_rest/test_responses.py @@ -790,12 +790,21 @@ def test_get_pipeline(self, client): response_data.pop("modular_pipelines"), expected_modular_pipelines, ) - assert response_data == { + + # Extract and sort the layers field + response_data_layers_sorted = sorted(response_data["layers"]) + expected_layers_sorted = sorted(["model_inputs", "raw"]) + assert response_data_layers_sorted == expected_layers_sorted + + # Remove the layers field from response_data for further comparison + response_data.pop("layers") + + # Expected response without the layers field + expected_response_without_layers = { "tags": [ {"id": "split", "name": "split"}, {"id": "train", "name": "train"}, ], - "layers": ["model_inputs", "raw"], "pipelines": [ {"id": "__default__", "name": "__default__"}, {"id": "data_science", "name": "data_science"}, @@ -803,6 +812,7 @@ def test_get_pipeline(self, client): ], "selected_pipeline": "data_science", } + assert response_data == expected_response_without_layers def test_get_non_existing_pipeline(self, client): response = client.get("/api/pipelines/foo") From ab58ce51f271a22d37a38594933a5abd0f7bec82 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 3 Jul 2024 15:07:04 +0100 Subject: [PATCH 13/16] keep order of layers Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 0756e8871f..37aec3e5eb 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -109,7 +109,7 @@ def find_child_layers(node_id: str) -> Set[str]: try: sorter = TopologicalSorter(layer_dependencies) sorted_layers = list(sorter.static_order()) - return sorted_layers + return sorted(sorted_layers, key=lambda layer: (sorted_layers.index(layer), layer)) except CycleError: logger.warning( "Layers visualisation is disabled as circular dependency detected among layers." From 3beaea7bb97d2c176a3a7b349632f109e25c3079 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 3 Jul 2024 15:15:05 +0100 Subject: [PATCH 14/16] lint Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 37aec3e5eb..3193ad83c0 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -109,7 +109,9 @@ def find_child_layers(node_id: str) -> Set[str]: try: sorter = TopologicalSorter(layer_dependencies) sorted_layers = list(sorter.static_order()) - return sorted(sorted_layers, key=lambda layer: (sorted_layers.index(layer), layer)) + return sorted( + sorted_layers, key=lambda layer: (sorted_layers.index(layer), layer) + ) except CycleError: logger.warning( "Layers visualisation is disabled as circular dependency detected among layers." From 2d2827f7d2633b35d6bcd0a8a607b48796772358 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 3 Jul 2024 16:39:56 +0100 Subject: [PATCH 15/16] add release notes and comments Signed-off-by: Sajid Alam --- RELEASE.md | 1 + package/kedro_viz/services/layers.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 74eef3eb74..4d190772eb 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,6 +20,7 @@ Please follow the established format: - Relax `packaging` pin in requirements. (#1947) - Add favicon to kedro-viz documentation. (#1959) - Fix bug related to nested namespace pipelines. (#1897) +- Migrate from `toposort` to `graphlib`. (#1942) # Release 9.1.0 diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 3193ad83c0..d769830aad 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -109,6 +109,9 @@ def find_child_layers(node_id: str) -> Set[str]: try: sorter = TopologicalSorter(layer_dependencies) sorted_layers = list(sorter.static_order()) + # Ensure the order is stable and respects the original input order + # `sorted_layers.index(layer)` ensures the order from topological sorting is preserved. + # `layer` ensures that if two layers have the same dependency level, they are sorted alphabetically. return sorted( sorted_layers, key=lambda layer: (sorted_layers.index(layer), layer) ) From 5545efe8130faac5fdd05d5a37ca71c61db12c6b Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 3 Jul 2024 17:18:20 +0100 Subject: [PATCH 16/16] lint Signed-off-by: Sajid Alam --- package/kedro_viz/services/layers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index d769830aad..4eab727e80 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -111,7 +111,8 @@ def find_child_layers(node_id: str) -> Set[str]: sorted_layers = list(sorter.static_order()) # Ensure the order is stable and respects the original input order # `sorted_layers.index(layer)` ensures the order from topological sorting is preserved. - # `layer` ensures that if two layers have the same dependency level, they are sorted alphabetically. + # `layer` ensures that if two layers have the same dependency level, + # they are sorted alphabetically. return sorted( sorted_layers, key=lambda layer: (sorted_layers.index(layer), layer) )