From 29b3b3a99f1ee7fce02072a91e84b5e7ca4ef0b3 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Thu, 17 Oct 2024 12:19:10 +0100 Subject: [PATCH 01/18] initial Signed-off-by: Sajid Alam --- package/kedro_viz/api/rest/responses.py | 2 +- package/kedro_viz/data_access/managers.py | 2 +- .../data_access/repositories/graph.py | 2 +- .../repositories/modular_pipelines.py | 2 +- .../repositories/registered_pipelines.py | 2 +- .../data_access/repositories/tags.py | 2 +- package/kedro_viz/models/flowchart.py | 934 ------------------ .../kedro_viz/models/flowchart/__init__.py | 0 package/kedro_viz/models/flowchart/edges.py | 12 + .../kedro_viz/models/flowchart/metadata.py | 64 ++ package/kedro_viz/models/flowchart/nodes.py | 100 ++ package/kedro_viz/services/layers.py | 2 +- package/tests/conftest.py | 2 +- .../test_api/test_rest/test_responses.py | 2 +- .../tests/test_data_access/test_managers.py | 2 +- .../test_repositories/test_graph.py | 2 +- .../test_modular_pipelines.py | 2 +- package/tests/test_models/test_flowchart.py | 2 +- package/tests/test_services/test_layers.py | 2 +- 19 files changed, 190 insertions(+), 948 deletions(-) delete mode 100644 package/kedro_viz/models/flowchart.py create mode 100644 package/kedro_viz/models/flowchart/__init__.py create mode 100644 package/kedro_viz/models/flowchart/edges.py create mode 100644 package/kedro_viz/models/flowchart/metadata.py create mode 100644 package/kedro_viz/models/flowchart/nodes.py diff --git a/package/kedro_viz/api/rest/responses.py b/package/kedro_viz/api/rest/responses.py index 2f59d33b16..eb0c7e02bc 100644 --- a/package/kedro_viz/api/rest/responses.py +++ b/package/kedro_viz/api/rest/responses.py @@ -13,7 +13,7 @@ from kedro_viz.api.rest.utils import get_package_compatibilities from kedro_viz.data_access import data_access_manager -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.flowchart import ( DataNode, DataNodeMetadata, ParametersNodeMetadata, diff --git 
a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 40e00ebe55..f89894fadb 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -14,7 +14,7 @@ from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.flowchart import ( DataNode, GraphEdge, GraphNode, diff --git a/package/kedro_viz/data_access/repositories/graph.py b/package/kedro_viz/data_access/repositories/graph.py index 90f734ec1d..b0fd60c348 100644 --- a/package/kedro_viz/data_access/repositories/graph.py +++ b/package/kedro_viz/data_access/repositories/graph.py @@ -3,7 +3,7 @@ # pylint: disable=missing-class-docstring,missing-function-docstring from typing import Dict, Generator, List, Optional, Set -from kedro_viz.models.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart.flowchart import GraphEdge, GraphNode class GraphNodesRepository: diff --git a/package/kedro_viz/data_access/repositories/modular_pipelines.py b/package/kedro_viz/data_access/repositories/modular_pipelines.py index 25b7645ff4..c5ab6d7152 100644 --- a/package/kedro_viz/data_access/repositories/modular_pipelines.py +++ b/package/kedro_viz/data_access/repositories/modular_pipelines.py @@ -9,7 +9,7 @@ from kedro.pipeline.node import Node as KedroNode from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.flowchart import ( GraphNode, GraphNodeType, ModularPipelineChild, diff --git a/package/kedro_viz/data_access/repositories/registered_pipelines.py b/package/kedro_viz/data_access/repositories/registered_pipelines.py index 16cdd98adf..3e72766a62 100644 --- a/package/kedro_viz/data_access/repositories/registered_pipelines.py +++ b/package/kedro_viz/data_access/repositories/registered_pipelines.py 
@@ -4,7 +4,7 @@ from collections import OrderedDict, defaultdict from typing import Dict, List, Optional, Set -from kedro_viz.models.flowchart import RegisteredPipeline +from kedro_viz.models.flowchart.flowchart import RegisteredPipeline class RegisteredPipelinesRepository: diff --git a/package/kedro_viz/data_access/repositories/tags.py b/package/kedro_viz/data_access/repositories/tags.py index eae5c68bb0..844c64ffdc 100644 --- a/package/kedro_viz/data_access/repositories/tags.py +++ b/package/kedro_viz/data_access/repositories/tags.py @@ -3,7 +3,7 @@ # pylint: disable=missing-class-docstring,missing-function-docstring from typing import Iterable, List, Set -from kedro_viz.models.flowchart import Tag +from kedro_viz.models.flowchart.flowchart import Tag class TagsRepository: diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py deleted file mode 100644 index 8828650a7e..0000000000 --- a/package/kedro_viz/models/flowchart.py +++ /dev/null @@ -1,934 +0,0 @@ -"""`kedro_viz.models.flowchart` defines data models to represent Kedro entities in a viz graph.""" - -# pylint: disable=protected-access, missing-function-docstring -import abc -import inspect -import logging -from enum import Enum -from pathlib import Path -from types import FunctionType -from typing import Any, ClassVar, Dict, List, Optional, Set, Union, cast - -from fastapi.encoders import jsonable_encoder -from kedro.pipeline.node import Node as KedroNode -from pydantic import ( - BaseModel, - ConfigDict, - Field, - ValidationInfo, - field_validator, - model_validator, -) - -from kedro_viz.models.utils import get_dataset_type -from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding - -try: - # kedro 0.18.11 onwards - from kedro.io.core import DatasetError -except ImportError: # pragma: no cover - # older versions - from kedro.io.core import DataSetError as DatasetError # type: ignore -try: - # kedro 0.18.12 onwards - from kedro.io.core import AbstractDataset 
-except ImportError: # pragma: no cover - # older versions - from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore - -logger = logging.getLogger(__name__) - - -def _parse_filepath(dataset_description: Dict[str, Any]) -> Optional[str]: - filepath = dataset_description.get("filepath") or dataset_description.get("path") - return str(filepath) if filepath else None - - -class NamedEntity(BaseModel): - """Represent a named entity (Tag/Registered Pipeline) in a Kedro project - Args: - id (str): Id of the registered pipeline - - Raises: - AssertionError: If id is not supplied during instantiation - """ - - id: str - name: Optional[str] = Field( - default=None, - validate_default=True, - description="The name of the registered pipeline", - ) - - @field_validator("name") - @classmethod - def set_name(cls, _, info: ValidationInfo): - assert "id" in info.data - return info.data["id"] - - -class RegisteredPipeline(NamedEntity): - """Represent a registered pipeline in a Kedro project""" - - -class GraphNodeType(str, Enum): - """Represent all possible node types in the graph representation of a Kedro pipeline. - The type needs to inherit from str as well so FastAPI can serialise it. See: - https://fastapi.tiangolo.com/tutorial/path-params/#working-with-python-enumerations - """ - - TASK = "task" - DATA = "data" - PARAMETERS = "parameters" - MODULAR_PIPELINE = ( - "modularPipeline" # camelCase so it can be referred directly to in the frontend - ) - - -class ModularPipelineChild(BaseModel, frozen=True): - """Represent a child of a modular pipeline. - - Args: - id (str): Id of the modular pipeline child - type (GraphNodeType): Type of modular pipeline child - """ - - id: str - type: GraphNodeType - - -class Tag(NamedEntity): - """Represent a tag in a Kedro project""" - - def __hash__(self) -> int: - return hash(self.id) - - -class GraphNode(BaseModel, abc.ABC): - """Represent a node in the graph representation of a Kedro pipeline. 
- All node models except the metadata node models should inherit from this class - - Args: - id (str): A unique identifier for the node in the graph, - obtained by hashing the node's string representation. - name (str): The full name of this node obtained from the underlying Kedro object - type (str): The type of the graph node - tags (Set[str]): The tags associated with this node. Defaults to `set()`. - kedro_obj (Optional[Union[KedroNode, AbstractDataset]]): The underlying Kedro object - for each graph node, if any. Defaults to `None`. - pipelines (Set[str]): The set of registered pipeline IDs this - node belongs to. Defaults to `set()`. - modular_pipelines (Optional[Set(str)]): A set of modular pipeline names - this node belongs to. - - """ - - id: str - name: str - type: str - tags: Set[str] = Field(set(), description="The tags associated with this node") - kedro_obj: Optional[Union[KedroNode, AbstractDataset]] = Field( - None, - description="The underlying Kedro object for each graph node, if any", - exclude=True, - ) - pipelines: Set[str] = Field( - set(), description="The set of registered pipeline IDs this node belongs to" - ) - - modular_pipelines: Optional[Set[str]] = Field( - default=None, - validate_default=True, - description="The modular_pipelines this node belongs to", - ) - model_config = ConfigDict(arbitrary_types_allowed=True) - - @classmethod - def create_task_node( - cls, node: KedroNode, node_id: str, modular_pipelines: Optional[Set[str]] - ) -> "TaskNode": - """Create a graph node of type task for a given Kedro Node instance. - Args: - node: A node in a Kedro pipeline. - node_id: Id of the task node. - modular_pipelines: A set of modular_pipeline_ids the node belongs to. - Returns: - An instance of TaskNode. 
- """ - node_name = node._name or node._func_name - return TaskNode( - id=node_id, - name=node_name, - tags=set(node.tags), - kedro_obj=node, - modular_pipelines=modular_pipelines, - ) - - @classmethod - # pylint: disable=too-many-positional-arguments - def create_data_node( - cls, - dataset_id: str, - dataset_name: str, - layer: Optional[str], - tags: Set[str], - dataset: AbstractDataset, - stats: Optional[Dict], - modular_pipelines: Optional[Set[str]], - is_free_input: bool = False, - ) -> Union["DataNode", "TranscodedDataNode"]: - """Create a graph node of type data for a given Kedro Dataset instance. - Args: - dataset_id: A hashed id for the dataset node - dataset_name: The name of the dataset, including namespace, e.g. - data_science.master_table. - layer: The optional layer that the dataset belongs to. - tags: The set of tags assigned to assign to the graph representation - of this dataset. N.B. currently it's derived from the node's tags. - dataset: A dataset in a Kedro pipeline. - stats: The dictionary of dataset statistics, e.g. - {"rows":2, "columns":3, "file_size":100} - modular_pipelines: A set of modular_pipeline_ids the node belongs to. - is_free_input: Whether the dataset is a free input in the pipeline - Returns: - An instance of DataNode. 
- """ - is_transcoded_dataset = TRANSCODING_SEPARATOR in dataset_name - if is_transcoded_dataset: - name = _strip_transcoding(dataset_name) - return TranscodedDataNode( - id=dataset_id, - name=name, - tags=tags, - layer=layer, - is_free_input=is_free_input, - stats=stats, - modular_pipelines=modular_pipelines, - ) - - return DataNode( - id=dataset_id, - name=dataset_name, - tags=tags, - layer=layer, - kedro_obj=dataset, - is_free_input=is_free_input, - stats=stats, - modular_pipelines=modular_pipelines, - ) - - @classmethod - # pylint: disable=too-many-positional-arguments - def create_parameters_node( - cls, - dataset_id: str, - dataset_name: str, - layer: Optional[str], - tags: Set[str], - parameters: AbstractDataset, - modular_pipelines: Optional[Set[str]], - ) -> "ParametersNode": - """Create a graph node of type parameters for a given Kedro parameters dataset instance. - Args: - dataset_id: A hashed id for the parameters node - dataset_name: The name of the dataset, including namespace, e.g. - data_science.test_split_ratio - layer: The optional layer that the parameters belong to. - tags: The set of tags assigned to assign to the graph representation - of this dataset. N.B. currently it's derived from the node's tags. - parameters: A parameters dataset in a Kedro pipeline. - modular_pipelines: A set of modular_pipeline_ids the node belongs to. - Returns: - An instance of ParametersNode. - """ - return ParametersNode( - id=dataset_id, - name=dataset_name, - tags=tags, - layer=layer, - kedro_obj=parameters, - modular_pipelines=modular_pipelines, - ) - - @classmethod - def create_modular_pipeline_node( - cls, modular_pipeline_id: str - ) -> "ModularPipelineNode": - """Create a graph node of type modularPipeline for a given modular pipeline ID. - This is used to visualise all modular pipelines in a Kedro project on the graph. - Args: - modular_pipeline_id: The ID of the modular pipeline to convert into a graph node. - Returns: - An instance of ModularPipelineNode. 
- Example: - >>> node = GraphNode.create_modular_pipeline_node("pipeline.data_science") - >>> assert node.id == "pipeline.data_science" - >>> assert node.name == "pipeline.data_science" - >>> assert node.type == GraphNodeType.MODULAR_PIPELINE - """ - return ModularPipelineNode(id=modular_pipeline_id, name=modular_pipeline_id) - - def add_pipeline(self, pipeline_id: str): - """Add a pipeline_id to the list of pipelines that this node belongs to.""" - self.pipelines.add(pipeline_id) - - def belongs_to_pipeline(self, pipeline_id: str) -> bool: - """Check whether this graph node belongs to a given pipeline_id.""" - return pipeline_id in self.pipelines - - def has_metadata(self) -> bool: - """Check whether this graph node has metadata. - Since metadata of a graph node is derived from the underlying Kedro object, - we just need to check whether the underlying object exists. - """ - return self.kedro_obj is not None - - -class GraphNodeMetadata(BaseModel, abc.ABC): - """Represent a graph node's metadata""" - - -class TaskNode(GraphNode): - """Represent a graph node of type task - - Raises: - AssertionError: If kedro_obj is not supplied during instantiation - """ - - parameters: Dict = Field( - {}, description="A dictionary of parameter values for the task node" - ) - - # The type for Task node - type: str = GraphNodeType.TASK.value - - namespace: Optional[str] = Field( - default=None, - validate_default=True, - description="The original namespace on this node", - ) - - @model_validator(mode="before") - @classmethod - def check_kedro_obj_exists(cls, values): - assert "kedro_obj" in values - return values - - @field_validator("namespace") - @classmethod - def set_namespace(cls, _, info: ValidationInfo): - return info.data["kedro_obj"].namespace - - -def _extract_wrapped_func(func: FunctionType) -> FunctionType: - """Extract a wrapped decorated function to inspect the source code if available. 
- Adapted from https://stackoverflow.com/a/43506509/1684058 - """ - if func.__closure__ is None: - return func - closure = (c.cell_contents for c in func.__closure__) - wrapped_func = next((c for c in closure if isinstance(c, FunctionType)), None) - # return the original function if it's not a decorated function - return func if wrapped_func is None else wrapped_func - - -class ModularPipelineNode(GraphNode): - """Represent a modular pipeline node in the graph""" - - # A modular pipeline doesn't belong to any other modular pipeline, - # in the same sense as other types of GraphNode do. - # Therefore it's default to None. - # The parent-child relationship between modular pipeline themselves is modelled explicitly. - modular_pipelines: Optional[Set[str]] = None - - # Model the modular pipelines tree using a child-references representation of a tree. - # See: https://docs.mongodb.com/manual/tutorial/model-tree-structures-with-child-references/ - # for more details. - # For example, if a node namespace is "uk.data_science", - # the "uk" modular pipeline node's children are ["uk.data_science"] - children: Set[ModularPipelineChild] = Field( - set(), description="The children for the modular pipeline node" - ) - - inputs: Set[str] = Field( - set(), description="The input datasets to the modular pipeline node" - ) - - outputs: Set[str] = Field( - set(), description="The output datasets from the modular pipeline node" - ) - - # The type for Modular Pipeline Node - type: str = GraphNodeType.MODULAR_PIPELINE.value - - -class TaskNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TaskNode - - Args: - task_node (TaskNode): Task node to which this metadata belongs to. 
- - Raises: - AssertionError: If task_node is not supplied during instantiation - """ - - task_node: TaskNode = Field(..., exclude=True) - - code: Optional[str] = Field( - default=None, - validate_default=True, - description="Source code of the node's function", - ) - - filepath: Optional[str] = Field( - default=None, - validate_default=True, - description="Path to the file where the node is defined", - ) - - parameters: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The parameters of the node, if available", - ) - run_command: Optional[str] = Field( - default=None, - validate_default=True, - description="The command to run the pipeline to this node", - ) - - inputs: Optional[List[str]] = Field( - default=None, validate_default=True, description="The inputs to the TaskNode" - ) - outputs: Optional[List[str]] = Field( - default=None, validate_default=True, description="The outputs from the TaskNode" - ) - - @model_validator(mode="before") - @classmethod - def check_task_node_exists(cls, values): - assert "task_node" in values - cls.set_task_and_kedro_node(values["task_node"]) - return values - - @classmethod - def set_task_and_kedro_node(cls, task_node): - cls.task_node = task_node - cls.kedro_node = cast(KedroNode, task_node.kedro_obj) - - @field_validator("code") - @classmethod - def set_code(cls, code): - # this is required to handle partial, curry functions - if inspect.isfunction(cls.kedro_node.func): - code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) - return code - - return None - - @field_validator("filepath") - @classmethod - def set_filepath(cls, filepath): - # this is required to handle partial, curry functions - if inspect.isfunction(cls.kedro_node.func): - code_full_path = ( - Path(inspect.getfile(cls.kedro_node.func)).expanduser().resolve() - ) - - try: - filepath = code_full_path.relative_to(Path.cwd().parent) - except ValueError: # pragma: no cover - # if the filepath can't be resolved relative to 
the current directory, - # e.g. either during tests or during launching development server - # outside of a Kedro project, simply return the fullpath to the file. - filepath = code_full_path - - return str(filepath) - - return None - - @field_validator("parameters") - @classmethod - def set_parameters(cls, _): - return cls.task_node.parameters - - @field_validator("run_command") - @classmethod - def set_run_command(cls, _): - return f"kedro run --to-nodes='{cls.kedro_node.name}'" - - @field_validator("inputs") - @classmethod - def set_inputs(cls, _): - return cls.kedro_node.inputs - - @field_validator("outputs") - @classmethod - def set_outputs(cls, _): - return cls.kedro_node.outputs - - -# pylint: disable=missing-function-docstring -class DataNode(GraphNode): - """Represent a graph node of type data - - Args: - layer (Optional[str]): The layer that this data node belongs to. Defaults to `None`. - is_free_input (bool): Determines whether the data node is a free input. Defaults to `False`. - stats (Optional[Dict]): Statistics for the data node. Defaults to `None`. 
- - Raises: - AssertionError: If kedro_obj, name are not supplied during instantiation - """ - - layer: Optional[str] = Field( - None, description="The layer that this data node belongs to" - ) - is_free_input: bool = Field( - False, description="Determines whether the data node is a free input" - ) - stats: Optional[Dict] = Field(None, description="The statistics for the data node.") - - dataset_type: Optional[str] = Field( - default=None, - validate_default=True, - description="The concrete type of the underlying kedro_obj", - ) - - viz_metadata: Optional[Dict] = Field( - default=None, validate_default=True, description="The metadata for data node" - ) - - run_command: Optional[str] = Field( - None, description="The command to run the pipeline to this node" - ) - - # The type for data node - type: str = GraphNodeType.DATA.value - - @model_validator(mode="before") - @classmethod - def check_kedro_obj_exists(cls, values): - assert "kedro_obj" in values - return values - - @field_validator("dataset_type") - @classmethod - def set_dataset_type(cls, _, info: ValidationInfo): - kedro_obj = cast(AbstractDataset, info.data.get("kedro_obj")) - return get_dataset_type(kedro_obj) - - @field_validator("viz_metadata") - @classmethod - def set_viz_metadata(cls, _, info: ValidationInfo): - kedro_obj = cast(AbstractDataset, info.data.get("kedro_obj")) - - if hasattr(kedro_obj, "metadata") and kedro_obj.metadata: - return kedro_obj.metadata.get("kedro-viz", None) - - return None - - def get_preview_args(self): - """Gets the preview arguments for a dataset""" - return self.viz_metadata.get("preview_args", None) - - def is_preview_enabled(self): - """Checks if the dataset has a preview enabled at the node level.""" - return ( - self.viz_metadata is None or self.viz_metadata.get("preview") is not False - ) - - -class TranscodedDataNode(GraphNode): - """Represent a graph node of type data - - Args: - layer (Optional[str]): The layer that this transcoded data - node belongs to. 
Defaults to `None`. - is_free_input (bool): Determines whether the transcoded data - node is a free input. Defaults to `False`. - stats (Optional[Dict]): Statistics for the data node - - Raises: - AssertionError: If name is not supplied during instantiation - - """ - - layer: Optional[str] = Field( - None, description="The layer that this transcoded data node belongs to" - ) - is_free_input: bool = Field( - False, description="Determines whether the transcoded data node is a free input" - ) - stats: Optional[Dict] = Field(None, description="The statistics for the data node.") - original_version: Optional[AbstractDataset] = Field( - None, - description="The original Kedro's AbstractDataset for this transcoded data node", - ) - original_name: Optional[str] = Field( - None, description="The original name for the generated run command" - ) - - run_command: Optional[str] = Field( - None, description="The command to run the pipeline to this node" - ) - # The transcoded versions of the transcoded data nodes. - transcoded_versions: Set[AbstractDataset] = Field( - set(), description="The transcoded versions of the transcoded data nodes" - ) - - # The type for data node - type: str = GraphNodeType.DATA.value - - def has_metadata(self) -> bool: - return True - - -class DataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a DataNode - - Args: - data_node (DataNode): Data node to which this metadata belongs to. - - Attributes: - is_all_previews_enabled (bool): Class-level attribute to determine if - previews are enabled for all nodes. This can be configured via CLI - or UI to manage the preview settings. 
- - Raises: - AssertionError: If data_node is not supplied during instantiation - """ - - data_node: DataNode = Field(..., exclude=True) - - is_all_previews_enabled: ClassVar[bool] = True - - type: Optional[str] = Field( - default=None, validate_default=True, description="The type of the data node" - ) - - filepath: Optional[str] = Field( - default=None, - validate_default=True, - description="The path to the actual data file for the underlying dataset", - ) - - run_command: Optional[str] = Field( - default=None, - validate_default=True, - description="Command to run the pipeline to this node", - ) - - preview: Optional[Union[Dict, str]] = Field( - default=None, - validate_default=True, - description="Preview data for the underlying datanode", - ) - - preview_type: Optional[str] = Field( - default=None, - validate_default=True, - description="Type of preview for the dataset", - ) - - stats: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The statistics for the data node.", - ) - - @model_validator(mode="before") - @classmethod - def check_data_node_exists(cls, values): - assert "data_node" in values - cls.set_data_node_and_dataset(values["data_node"]) - return values - - @classmethod - def set_is_all_previews_enabled(cls, value: bool): - cls.is_all_previews_enabled = value - - @classmethod - def set_data_node_and_dataset(cls, data_node): - cls.data_node = data_node - cls.dataset = cast(AbstractDataset, data_node.kedro_obj) - - # dataset.release clears the cache before loading to ensure that this issue - # does not arise: https://github.com/kedro-org/kedro-viz/pull/573. 
- cls.dataset.release() - - @field_validator("type") - @classmethod - def set_type(cls, _): - return cls.data_node.dataset_type - - @field_validator("filepath") - @classmethod - def set_filepath(cls, _): - dataset_description = cls.dataset._describe() - return _parse_filepath(dataset_description) - - @field_validator("run_command") - @classmethod - def set_run_command(cls, _): - if not cls.data_node.is_free_input: - return f"kedro run --to-outputs={cls.data_node.name}" - return None - - @field_validator("preview") - @classmethod - def set_preview(cls, _): - if ( - not cls.data_node.is_preview_enabled() - or not hasattr(cls.dataset, "preview") - or not cls.is_all_previews_enabled - ): - return None - - try: - preview_args = ( - cls.data_node.get_preview_args() if cls.data_node.viz_metadata else None - ) - if preview_args is None: - return cls.dataset.preview() - return cls.dataset.preview(**preview_args) - - except Exception as exc: # pylint: disable=broad-except - logger.warning( - "'%s' could not be previewed. Full exception: %s: %s", - cls.data_node.name, - type(exc).__name__, - exc, - ) - return None - - @field_validator("preview_type") - @classmethod - def set_preview_type(cls, _): - if ( - not cls.data_node.is_preview_enabled() - or not hasattr(cls.dataset, "preview") - or not cls.is_all_previews_enabled - ): - return None - - try: - preview_type_annotation = inspect.signature( - cls.dataset.preview - ).return_annotation - # Attempt to get the name attribute, if it exists. - # Otherwise, use str to handle the annotation directly. - preview_type_name = getattr( - preview_type_annotation, "__name__", str(preview_type_annotation) - ) - return preview_type_name - - except Exception as exc: # pylint: disable=broad-except # pragma: no cover - logger.warning( - "'%s' did not have preview type. 
Full exception: %s: %s", - cls.data_node.name, - type(exc).__name__, - exc, - ) - return None - - @field_validator("stats") - @classmethod - def set_stats(cls, _): - return cls.data_node.stats - - -class TranscodedDataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TranscodedDataNode - Args: - transcoded_data_node (TranscodedDataNode): The underlying transcoded - data node to which this metadata belongs to. - - Raises: - AssertionError: If transcoded_data_node is not supplied during instantiation - """ - - transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) - - # Only available if the dataset has filepath set. - filepath: Optional[str] = Field( - default=None, - validate_default=True, - description="The path to the actual data file for the underlying dataset", - ) - - run_command: Optional[str] = Field( - default=None, - validate_default=True, - description="Command to run the pipeline to this node", - ) - original_type: Optional[str] = Field( - default=None, - validate_default=True, - description="The dataset type of the underlying transcoded data node original version", - ) - transcoded_types: Optional[List[str]] = Field( - default=None, - validate_default=True, - description="The list of all dataset types for the transcoded versions", - ) - - # Statistics for the underlying data node - stats: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The statistics for the transcoded data node metadata.", - ) - - @model_validator(mode="before") - @classmethod - def check_transcoded_data_node_exists(cls, values): - assert "transcoded_data_node" in values - cls.transcoded_data_node = values["transcoded_data_node"] - return values - - @field_validator("filepath") - @classmethod - def set_filepath(cls, _): - dataset_description = cls.transcoded_data_node.original_version._describe() - return _parse_filepath(dataset_description) - - @field_validator("run_command") - @classmethod - def set_run_command(cls, _): - 
if not cls.transcoded_data_node.is_free_input: - return f"kedro run --to-outputs={cls.transcoded_data_node.original_name}" - return None - - @field_validator("original_type") - @classmethod - def set_original_type(cls, _): - return get_dataset_type(cls.transcoded_data_node.original_version) - - @field_validator("transcoded_types") - @classmethod - def set_transcoded_types(cls, _): - return [ - get_dataset_type(transcoded_version) - for transcoded_version in cls.transcoded_data_node.transcoded_versions - ] - - @field_validator("stats") - @classmethod - def set_stats(cls, _): - return cls.transcoded_data_node.stats - - -class ParametersNode(GraphNode): - """Represent a graph node of type parameters - Args: - layer (Optional[str]): The layer that this parameters node belongs to. Defaults to `None`. - - Raises: - AssertionError: If kedro_obj, name are not supplied during instantiation - """ - - layer: Optional[str] = Field( - None, description="The layer that this parameters node belongs to" - ) - - # The type for Parameters Node - type: str = GraphNodeType.PARAMETERS.value - - @model_validator(mode="before") - @classmethod - def check_kedro_obj_and_name_exists(cls, values): - assert "kedro_obj" in values - assert "name" in values - return values - - def is_all_parameters(self) -> bool: - """Check whether the graph node represent all parameters in the pipeline""" - return self.name == "parameters" - - def is_single_parameter(self) -> bool: - """Check whether the graph node represent a single parameter in the pipeline""" - return not self.is_all_parameters() - - @property - def parameter_name(self) -> str: - """Get a normalised parameter name without the "params:" prefix""" - return self.name.replace("params:", "") - - @property - def parameter_value(self) -> Any: - """Load the parameter value from the underlying dataset""" - if not (self.kedro_obj and hasattr(self.kedro_obj, "load")): - return None - - try: - actual_parameter_value = self.kedro_obj.load() - # Return 
only json serializable value - return jsonable_encoder(actual_parameter_value) - except (TypeError, ValueError, RecursionError): - # In case the parameter is not JSON serializable, - # return the string representation - return str(actual_parameter_value) - except (AttributeError, DatasetError): - # This except clause triggers if the user passes a parameter that is not - # defined in the catalog (DatasetError) it also catches any case where - # the kedro_obj is None (AttributeError) -- GH#1231 - logger.warning( - "Cannot find parameter `%s` in the catalog.", self.parameter_name - ) - return None - # pylint: disable=broad-exception-caught - except Exception as exc: # pragma: no cover - logger.error( - "An error occurred when loading parameter `%s` in the catalog :: %s", - self.parameter_name, - exc, - ) - return None - - -class ParametersNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a ParametersNode - - Args: - parameters_node (ParametersNode): The underlying parameters node - for the parameters metadata node. - - Raises: - AssertionError: If parameters_node is not supplied during instantiation - """ - - parameters_node: ParametersNode = Field(..., exclude=True) - parameters: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The parameters dictionary for the parameters metadata node", - ) - - @model_validator(mode="before") - @classmethod - def check_parameters_node_exists(cls, values): - assert "parameters_node" in values - cls.parameters_node = values["parameters_node"] - return values - - @field_validator("parameters") - @classmethod - def set_parameters(cls, _): - if cls.parameters_node.is_single_parameter(): - return { - cls.parameters_node.parameter_name: cls.parameters_node.parameter_value - } - return cls.parameters_node.parameter_value - - -class GraphEdge(BaseModel, frozen=True): - """Represent an edge in the graph - - Args: - source (str): The id of the source node. - target (str): The id of the target node. 
- """ - - source: str - target: str diff --git a/package/kedro_viz/models/flowchart/__init__.py b/package/kedro_viz/models/flowchart/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/package/kedro_viz/models/flowchart/edges.py b/package/kedro_viz/models/flowchart/edges.py new file mode 100644 index 0000000000..27b5aebdf5 --- /dev/null +++ b/package/kedro_viz/models/flowchart/edges.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + +class GraphEdge(BaseModel, frozen=True): + """Represent an edge in the graph + + Args: + source (str): The id of the source node. + target (str): The id of the target node. + """ + + source: str + target: str diff --git a/package/kedro_viz/models/flowchart/metadata.py b/package/kedro_viz/models/flowchart/metadata.py new file mode 100644 index 0000000000..89e2c913b0 --- /dev/null +++ b/package/kedro_viz/models/flowchart/metadata.py @@ -0,0 +1,64 @@ +from pydantic import BaseModel, Field +from typing import Optional, Dict, Union, ClassVar +from pathlib import Path +import inspect +from kedro.pipeline.node import Node as KedroNode +from kedro.io.core import AbstractDataset +from kedro_viz.models.utils import get_dataset_type +from kedro_viz.models.flowchart.nodes import TaskNode, DataNode, TranscodedDataNode, ParametersNode +from kedro_viz.utils import _parse_filepath + +class GraphNodeMetadata(BaseModel): + """Base class for node metadata.""" + +class TaskNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a TaskNode.""" + + task_node: TaskNode = Field(..., exclude=True) + code: Optional[str] = Field(default=None) + filepath: Optional[str] = Field(default=None) + + @classmethod + def set_code(cls, _): + if inspect.isfunction(cls.task_node.kedro_obj.func): + return inspect.getsource(cls.task_node.kedro_obj.func) + return None + + @classmethod + def set_filepath(cls, _): + if inspect.isfunction(cls.task_node.kedro_obj.func): + return str(Path(inspect.getfile(cls.task_node.kedro_obj.func))) + return 
None + +class DataNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a DataNode.""" + + data_node: DataNode = Field(..., exclude=True) + filepath: Optional[str] = Field(default=None) + preview: Optional[Union[Dict, str]] = Field(default=None) + + @classmethod + def set_filepath(cls, _): + return _parse_filepath(cls.data_node.kedro_obj._describe()) + +class TranscodedDataNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a TranscodedDataNode.""" + + transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) + filepath: Optional[str] = Field(default=None) + + @classmethod + def set_filepath(cls, _): + return _parse_filepath(cls.transcoded_data_node.original_version._describe()) + +class ParametersNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a ParametersNode.""" + + parameters_node: ParametersNode = Field(..., exclude=True) + parameters: Optional[Dict] = Field(default=None) + + @classmethod + def set_parameters(cls, _): + if cls.parameters_node.is_single_parameter(): + return {cls.parameters_node.parameter_name: cls.parameters_node.parameter_value} + return cls.parameters_node.parameter_value diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py new file mode 100644 index 0000000000..5723894c2b --- /dev/null +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -0,0 +1,100 @@ +from abc import ABC +from pydantic import BaseModel, Field +from typing import Set, Optional, Dict, Union +from kedro.pipeline.node import Node as KedroNode +from kedro.io.core import AbstractDataset +from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding + + +class GraphNode(BaseModel, ABC): + """Represent a node in the graph representation of a Kedro pipeline.""" + + id: str + name: str + type: str + tags: Set[str] = Field(set()) + kedro_obj: Optional[Union[KedroNode, AbstractDataset]] = Field(None, exclude=True) + pipelines: Set[str] = Field(set()) + modular_pipelines: 
Optional[Set[str]] = Field(default=None) + + @classmethod + def create_task_node( + cls, node: KedroNode, node_id: str, modular_pipelines: Optional[Set[str]] + ) -> "TaskNode": + """Create a graph node of type task for a given Kedro Node instance.""" + node_name = node._name or node._func_name + return TaskNode( + id=node_id, + name=node_name, + tags=set(node.tags), + kedro_obj=node, + modular_pipelines=modular_pipelines, + ) + + @classmethod + def create_data_node( + cls, + dataset_id: str, + dataset_name: str, + layer: Optional[str], + tags: Set[str], + dataset: AbstractDataset, + stats: Optional[Dict], + modular_pipelines: Optional[Set[str]], + is_free_input: bool = False, + ) -> Union["DataNode", "TranscodedDataNode"]: + """Create a graph node of type data for a given Kedro Dataset instance.""" + is_transcoded_dataset = TRANSCODING_SEPARATOR in dataset_name + if is_transcoded_dataset: + name = _strip_transcoding(dataset_name) + return TranscodedDataNode( + id=dataset_id, + name=name, + tags=tags, + layer=layer, + is_free_input=is_free_input, + stats=stats, + modular_pipelines=modular_pipelines, + ) + return DataNode( + id=dataset_id, + name=dataset_name, + tags=tags, + layer=layer, + kedro_obj=dataset, + is_free_input=is_free_input, + stats=stats, + modular_pipelines=modular_pipelines, + ) + + +class TaskNode(GraphNode): + """Represent a graph node of type task.""" + + parameters: Dict = Field({}, description="A dictionary of parameter values") + type: str = "task" + + +class DataNode(GraphNode): + """Represent a graph node of type data.""" + + layer: Optional[str] = Field(None) + is_free_input: bool = Field(False) + stats: Optional[Dict] = Field(None) + + +class TranscodedDataNode(GraphNode): + """Represent a graph node of type transcoded data.""" + + layer: Optional[str] = Field(None) + is_free_input: bool = Field(False) + stats: Optional[Dict] = Field(None) + original_version: Optional[AbstractDataset] = Field( + None, description="The original Kedro's 
AbstractDataset for this transcoded data node" + ) + original_name: Optional[str] = Field( + None, description="The original name for the generated run command" + ) + transcoded_versions: Set[AbstractDataset] = Field( + set(), description="The transcoded versions of the transcoded data nodes" + ) diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 4eab727e80..715e4bc716 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -4,7 +4,7 @@ from graphlib import CycleError, TopologicalSorter from typing import Dict, List, Set -from kedro_viz.models.flowchart import GraphNode +from kedro_viz.models.flowchart.flowchart import GraphNode logger = logging.getLogger(__name__) diff --git a/package/tests/conftest.py b/package/tests/conftest.py index d63fca7fd3..d7b81d1492 100644 --- a/package/tests/conftest.py +++ b/package/tests/conftest.py @@ -21,7 +21,7 @@ ) from kedro_viz.integrations.kedro.hooks import DatasetStatsHook from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore -from kedro_viz.models.flowchart import DataNodeMetadata, GraphNode +from kedro_viz.models.flowchart.flowchart import DataNodeMetadata, GraphNode from kedro_viz.server import populate_data diff --git a/package/tests/test_api/test_rest/test_responses.py b/package/tests/test_api/test_rest/test_responses.py index 3f75904404..43903a76a8 100644 --- a/package/tests/test_api/test_rest/test_responses.py +++ b/package/tests/test_api/test_rest/test_responses.py @@ -20,7 +20,7 @@ save_api_responses_to_fs, write_api_response_to_fs, ) -from kedro_viz.models.flowchart import TaskNode +from kedro_viz.models.flowchart.flowchart import TaskNode from kedro_viz.models.metadata import Metadata diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index 66bd08f1e9..0dc2d75807 100644 --- a/package/tests/test_data_access/test_managers.py +++ 
b/package/tests/test_data_access/test_managers.py @@ -15,7 +15,7 @@ ModularPipelinesRepository, ) from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.flowchart import ( DataNode, GraphEdge, ParametersNode, diff --git a/package/tests/test_data_access/test_repositories/test_graph.py b/package/tests/test_data_access/test_repositories/test_graph.py index c45232ebd1..905c99301d 100644 --- a/package/tests/test_data_access/test_repositories/test_graph.py +++ b/package/tests/test_data_access/test_repositories/test_graph.py @@ -4,7 +4,7 @@ GraphEdgesRepository, GraphNodesRepository, ) -from kedro_viz.models.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart.flowchart import GraphEdge, GraphNode class TestGraphNodeRepository: diff --git a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py index 5b5a5e783b..8fb4856298 100644 --- a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py +++ b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py @@ -6,7 +6,7 @@ from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID from kedro_viz.data_access.repositories import ModularPipelinesRepository -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.flowchart import ( GraphNodeType, ModularPipelineChild, ModularPipelineNode, diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart.py index 01238f286d..86acbb4127 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart.py @@ -9,7 +9,7 @@ from kedro_datasets.pandas import CSVDataset, ParquetDataset from kedro_datasets.partitions.partitioned_dataset import PartitionedDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.flowchart import ( DataNode, 
DataNodeMetadata, GraphNode, diff --git a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index 80d76fae5a..b01bb007b0 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -1,6 +1,6 @@ import pytest -from kedro_viz.models.flowchart import GraphNode +from kedro_viz.models.flowchart.flowchart import GraphNode from kedro_viz.services.layers import sort_layers From 1d87c5334ccc10f7bd94895be5495a17227bbe07 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 18 Oct 2024 14:25:35 +0100 Subject: [PATCH 02/18] update Signed-off-by: Sajid Alam --- package/kedro_viz/api/rest/responses.py | 2 +- package/kedro_viz/data_access/managers.py | 2 +- .../data_access/repositories/graph.py | 2 +- .../repositories/modular_pipelines.py | 2 +- .../repositories/registered_pipelines.py | 2 +- .../data_access/repositories/tags.py | 2 +- package/kedro_viz/models/flowchart/edges.py | 1 + .../kedro_viz/models/flowchart/entities.py | 30 ++ package/kedro_viz/models/flowchart/enums.py | 10 + .../kedro_viz/models/flowchart/metadata.py | 333 ++++++++++++++++-- package/kedro_viz/models/flowchart/nodes.py | 267 ++++++++++++-- package/kedro_viz/models/flowchart/utils.py | 35 ++ package/kedro_viz/services/layers.py | 2 +- package/tests/conftest.py | 2 +- .../test_api/test_rest/test_responses.py | 2 +- .../tests/test_data_access/test_managers.py | 2 +- .../test_repositories/test_graph.py | 2 +- .../test_modular_pipelines.py | 2 +- package/tests/test_models/test_flowchart.py | 2 +- package/tests/test_services/test_layers.py | 2 +- 20 files changed, 633 insertions(+), 71 deletions(-) create mode 100644 package/kedro_viz/models/flowchart/entities.py create mode 100644 package/kedro_viz/models/flowchart/enums.py create mode 100644 package/kedro_viz/models/flowchart/utils.py diff --git a/package/kedro_viz/api/rest/responses.py b/package/kedro_viz/api/rest/responses.py index eb0c7e02bc..2f59d33b16 100644 --- 
a/package/kedro_viz/api/rest/responses.py +++ b/package/kedro_viz/api/rest/responses.py @@ -13,7 +13,7 @@ from kedro_viz.api.rest.utils import get_package_compatibilities from kedro_viz.data_access import data_access_manager -from kedro_viz.models.flowchart.flowchart import ( +from kedro_viz.models.flowchart import ( DataNode, DataNodeMetadata, ParametersNodeMetadata, diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index f89894fadb..40e00ebe55 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -14,7 +14,7 @@ from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart.flowchart import ( +from kedro_viz.models.flowchart import ( DataNode, GraphEdge, GraphNode, diff --git a/package/kedro_viz/data_access/repositories/graph.py b/package/kedro_viz/data_access/repositories/graph.py index b0fd60c348..90f734ec1d 100644 --- a/package/kedro_viz/data_access/repositories/graph.py +++ b/package/kedro_viz/data_access/repositories/graph.py @@ -3,7 +3,7 @@ # pylint: disable=missing-class-docstring,missing-function-docstring from typing import Dict, Generator, List, Optional, Set -from kedro_viz.models.flowchart.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart import GraphEdge, GraphNode class GraphNodesRepository: diff --git a/package/kedro_viz/data_access/repositories/modular_pipelines.py b/package/kedro_viz/data_access/repositories/modular_pipelines.py index c5ab6d7152..25b7645ff4 100644 --- a/package/kedro_viz/data_access/repositories/modular_pipelines.py +++ b/package/kedro_viz/data_access/repositories/modular_pipelines.py @@ -9,7 +9,7 @@ from kedro.pipeline.node import Node as KedroNode from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID -from kedro_viz.models.flowchart.flowchart import ( +from kedro_viz.models.flowchart 
import ( GraphNode, GraphNodeType, ModularPipelineChild, diff --git a/package/kedro_viz/data_access/repositories/registered_pipelines.py b/package/kedro_viz/data_access/repositories/registered_pipelines.py index 3e72766a62..16cdd98adf 100644 --- a/package/kedro_viz/data_access/repositories/registered_pipelines.py +++ b/package/kedro_viz/data_access/repositories/registered_pipelines.py @@ -4,7 +4,7 @@ from collections import OrderedDict, defaultdict from typing import Dict, List, Optional, Set -from kedro_viz.models.flowchart.flowchart import RegisteredPipeline +from kedro_viz.models.flowchart import RegisteredPipeline class RegisteredPipelinesRepository: diff --git a/package/kedro_viz/data_access/repositories/tags.py b/package/kedro_viz/data_access/repositories/tags.py index 844c64ffdc..eae5c68bb0 100644 --- a/package/kedro_viz/data_access/repositories/tags.py +++ b/package/kedro_viz/data_access/repositories/tags.py @@ -3,7 +3,7 @@ # pylint: disable=missing-class-docstring,missing-function-docstring from typing import Iterable, List, Set -from kedro_viz.models.flowchart.flowchart import Tag +from kedro_viz.models.flowchart import Tag class TagsRepository: diff --git a/package/kedro_viz/models/flowchart/edges.py b/package/kedro_viz/models/flowchart/edges.py index 27b5aebdf5..9f9757501f 100644 --- a/package/kedro_viz/models/flowchart/edges.py +++ b/package/kedro_viz/models/flowchart/edges.py @@ -1,5 +1,6 @@ from pydantic import BaseModel + class GraphEdge(BaseModel, frozen=True): """Represent an edge in the graph diff --git a/package/kedro_viz/models/flowchart/entities.py b/package/kedro_viz/models/flowchart/entities.py new file mode 100644 index 0000000000..cbc13d872a --- /dev/null +++ b/package/kedro_viz/models/flowchart/entities.py @@ -0,0 +1,30 @@ +from pydantic import BaseModel, Field, ValidationInfo, field_validator +from typing import Optional + + +class NamedEntity(BaseModel): + """Represent a named entity (Tag/Registered Pipeline) in a Kedro project.""" + + 
id: str + name: Optional[str] = Field( + default=None, + validate_default=True, + description="The name of the entity", + ) + + @field_validator("name") + @classmethod + def set_name(cls, _, info: ValidationInfo): + assert "id" in info.data + return info.data["id"] + + +class RegisteredPipeline(NamedEntity): + """Represent a registered pipeline in a Kedro project.""" + + +class Tag(NamedEntity): + """Represent a tag in a Kedro project.""" + + def __hash__(self) -> int: + return hash(self.id) diff --git a/package/kedro_viz/models/flowchart/enums.py b/package/kedro_viz/models/flowchart/enums.py new file mode 100644 index 0000000000..928d36fa9d --- /dev/null +++ b/package/kedro_viz/models/flowchart/enums.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class GraphNodeType(str, Enum): + """Represent all possible node types in the graph representation of a Kedro pipeline.""" + + TASK = "task" + DATA = "data" + PARAMETERS = "parameters" + MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility diff --git a/package/kedro_viz/models/flowchart/metadata.py b/package/kedro_viz/models/flowchart/metadata.py index 89e2c913b0..2f2a43a614 100644 --- a/package/kedro_viz/models/flowchart/metadata.py +++ b/package/kedro_viz/models/flowchart/metadata.py @@ -1,64 +1,343 @@ -from pydantic import BaseModel, Field -from typing import Optional, Dict, Union, ClassVar -from pathlib import Path -import inspect -from kedro.pipeline.node import Node as KedroNode -from kedro.io.core import AbstractDataset -from kedro_viz.models.utils import get_dataset_type -from kedro_viz.models.flowchart.nodes import TaskNode, DataNode, TranscodedDataNode, ParametersNode -from kedro_viz.utils import _parse_filepath - -class GraphNodeMetadata(BaseModel): - """Base class for node metadata.""" +# kedro_viz/models/metadata.py + +from abc import ABC +from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator +from typing import Optional, Dict, Any, Union, List, 
ClassVar, cast +import logging + +try: + # kedro 0.18.11 onwards + from kedro.io.core import DatasetError +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import DataSetError as DatasetError # type: ignore + +try: + # kedro 0.18.12 onwards + from kedro.io.core import AbstractDataset +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore + +from .nodes import TaskNode, DataNode, TranscodedDataNode, ParametersNode +from .utils import _extract_wrapped_func, _parse_filepath, get_dataset_type + +logger = logging.getLogger(__name__) + + +class GraphNodeMetadata(BaseModel, ABC): + """Abstract base class representing metadata of a graph node.""" + class TaskNodeMetadata(GraphNodeMetadata): """Represent the metadata of a TaskNode.""" task_node: TaskNode = Field(..., exclude=True) - code: Optional[str] = Field(default=None) - filepath: Optional[str] = Field(default=None) + code: Optional[str] = Field( + default=None, + validate_default=True, + description="Source code of the node's function", + ) + filepath: Optional[str] = Field( + default=None, + validate_default=True, + description="Path to the file where the node is defined", + ) + parameters: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The parameters of the node, if available", + ) + run_command: Optional[str] = Field( + default=None, + validate_default=True, + description="The command to run the pipeline to this node", + ) + inputs: Optional[List[str]] = Field( + default=None, validate_default=True, description="The inputs to the TaskNode" + ) + outputs: Optional[List[str]] = Field( + default=None, validate_default=True, description="The outputs from the TaskNode" + ) + @model_validator(mode="before") @classmethod - def set_code(cls, _): - if inspect.isfunction(cls.task_node.kedro_obj.func): - return inspect.getsource(cls.task_node.kedro_obj.func) + def 
check_task_node_exists(cls, values): + assert "task_node" in values + cls.set_task_and_kedro_node(values["task_node"]) + return values + + @classmethod + def set_task_and_kedro_node(cls, task_node): + cls.task_node = task_node + cls.kedro_node = cast(KedroNode, task_node.kedro_obj) + + @field_validator("code") + @classmethod + def set_code(cls, code): + if inspect.isfunction(cls.kedro_node.func): + code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) + return code return None + @field_validator("filepath") @classmethod - def set_filepath(cls, _): - if inspect.isfunction(cls.task_node.kedro_obj.func): - return str(Path(inspect.getfile(cls.task_node.kedro_obj.func))) + def set_filepath(cls, filepath): + if inspect.isfunction(cls.kedro_node.func): + code_full_path = ( + Path(inspect.getfile(cls.kedro_node.func)).expanduser().resolve() + ) + + try: + filepath = code_full_path.relative_to(Path.cwd().parent) + except ValueError: + filepath = code_full_path + + return str(filepath) return None + @field_validator("parameters") + @classmethod + def set_parameters(cls, _): + return cls.task_node.parameters + + @field_validator("run_command") + @classmethod + def set_run_command(cls, _): + return f"kedro run --to-nodes='{cls.kedro_node.name}'" + + @field_validator("inputs") + @classmethod + def set_inputs(cls, _): + return cls.kedro_node.inputs + + @field_validator("outputs") + @classmethod + def set_outputs(cls, _): + return cls.kedro_node.outputs + + class DataNodeMetadata(GraphNodeMetadata): """Represent the metadata of a DataNode.""" data_node: DataNode = Field(..., exclude=True) - filepath: Optional[str] = Field(default=None) - preview: Optional[Union[Dict, str]] = Field(default=None) + is_all_previews_enabled: ClassVar[bool] = True + type: Optional[str] = Field( + default=None, validate_default=True, description="The type of the data node" + ) + filepath: Optional[str] = Field( + default=None, + validate_default=True, + description="The path to the 
actual data file for the underlying dataset", + ) + run_command: Optional[str] = Field( + default=None, + validate_default=True, + description="Command to run the pipeline to this node", + ) + preview: Optional[Union[Dict, str]] = Field( + default=None, + validate_default=True, + description="Preview data for the underlying data node", + ) + preview_type: Optional[str] = Field( + default=None, + validate_default=True, + description="Type of preview for the dataset", + ) + stats: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The statistics for the data node.", + ) + + @model_validator(mode="before") + @classmethod + def check_data_node_exists(cls, values): + assert "data_node" in values + cls.set_data_node_and_dataset(values["data_node"]) + return values + + @classmethod + def set_is_all_previews_enabled(cls, value: bool): + cls.is_all_previews_enabled = value + + @classmethod + def set_data_node_and_dataset(cls, data_node): + cls.data_node = data_node + cls.dataset = cast(AbstractDataset, data_node.kedro_obj) + cls.dataset.release() + @field_validator("type") + @classmethod + def set_type(cls, _): + return cls.data_node.dataset_type + + @field_validator("filepath") @classmethod def set_filepath(cls, _): - return _parse_filepath(cls.data_node.kedro_obj._describe()) + dataset_description = cls.dataset._describe() + return _parse_filepath(dataset_description) + + @field_validator("run_command") + @classmethod + def set_run_command(cls, _): + if not cls.data_node.is_free_input: + return f"kedro run --to-outputs={cls.data_node.name}" + return None + + @field_validator("preview") + @classmethod + def set_preview(cls, _): + if ( + not cls.data_node.is_preview_enabled() + or not hasattr(cls.dataset, "preview") + or not cls.is_all_previews_enabled + ): + return None + + try: + preview_args = ( + cls.data_node.get_preview_args() if cls.data_node.viz_metadata else None + ) + if preview_args is None: + return cls.dataset.preview() + return 
cls.dataset.preview(**preview_args) + + except Exception as exc: + logger.warning( + "'%s' could not be previewed. Full exception: %s: %s", + cls.data_node.name, + type(exc).__name__, + exc, + ) + return None + + @field_validator("preview_type") + @classmethod + def set_preview_type(cls, _): + if ( + not cls.data_node.is_preview_enabled() + or not hasattr(cls.dataset, "preview") + or not cls.is_all_previews_enabled + ): + return None + + try: + preview_type_annotation = inspect.signature( + cls.dataset.preview + ).return_annotation + preview_type_name = getattr( + preview_type_annotation, "__name__", str(preview_type_annotation) + ) + return preview_type_name + + except Exception as exc: + logger.warning( + "'%s' did not have preview type. Full exception: %s: %s", + cls.data_node.name, + type(exc).__name__, + exc, + ) + return None + + @field_validator("stats") + @classmethod + def set_stats(cls, _): + return cls.data_node.stats + class TranscodedDataNodeMetadata(GraphNodeMetadata): """Represent the metadata of a TranscodedDataNode.""" transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) - filepath: Optional[str] = Field(default=None) + filepath: Optional[str] = Field( + default=None, + validate_default=True, + description="The path to the actual data file for the underlying dataset", + ) + run_command: Optional[str] = Field( + default=None, + validate_default=True, + description="Command to run the pipeline to this node", + ) + original_type: Optional[str] = Field( + default=None, + validate_default=True, + description="The dataset type of the original version", + ) + transcoded_types: Optional[List[str]] = Field( + default=None, + validate_default=True, + description="The list of all dataset types for the transcoded versions", + ) + stats: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The statistics for the transcoded data node metadata.", + ) + + @model_validator(mode="before") + @classmethod + def 
check_transcoded_data_node_exists(cls, values): + assert "transcoded_data_node" in values + cls.transcoded_data_node = values["transcoded_data_node"] + return values + @field_validator("filepath") @classmethod def set_filepath(cls, _): - return _parse_filepath(cls.transcoded_data_node.original_version._describe()) + dataset_description = cls.transcoded_data_node.original_version._describe() + return _parse_filepath(dataset_description) + + @field_validator("run_command") + @classmethod + def set_run_command(cls, _): + if not cls.transcoded_data_node.is_free_input: + return f"kedro run --to-outputs={cls.transcoded_data_node.original_name}" + return None + + @field_validator("original_type") + @classmethod + def set_original_type(cls, _): + return get_dataset_type(cls.transcoded_data_node.original_version) + + @field_validator("transcoded_types") + @classmethod + def set_transcoded_types(cls, _): + return [ + get_dataset_type(transcoded_version) + for transcoded_version in cls.transcoded_data_node.transcoded_versions + ] + + @field_validator("stats") + @classmethod + def set_stats(cls, _): + return cls.transcoded_data_node.stats + class ParametersNodeMetadata(GraphNodeMetadata): """Represent the metadata of a ParametersNode.""" parameters_node: ParametersNode = Field(..., exclude=True) - parameters: Optional[Dict] = Field(default=None) + parameters: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The parameters dictionary for the parameters metadata node", + ) + + @model_validator(mode="before") + @classmethod + def check_parameters_node_exists(cls, values): + assert "parameters_node" in values + cls.parameters_node = values["parameters_node"] + return values + @field_validator("parameters") @classmethod def set_parameters(cls, _): if cls.parameters_node.is_single_parameter(): - return {cls.parameters_node.parameter_name: cls.parameters_node.parameter_value} + return { + cls.parameters_node.parameter_name: 
cls.parameters_node.parameter_value + } return cls.parameters_node.parameter_value diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index 5723894c2b..a7264d8449 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,27 +1,64 @@ +# kedro_viz/models/nodes.py + from abc import ABC -from pydantic import BaseModel, Field -from typing import Set, Optional, Dict, Union -from kedro.pipeline.node import Node as KedroNode -from kedro.io.core import AbstractDataset +from pydantic import ( + BaseModel, + Field, + ConfigDict, + ValidationInfo, + field_validator, + model_validator, +) +from typing import Optional, Set, Union, Dict, Any, ClassVar, List, cast +from fastapi.encoders import jsonable_encoder +import logging + +try: + # kedro 0.18.11 onwards + from kedro.pipeline.node import Node as KedroNode +except ImportError: # pragma: no cover + # Handle older versions or custom implementations + KedroNode = Any # Replace with appropriate import or definition + +try: + # kedro 0.18.12 onwards + from kedro.io.core import AbstractDataset +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore + +from .entities import NamedEntity +from .enums import GraphNodeType +from .utils import _parse_filepath, _extract_wrapped_func, get_dataset_type from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding +from .modular_pipelines import ModularPipelineChild + +logger = logging.getLogger(__name__) class GraphNode(BaseModel, ABC): - """Represent a node in the graph representation of a Kedro pipeline.""" + """Abstract base class representing a node in the graph.""" id: str name: str type: str tags: Set[str] = Field(set()) - kedro_obj: Optional[Union[KedroNode, AbstractDataset]] = Field(None, exclude=True) + kedro_obj: Optional[Union[KedroNode, AbstractDataset]] = Field( + None, + 
description="The underlying Kedro object for each graph node, if any", + exclude=True, + ) pipelines: Set[str] = Field(set()) - modular_pipelines: Optional[Set[str]] = Field(default=None) + modular_pipelines: Optional[Set[str]] = Field( + default=None, + validate_default=True, + ) + model_config = ConfigDict(arbitrary_types_allowed=True) @classmethod def create_task_node( - cls, node: KedroNode, node_id: str, modular_pipelines: Optional[Set[str]] + cls, node: KedroNode, node_id: str, modular_pipelines: Optional[Set[str]] ) -> "TaskNode": - """Create a graph node of type task for a given Kedro Node instance.""" node_name = node._name or node._func_name return TaskNode( id=node_id, @@ -33,17 +70,16 @@ def create_task_node( @classmethod def create_data_node( - cls, - dataset_id: str, - dataset_name: str, - layer: Optional[str], - tags: Set[str], - dataset: AbstractDataset, - stats: Optional[Dict], - modular_pipelines: Optional[Set[str]], - is_free_input: bool = False, + cls, + dataset_id: str, + dataset_name: str, + layer: Optional[str], + tags: Set[str], + dataset: AbstractDataset, + stats: Optional[Dict], + modular_pipelines: Optional[Set[str]], + is_free_input: bool = False, ) -> Union["DataNode", "TranscodedDataNode"]: - """Create a graph node of type data for a given Kedro Dataset instance.""" is_transcoded_dataset = TRANSCODING_SEPARATOR in dataset_name if is_transcoded_dataset: name = _strip_transcoding(dataset_name) @@ -56,6 +92,7 @@ def create_data_node( stats=stats, modular_pipelines=modular_pipelines, ) + return DataNode( id=dataset_id, name=dataset_name, @@ -67,34 +104,204 @@ def create_data_node( modular_pipelines=modular_pipelines, ) + @classmethod + def create_parameters_node( + cls, + dataset_id: str, + dataset_name: str, + layer: Optional[str], + tags: Set[str], + parameters: AbstractDataset, + modular_pipelines: Optional[Set[str]], + ) -> "ParametersNode": + return ParametersNode( + id=dataset_id, + name=dataset_name, + tags=tags, + layer=layer, + 
kedro_obj=parameters, + modular_pipelines=modular_pipelines, + ) + + @classmethod + def create_modular_pipeline_node( + cls, modular_pipeline_id: str + ) -> "ModularPipelineNode": + return ModularPipelineNode(id=modular_pipeline_id, name=modular_pipeline_id) + + def add_pipeline(self, pipeline_id: str): + self.pipelines.add(pipeline_id) + + def belongs_to_pipeline(self, pipeline_id: str) -> bool: + return pipeline_id in self.pipelines + + def has_metadata(self) -> bool: + return self.kedro_obj is not None + class TaskNode(GraphNode): """Represent a graph node of type task.""" - parameters: Dict = Field({}, description="A dictionary of parameter values") - type: str = "task" + parameters: Dict = Field( + {}, description="A dictionary of parameter values for the task node" + ) + type: str = GraphNodeType.TASK.value + namespace: Optional[str] = Field( + default=None, + validate_default=True, + description="The original namespace on this node", + ) + + @model_validator(mode="before") + @classmethod + def check_kedro_obj_exists(cls, values): + assert "kedro_obj" in values + return values + + @field_validator("namespace") + @classmethod + def set_namespace(cls, _, info: ValidationInfo): + return info.data["kedro_obj"].namespace class DataNode(GraphNode): """Represent a graph node of type data.""" - layer: Optional[str] = Field(None) - is_free_input: bool = Field(False) - stats: Optional[Dict] = Field(None) + layer: Optional[str] = Field( + None, description="The layer that this data node belongs to" + ) + is_free_input: bool = Field( + False, description="Determines whether the data node is a free input" + ) + stats: Optional[Dict] = Field(None, description="The statistics for the data node.") + dataset_type: Optional[str] = Field( + default=None, + validate_default=True, + description="The concrete type of the underlying kedro_obj", + ) + viz_metadata: Optional[Dict] = Field( + default=None, validate_default=True, description="The metadata for data node" + ) + 
run_command: Optional[str] = Field( + None, description="The command to run the pipeline to this node" + ) + type: str = GraphNodeType.DATA.value + + @model_validator(mode="before") + @classmethod + def check_kedro_obj_exists(cls, values): + assert "kedro_obj" in values + return values + + @field_validator("dataset_type") + @classmethod + def set_dataset_type(cls, _, info: ValidationInfo): + kedro_obj = cast(AbstractDataset, info.data.get("kedro_obj")) + return get_dataset_type(kedro_obj) + + @field_validator("viz_metadata") + @classmethod + def set_viz_metadata(cls, _, info: ValidationInfo): + kedro_obj = cast(AbstractDataset, info.data.get("kedro_obj")) + + if hasattr(kedro_obj, "metadata") and kedro_obj.metadata: + return kedro_obj.metadata.get("kedro-viz", None) + + return None + + def get_preview_args(self): + return self.viz_metadata.get("preview_args", None) + + def is_preview_enabled(self): + return ( + self.viz_metadata is None or self.viz_metadata.get("preview") is not False + ) class TranscodedDataNode(GraphNode): - """Represent a graph node of type transcoded data.""" + """Represent a graph node of type data for transcoded datasets.""" - layer: Optional[str] = Field(None) - is_free_input: bool = Field(False) - stats: Optional[Dict] = Field(None) + layer: Optional[str] = Field( + None, description="The layer that this transcoded data node belongs to" + ) + is_free_input: bool = Field( + False, description="Determines whether the transcoded data node is a free input" + ) + stats: Optional[Dict] = Field(None, description="The statistics for the data node.") original_version: Optional[AbstractDataset] = Field( - None, description="The original Kedro's AbstractDataset for this transcoded data node" + None, + description="The original Kedro's AbstractDataset for this transcoded data node", ) original_name: Optional[str] = Field( None, description="The original name for the generated run command" ) + run_command: Optional[str] = Field( + None, description="The 
command to run the pipeline to this node" + ) transcoded_versions: Set[AbstractDataset] = Field( - set(), description="The transcoded versions of the transcoded data nodes" + set(), description="The transcoded versions of the data nodes" + ) + type: str = GraphNodeType.DATA.value + + def has_metadata(self) -> bool: + return True + + +class ParametersNode(GraphNode): + """Represent a graph node of type parameters.""" + + layer: Optional[str] = Field( + None, description="The layer that this parameters node belongs to" + ) + type: str = GraphNodeType.PARAMETERS.value + + @model_validator(mode="before") + @classmethod + def check_kedro_obj_and_name_exists(cls, values): + assert "kedro_obj" in values + assert "name" in values + return values + + def is_all_parameters(self) -> bool: + return self.name == "parameters" + + def is_single_parameter(self) -> bool: + return not self.is_all_parameters() + + @property + def parameter_name(self) -> str: + return self.name.replace("params:", "") + + @property + def parameter_value(self) -> Any: + if not (self.kedro_obj and hasattr(self.kedro_obj, "load")): + return None + + try: + actual_parameter_value = self.kedro_obj.load() + return jsonable_encoder(actual_parameter_value) + except (TypeError, ValueError, RecursionError): + return str(actual_parameter_value) + except Exception as exc: + logger.error( + "An error occurred when loading parameter `%s` in the catalog :: %s", + self.parameter_name, + exc, + ) + return None + + +class ModularPipelineNode(GraphNode): + """Represent a modular pipeline node in the graph.""" + + modular_pipelines: Optional[Set[str]] = None + children: Set[ModularPipelineChild] = Field( + set(), description="The children for the modular pipeline node" + ) + inputs: Set[str] = Field( + set(), description="The input datasets to the modular pipeline node" + ) + outputs: Set[str] = Field( + set(), description="The output datasets from the modular pipeline node" ) + type: str = 
GraphNodeType.MODULAR_PIPELINE.value diff --git a/package/kedro_viz/models/flowchart/utils.py b/package/kedro_viz/models/flowchart/utils.py new file mode 100644 index 0000000000..d2bcdbe5a2 --- /dev/null +++ b/package/kedro_viz/models/flowchart/utils.py @@ -0,0 +1,35 @@ +# kedro_viz/models/utils.py + +import inspect +from pathlib import Path +from types import FunctionType +from typing import Any, Dict, Optional, Union +import logging + +try: + # kedro 0.18.12 onwards + from kedro.io.core import AbstractDataset +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore + +logger = logging.getLogger(__name__) + + +def _parse_filepath(dataset_description: Dict[str, Any]) -> Optional[str]: + filepath = dataset_description.get("filepath") or dataset_description.get("path") + return str(filepath) if filepath else None + + +def _extract_wrapped_func(func: FunctionType) -> FunctionType: + """Extract a wrapped decorated function to inspect the source code if available.""" + if func.__closure__ is None: + return func + closure = (c.cell_contents for c in func.__closure__) + wrapped_func = next((c for c in closure if isinstance(c, FunctionType)), None) + return func if wrapped_func is None else wrapped_func + + +def get_dataset_type(dataset: AbstractDataset) -> str: + """Utility function to get the dataset type.""" + return f"{dataset.__class__.__module__}.{dataset.__class__.__qualname__}" diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 715e4bc716..4eab727e80 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -4,7 +4,7 @@ from graphlib import CycleError, TopologicalSorter from typing import Dict, List, Set -from kedro_viz.models.flowchart.flowchart import GraphNode +from kedro_viz.models.flowchart import GraphNode logger = logging.getLogger(__name__) diff --git a/package/tests/conftest.py 
b/package/tests/conftest.py index d7b81d1492..d63fca7fd3 100644 --- a/package/tests/conftest.py +++ b/package/tests/conftest.py @@ -21,7 +21,7 @@ ) from kedro_viz.integrations.kedro.hooks import DatasetStatsHook from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore -from kedro_viz.models.flowchart.flowchart import DataNodeMetadata, GraphNode +from kedro_viz.models.flowchart import DataNodeMetadata, GraphNode from kedro_viz.server import populate_data diff --git a/package/tests/test_api/test_rest/test_responses.py b/package/tests/test_api/test_rest/test_responses.py index 43903a76a8..3f75904404 100644 --- a/package/tests/test_api/test_rest/test_responses.py +++ b/package/tests/test_api/test_rest/test_responses.py @@ -20,7 +20,7 @@ save_api_responses_to_fs, write_api_response_to_fs, ) -from kedro_viz.models.flowchart.flowchart import TaskNode +from kedro_viz.models.flowchart import TaskNode from kedro_viz.models.metadata import Metadata diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index 0dc2d75807..66bd08f1e9 100644 --- a/package/tests/test_data_access/test_managers.py +++ b/package/tests/test_data_access/test_managers.py @@ -15,7 +15,7 @@ ModularPipelinesRepository, ) from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart.flowchart import ( +from kedro_viz.models.flowchart import ( DataNode, GraphEdge, ParametersNode, diff --git a/package/tests/test_data_access/test_repositories/test_graph.py b/package/tests/test_data_access/test_repositories/test_graph.py index 905c99301d..c45232ebd1 100644 --- a/package/tests/test_data_access/test_repositories/test_graph.py +++ b/package/tests/test_data_access/test_repositories/test_graph.py @@ -4,7 +4,7 @@ GraphEdgesRepository, GraphNodesRepository, ) -from kedro_viz.models.flowchart.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart import GraphEdge, GraphNode class TestGraphNodeRepository: diff 
--git a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py index 8fb4856298..5b5a5e783b 100644 --- a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py +++ b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py @@ -6,7 +6,7 @@ from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID from kedro_viz.data_access.repositories import ModularPipelinesRepository -from kedro_viz.models.flowchart.flowchart import ( +from kedro_viz.models.flowchart import ( GraphNodeType, ModularPipelineChild, ModularPipelineNode, diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart.py index 86acbb4127..01238f286d 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart.py @@ -9,7 +9,7 @@ from kedro_datasets.pandas import CSVDataset, ParquetDataset from kedro_datasets.partitions.partitioned_dataset import PartitionedDataset -from kedro_viz.models.flowchart.flowchart import ( +from kedro_viz.models.flowchart import ( DataNode, DataNodeMetadata, GraphNode, diff --git a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index b01bb007b0..80d76fae5a 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -1,6 +1,6 @@ import pytest -from kedro_viz.models.flowchart.flowchart import GraphNode +from kedro_viz.models.flowchart import GraphNode from kedro_viz.services.layers import sort_layers From 0e038e1ad9c3979488d1205fa057f5b52e3981bd Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 18 Oct 2024 14:52:28 +0100 Subject: [PATCH 03/18] split into modular pipelines Signed-off-by: Sajid Alam --- .../models/flowchart/modular_pipelines.py | 28 +++++++++++++++++++ package/kedro_viz/models/flowchart/utils.py | 2 -- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 
100644 package/kedro_viz/models/flowchart/modular_pipelines.py diff --git a/package/kedro_viz/models/flowchart/modular_pipelines.py b/package/kedro_viz/models/flowchart/modular_pipelines.py new file mode 100644 index 0000000000..63330d88a6 --- /dev/null +++ b/package/kedro_viz/models/flowchart/modular_pipelines.py @@ -0,0 +1,28 @@ +from pydantic import BaseModel, Field +from typing import Set, Optional +from .enums import GraphNodeType + + +class ModularPipelineChild(BaseModel): + """Represent a child of a modular pipeline.""" + + id: str + type: GraphNodeType + + +class ModularPipelineNode(BaseModel): + """Represent a modular pipeline node in the graph.""" + + id: str + name: str + modular_pipelines: Optional[Set[str]] = None + children: Set[ModularPipelineChild] = Field( + set(), description="The children for the modular pipeline node" + ) + inputs: Set[str] = Field( + set(), description="The input datasets to the modular pipeline node" + ) + outputs: Set[str] = Field( + set(), description="The output datasets from the modular pipeline node" + ) + type: str = GraphNodeType.MODULAR_PIPELINE.value diff --git a/package/kedro_viz/models/flowchart/utils.py b/package/kedro_viz/models/flowchart/utils.py index d2bcdbe5a2..4040ce4577 100644 --- a/package/kedro_viz/models/flowchart/utils.py +++ b/package/kedro_viz/models/flowchart/utils.py @@ -1,5 +1,3 @@ -# kedro_viz/models/utils.py - import inspect from pathlib import Path from types import FunctionType From c8cf56aa96bc50648881c0cbbb7ca0486dc126dc Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 18 Oct 2024 14:52:48 +0100 Subject: [PATCH 04/18] remove comment Signed-off-by: Sajid Alam --- package/kedro_viz/models/flowchart/metadata.py | 2 -- package/kedro_viz/models/flowchart/nodes.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/package/kedro_viz/models/flowchart/metadata.py b/package/kedro_viz/models/flowchart/metadata.py index 2f2a43a614..2f479ef4c1 100644 --- 
a/package/kedro_viz/models/flowchart/metadata.py +++ b/package/kedro_viz/models/flowchart/metadata.py @@ -1,5 +1,3 @@ -# kedro_viz/models/metadata.py - from abc import ABC from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator from typing import Optional, Dict, Any, Union, List, ClassVar, cast diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index a7264d8449..7381d0e7d2 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,5 +1,3 @@ -# kedro_viz/models/nodes.py - from abc import ABC from pydantic import ( BaseModel, From 7152c698f593b3c331eaafe03c78e0a9920ef3b0 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 18 Oct 2024 16:22:29 +0100 Subject: [PATCH 05/18] move GraphNodeType to nodes Signed-off-by: Sajid Alam --- package/kedro_viz/models/flowchart/enums.py | 10 ---------- .../kedro_viz/models/flowchart/modular_pipelines.py | 2 +- package/kedro_viz/models/flowchart/nodes.py | 11 ++++++++++- 3 files changed, 11 insertions(+), 12 deletions(-) delete mode 100644 package/kedro_viz/models/flowchart/enums.py diff --git a/package/kedro_viz/models/flowchart/enums.py b/package/kedro_viz/models/flowchart/enums.py deleted file mode 100644 index 928d36fa9d..0000000000 --- a/package/kedro_viz/models/flowchart/enums.py +++ /dev/null @@ -1,10 +0,0 @@ -from enum import Enum - - -class GraphNodeType(str, Enum): - """Represent all possible node types in the graph representation of a Kedro pipeline.""" - - TASK = "task" - DATA = "data" - PARAMETERS = "parameters" - MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility diff --git a/package/kedro_viz/models/flowchart/modular_pipelines.py b/package/kedro_viz/models/flowchart/modular_pipelines.py index 63330d88a6..f970ed8ed6 100644 --- a/package/kedro_viz/models/flowchart/modular_pipelines.py +++ b/package/kedro_viz/models/flowchart/modular_pipelines.py @@ -1,6 +1,6 
@@ from pydantic import BaseModel, Field from typing import Set, Optional -from .enums import GraphNodeType +from .nodes import GraphNodeType class ModularPipelineChild(BaseModel): diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index 7381d0e7d2..616b429f6b 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,4 +1,5 @@ from abc import ABC +from enum import Enum from pydantic import ( BaseModel, Field, @@ -26,7 +27,6 @@ from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore from .entities import NamedEntity -from .enums import GraphNodeType from .utils import _parse_filepath, _extract_wrapped_func, get_dataset_type from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding from .modular_pipelines import ModularPipelineChild @@ -34,6 +34,15 @@ logger = logging.getLogger(__name__) +class GraphNodeType(str, Enum): + """Represent all possible node types in the graph representation of a Kedro pipeline.""" + + TASK = "task" + DATA = "data" + PARAMETERS = "parameters" + MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility + + class GraphNode(BaseModel, ABC): """Abstract base class representing a node in the graph.""" From 3c5989409965f1af4eac5ef3fc86879765b7dbc9 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Thu, 24 Oct 2024 13:57:07 +0100 Subject: [PATCH 06/18] refactor Signed-off-by: Sajid Alam --- .../models/flowchart/{edges.py => edge.py} | 0 .../kedro_viz/models/flowchart/entities.py | 30 ------------------- .../flowchart/{utils.py => model_utils.py} | 21 ++++++++++++- .../{metadata.py => node_metadata.py} | 9 +++--- package/kedro_viz/models/flowchart/nodes.py | 14 +++++---- .../{modular_pipelines.py => pipelines.py} | 9 +++++- package/kedro_viz/models/flowchart/tag.py | 8 +++++ 7 files changed, 49 insertions(+), 42 deletions(-) rename package/kedro_viz/models/flowchart/{edges.py => edge.py} (100%) 
delete mode 100644 package/kedro_viz/models/flowchart/entities.py rename package/kedro_viz/models/flowchart/{utils.py => model_utils.py} (70%) rename package/kedro_viz/models/flowchart/{metadata.py => node_metadata.py} (97%) rename package/kedro_viz/models/flowchart/{modular_pipelines.py => pipelines.py} (81%) create mode 100644 package/kedro_viz/models/flowchart/tag.py diff --git a/package/kedro_viz/models/flowchart/edges.py b/package/kedro_viz/models/flowchart/edge.py similarity index 100% rename from package/kedro_viz/models/flowchart/edges.py rename to package/kedro_viz/models/flowchart/edge.py diff --git a/package/kedro_viz/models/flowchart/entities.py b/package/kedro_viz/models/flowchart/entities.py deleted file mode 100644 index cbc13d872a..0000000000 --- a/package/kedro_viz/models/flowchart/entities.py +++ /dev/null @@ -1,30 +0,0 @@ -from pydantic import BaseModel, Field, ValidationInfo, field_validator -from typing import Optional - - -class NamedEntity(BaseModel): - """Represent a named entity (Tag/Registered Pipeline) in a Kedro project.""" - - id: str - name: Optional[str] = Field( - default=None, - validate_default=True, - description="The name of the entity", - ) - - @field_validator("name") - @classmethod - def set_name(cls, _, info: ValidationInfo): - assert "id" in info.data - return info.data["id"] - - -class RegisteredPipeline(NamedEntity): - """Represent a registered pipeline in a Kedro project.""" - - -class Tag(NamedEntity): - """Represent a tag in a Kedro project.""" - - def __hash__(self) -> int: - return hash(self.id) diff --git a/package/kedro_viz/models/flowchart/utils.py b/package/kedro_viz/models/flowchart/model_utils.py similarity index 70% rename from package/kedro_viz/models/flowchart/utils.py rename to package/kedro_viz/models/flowchart/model_utils.py index 4040ce4577..5b209f70e6 100644 --- a/package/kedro_viz/models/flowchart/utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -1,8 +1,10 @@ import inspect +import 
logging from pathlib import Path from types import FunctionType from typing import Any, Dict, Optional, Union -import logging + +from pydantic import BaseModel, Field, ValidationInfo, field_validator try: # kedro 0.18.12 onwards @@ -31,3 +33,20 @@ def _extract_wrapped_func(func: FunctionType) -> FunctionType: def get_dataset_type(dataset: AbstractDataset) -> str: """Utility function to get the dataset type.""" return f"{dataset.__class__.__module__}.{dataset.__class__.__qualname__}" + + +class NamedEntity(BaseModel): + """Represent a named entity (Tag/Registered Pipeline) in a Kedro project.""" + + id: str + name: Optional[str] = Field( + default=None, + validate_default=True, + description="The name of the entity", + ) + + @field_validator("name") + @classmethod + def set_name(cls, _, info: ValidationInfo): + assert "id" in info.data + return info.data["id"] diff --git a/package/kedro_viz/models/flowchart/metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py similarity index 97% rename from package/kedro_viz/models/flowchart/metadata.py rename to package/kedro_viz/models/flowchart/node_metadata.py index 2f479ef4c1..ffaeab07b1 100644 --- a/package/kedro_viz/models/flowchart/metadata.py +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -1,7 +1,8 @@ +import logging from abc import ABC +from typing import Any, ClassVar, Dict, List, Optional, Union, cast + from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator -from typing import Optional, Dict, Any, Union, List, ClassVar, cast -import logging try: # kedro 0.18.11 onwards @@ -17,8 +18,8 @@ # older versions from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore -from .nodes import TaskNode, DataNode, TranscodedDataNode, ParametersNode -from .utils import _extract_wrapped_func, _parse_filepath, get_dataset_type +from .model_utils import _extract_wrapped_func, _parse_filepath, get_dataset_type +from .nodes import DataNode, ParametersNode, 
TaskNode, TranscodedDataNode logger = logging.getLogger(__name__) diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index 616b429f6b..c20c310618 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,16 +1,17 @@ +import logging from abc import ABC from enum import Enum +from typing import Any, ClassVar, Dict, List, Optional, Set, Union, cast + +from fastapi.encoders import jsonable_encoder from pydantic import ( BaseModel, - Field, ConfigDict, + Field, ValidationInfo, field_validator, model_validator, ) -from typing import Optional, Set, Union, Dict, Any, ClassVar, List, cast -from fastapi.encoders import jsonable_encoder -import logging try: # kedro 0.18.11 onwards @@ -26,10 +27,11 @@ # older versions from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore -from .entities import NamedEntity -from .utils import _parse_filepath, _extract_wrapped_func, get_dataset_type from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding + +from .entities import NamedEntity from .modular_pipelines import ModularPipelineChild +from .utils import _extract_wrapped_func, _parse_filepath, get_dataset_type logger = logging.getLogger(__name__) diff --git a/package/kedro_viz/models/flowchart/modular_pipelines.py b/package/kedro_viz/models/flowchart/pipelines.py similarity index 81% rename from package/kedro_viz/models/flowchart/modular_pipelines.py rename to package/kedro_viz/models/flowchart/pipelines.py index f970ed8ed6..a569c3bb83 100644 --- a/package/kedro_viz/models/flowchart/modular_pipelines.py +++ b/package/kedro_viz/models/flowchart/pipelines.py @@ -1,5 +1,8 @@ +from typing import Optional, Set + from pydantic import BaseModel, Field -from typing import Set, Optional + +from .model_utils import NamedEntity from .nodes import GraphNodeType @@ -26,3 +29,7 @@ class ModularPipelineNode(BaseModel): set(), description="The output datasets from 
the modular pipeline node" ) type: str = GraphNodeType.MODULAR_PIPELINE.value + + +class RegisteredPipeline(NamedEntity): + """Represent a registered pipeline in a Kedro project.""" diff --git a/package/kedro_viz/models/flowchart/tag.py b/package/kedro_viz/models/flowchart/tag.py new file mode 100644 index 0000000000..e87a692feb --- /dev/null +++ b/package/kedro_viz/models/flowchart/tag.py @@ -0,0 +1,8 @@ +from .model_utils import NamedEntity + + +class Tag(NamedEntity): + """Represent a tag in a Kedro project.""" + + def __hash__(self) -> int: + return hash(self.id) From 579fc826055b3d25a1ff0d52db117825d2b64f97 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Thu, 24 Oct 2024 14:51:15 +0100 Subject: [PATCH 07/18] fix refactors Signed-off-by: Sajid Alam --- .../kedro_viz/models/flowchart/model_utils.py | 13 +- .../models/flowchart/node_metadata.py | 85 ++++++++- package/kedro_viz/models/flowchart/nodes.py | 180 ++++++++++++++++-- .../kedro_viz/models/flowchart/pipelines.py | 17 +- 4 files changed, 266 insertions(+), 29 deletions(-) diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index 5b209f70e6..52098d9ac8 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -22,11 +22,14 @@ def _parse_filepath(dataset_description: Dict[str, Any]) -> Optional[str]: def _extract_wrapped_func(func: FunctionType) -> FunctionType: - """Extract a wrapped decorated function to inspect the source code if available.""" + """Extract a wrapped decorated function to inspect the source code if available. 
+ Adapted from https://stackoverflow.com/a/43506509/1684058 + """ if func.__closure__ is None: return func closure = (c.cell_contents for c in func.__closure__) wrapped_func = next((c for c in closure if isinstance(c, FunctionType)), None) + # return the original function if it's not a decorated function return func if wrapped_func is None else wrapped_func @@ -36,7 +39,13 @@ def get_dataset_type(dataset: AbstractDataset) -> str: class NamedEntity(BaseModel): - """Represent a named entity (Tag/Registered Pipeline) in a Kedro project.""" + """Represent a named entity (Tag/Registered Pipeline) in a Kedro project + Args: + id (str): Id of the registered pipeline + + Raises: + AssertionError: If id is not supplied during instantiation + """ id: str name: Optional[str] = Field( diff --git a/package/kedro_viz/models/flowchart/node_metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py index ffaeab07b1..6129a62aaf 100644 --- a/package/kedro_viz/models/flowchart/node_metadata.py +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -1,7 +1,10 @@ +import inspect import logging from abc import ABC +from pathlib import Path from typing import Any, ClassVar, Dict, List, Optional, Union, cast +from kedro.pipeline.node import Node as KedroNode from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator try: @@ -25,23 +28,33 @@ class GraphNodeMetadata(BaseModel, ABC): - """Abstract base class representing metadata of a graph node.""" + """Represent a graph node's metadata""" class TaskNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TaskNode.""" + """Represent the metadata of a TaskNode + + Args: + task_node (TaskNode): Task node to which this metadata belongs to. 
+ + Raises: + AssertionError: If task_node is not supplied during instantiation + """ task_node: TaskNode = Field(..., exclude=True) + code: Optional[str] = Field( default=None, validate_default=True, description="Source code of the node's function", ) + filepath: Optional[str] = Field( default=None, validate_default=True, description="Path to the file where the node is defined", ) + parameters: Optional[Dict] = Field( default=None, validate_default=True, @@ -52,6 +65,7 @@ class TaskNodeMetadata(GraphNodeMetadata): validate_default=True, description="The command to run the pipeline to this node", ) + inputs: Optional[List[str]] = Field( default=None, validate_default=True, description="The inputs to the TaskNode" ) @@ -74,14 +88,17 @@ def set_task_and_kedro_node(cls, task_node): @field_validator("code") @classmethod def set_code(cls, code): + # this is required to handle partial, curry functions if inspect.isfunction(cls.kedro_node.func): code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) return code + return None @field_validator("filepath") @classmethod def set_filepath(cls, filepath): + # this is required to handle partial, curry functions if inspect.isfunction(cls.kedro_node.func): code_full_path = ( Path(inspect.getfile(cls.kedro_node.func)).expanduser().resolve() @@ -89,10 +106,14 @@ def set_filepath(cls, filepath): try: filepath = code_full_path.relative_to(Path.cwd().parent) - except ValueError: + except ValueError: # pragma: no cover + # if the filepath can't be resolved relative to the current directory, + # e.g. either during tests or during launching development server + # outside of a Kedro project, simply return the fullpath to the file. 
filepath = code_full_path return str(filepath) + return None @field_validator("parameters") @@ -117,33 +138,52 @@ def set_outputs(cls, _): class DataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a DataNode.""" + """Represent the metadata of a DataNode + + Args: + data_node (DataNode): Data node to which this metadata belongs to. + + Attributes: + is_all_previews_enabled (bool): Class-level attribute to determine if + previews are enabled for all nodes. This can be configured via CLI + or UI to manage the preview settings. + + Raises: + AssertionError: If data_node is not supplied during instantiation + """ data_node: DataNode = Field(..., exclude=True) + is_all_previews_enabled: ClassVar[bool] = True + type: Optional[str] = Field( default=None, validate_default=True, description="The type of the data node" ) + filepath: Optional[str] = Field( default=None, validate_default=True, description="The path to the actual data file for the underlying dataset", ) + run_command: Optional[str] = Field( default=None, validate_default=True, description="Command to run the pipeline to this node", ) + preview: Optional[Union[Dict, str]] = Field( default=None, validate_default=True, - description="Preview data for the underlying data node", + description="Preview data for the underlying datanode", ) + preview_type: Optional[str] = Field( default=None, validate_default=True, description="Type of preview for the dataset", ) + stats: Optional[Dict] = Field( default=None, validate_default=True, @@ -165,6 +205,9 @@ def set_is_all_previews_enabled(cls, value: bool): def set_data_node_and_dataset(cls, data_node): cls.data_node = data_node cls.dataset = cast(AbstractDataset, data_node.kedro_obj) + + # dataset.release clears the cache before loading to ensure that this issue + # does not arise: https://github.com/kedro-org/kedro-viz/pull/573. 
cls.dataset.release() @field_validator("type") @@ -203,7 +246,7 @@ def set_preview(cls, _): return cls.dataset.preview() return cls.dataset.preview(**preview_args) - except Exception as exc: + except Exception as exc: # pylint: disable=broad-except logger.warning( "'%s' could not be previewed. Full exception: %s: %s", cls.data_node.name, @@ -226,12 +269,14 @@ def set_preview_type(cls, _): preview_type_annotation = inspect.signature( cls.dataset.preview ).return_annotation + # Attempt to get the name attribute, if it exists. + # Otherwise, use str to handle the annotation directly. preview_type_name = getattr( preview_type_annotation, "__name__", str(preview_type_annotation) ) return preview_type_name - except Exception as exc: + except Exception as exc: # pylint: disable=broad-except # pragma: no cover logger.warning( "'%s' did not have preview type. Full exception: %s: %s", cls.data_node.name, @@ -247,14 +292,24 @@ def set_stats(cls, _): class TranscodedDataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TranscodedDataNode.""" + """Represent the metadata of a TranscodedDataNode + Args: + transcoded_data_node (TranscodedDataNode): The underlying transcoded + data node to which this metadata belongs to. + + Raises: + AssertionError: If transcoded_data_node is not supplied during instantiation + """ transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) + + # Only available if the dataset has filepath set. 
filepath: Optional[str] = Field( default=None, validate_default=True, description="The path to the actual data file for the underlying dataset", ) + run_command: Optional[str] = Field( default=None, validate_default=True, @@ -263,13 +318,15 @@ class TranscodedDataNodeMetadata(GraphNodeMetadata): original_type: Optional[str] = Field( default=None, validate_default=True, - description="The dataset type of the original version", + description="The dataset type of the underlying transcoded data node original version", ) transcoded_types: Optional[List[str]] = Field( default=None, validate_default=True, description="The list of all dataset types for the transcoded versions", ) + + # Statistics for the underlying data node stats: Optional[Dict] = Field( default=None, validate_default=True, @@ -316,7 +373,15 @@ def set_stats(cls, _): class ParametersNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a ParametersNode.""" + """Represent the metadata of a ParametersNode + + Args: + parameters_node (ParametersNode): The underlying parameters node + for the parameters metadata node. + + Raises: + AssertionError: If parameters_node is not supplied during instantiation + """ parameters_node: ParametersNode = Field(..., exclude=True) parameters: Optional[Dict] = Field( diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index c20c310618..ff9e2408e8 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -37,7 +37,10 @@ class GraphNodeType(str, Enum): - """Represent all possible node types in the graph representation of a Kedro pipeline.""" + """Represent all possible node types in the graph representation of a Kedro pipeline. + The type needs to inherit from str as well so FastAPI can serialise it. 
See: + https://fastapi.tiangolo.com/tutorial/path-params/#working-with-python-enumerations + """ TASK = "task" DATA = "data" @@ -46,21 +49,41 @@ class GraphNodeType(str, Enum): class GraphNode(BaseModel, ABC): - """Abstract base class representing a node in the graph.""" + """Represent a node in the graph representation of a Kedro pipeline. + All node models except the metadata node models should inherit from this class + + Args: + id (str): A unique identifier for the node in the graph, + obtained by hashing the node's string representation. + name (str): The full name of this node obtained from the underlying Kedro object + type (str): The type of the graph node + tags (Set[str]): The tags associated with this node. Defaults to `set()`. + kedro_obj (Optional[Union[KedroNode, AbstractDataset]]): The underlying Kedro object + for each graph node, if any. Defaults to `None`. + pipelines (Set[str]): The set of registered pipeline IDs this + node belongs to. Defaults to `set()`. + modular_pipelines (Optional[Set(str)]): A set of modular pipeline names + this node belongs to. + + """ id: str name: str type: str - tags: Set[str] = Field(set()) + tags: Set[str] = Field(set(), description="The tags associated with this node") kedro_obj: Optional[Union[KedroNode, AbstractDataset]] = Field( None, description="The underlying Kedro object for each graph node, if any", exclude=True, ) - pipelines: Set[str] = Field(set()) + pipelines: Set[str] = Field( + set(), description="The set of registered pipeline IDs this node belongs to" + ) + modular_pipelines: Optional[Set[str]] = Field( default=None, validate_default=True, + description="The modular_pipelines this node belongs to", ) model_config = ConfigDict(arbitrary_types_allowed=True) @@ -68,6 +91,14 @@ class GraphNode(BaseModel, ABC): def create_task_node( cls, node: KedroNode, node_id: str, modular_pipelines: Optional[Set[str]] ) -> "TaskNode": + """Create a graph node of type task for a given Kedro Node instance. 
+ Args: + node: A node in a Kedro pipeline. + node_id: Id of the task node. + modular_pipelines: A set of modular_pipeline_ids the node belongs to. + Returns: + An instance of TaskNode. + """ node_name = node._name or node._func_name return TaskNode( id=node_id, @@ -78,6 +109,7 @@ def create_task_node( ) @classmethod + # pylint: disable=too-many-positional-arguments def create_data_node( cls, dataset_id: str, @@ -89,6 +121,22 @@ def create_data_node( modular_pipelines: Optional[Set[str]], is_free_input: bool = False, ) -> Union["DataNode", "TranscodedDataNode"]: + """Create a graph node of type data for a given Kedro Dataset instance. + Args: + dataset_id: A hashed id for the dataset node + dataset_name: The name of the dataset, including namespace, e.g. + data_science.master_table. + layer: The optional layer that the dataset belongs to. + tags: The set of tags assigned to assign to the graph representation + of this dataset. N.B. currently it's derived from the node's tags. + dataset: A dataset in a Kedro pipeline. + stats: The dictionary of dataset statistics, e.g. + {"rows":2, "columns":3, "file_size":100} + modular_pipelines: A set of modular_pipeline_ids the node belongs to. + is_free_input: Whether the dataset is a free input in the pipeline + Returns: + An instance of DataNode. + """ is_transcoded_dataset = TRANSCODING_SEPARATOR in dataset_name if is_transcoded_dataset: name = _strip_transcoding(dataset_name) @@ -114,6 +162,7 @@ def create_data_node( ) @classmethod + # pylint: disable=too-many-positional-arguments def create_parameters_node( cls, dataset_id: str, @@ -123,6 +172,19 @@ def create_parameters_node( parameters: AbstractDataset, modular_pipelines: Optional[Set[str]], ) -> "ParametersNode": + """Create a graph node of type parameters for a given Kedro parameters dataset instance. + Args: + dataset_id: A hashed id for the parameters node + dataset_name: The name of the dataset, including namespace, e.g. 
+ data_science.test_split_ratio + layer: The optional layer that the parameters belong to. + tags: The set of tags assigned to assign to the graph representation + of this dataset. N.B. currently it's derived from the node's tags. + parameters: A parameters dataset in a Kedro pipeline. + modular_pipelines: A set of modular_pipeline_ids the node belongs to. + Returns: + An instance of ParametersNode. + """ return ParametersNode( id=dataset_id, name=dataset_name, @@ -136,25 +198,50 @@ def create_parameters_node( def create_modular_pipeline_node( cls, modular_pipeline_id: str ) -> "ModularPipelineNode": + """Create a graph node of type modularPipeline for a given modular pipeline ID. + This is used to visualise all modular pipelines in a Kedro project on the graph. + Args: + modular_pipeline_id: The ID of the modular pipeline to convert into a graph node. + Returns: + An instance of ModularPipelineNode. + Example: + >>> node = GraphNode.create_modular_pipeline_node("pipeline.data_science") + >>> assert node.id == "pipeline.data_science" + >>> assert node.name == "pipeline.data_science" + >>> assert node.type == GraphNodeType.MODULAR_PIPELINE + """ return ModularPipelineNode(id=modular_pipeline_id, name=modular_pipeline_id) def add_pipeline(self, pipeline_id: str): + """Add a pipeline_id to the list of pipelines that this node belongs to.""" self.pipelines.add(pipeline_id) def belongs_to_pipeline(self, pipeline_id: str) -> bool: + """Check whether this graph node belongs to a given pipeline_id.""" return pipeline_id in self.pipelines def has_metadata(self) -> bool: + """Check whether this graph node has metadata. + Since metadata of a graph node is derived from the underlying Kedro object, + we just need to check whether the underlying object exists. 
+ """ return self.kedro_obj is not None class TaskNode(GraphNode): - """Represent a graph node of type task.""" + """Represent a graph node of type task + + Raises: + AssertionError: If kedro_obj is not supplied during instantiation + """ parameters: Dict = Field( {}, description="A dictionary of parameter values for the task node" ) + + # The type for Task node type: str = GraphNodeType.TASK.value + namespace: Optional[str] = Field( default=None, validate_default=True, @@ -173,8 +260,18 @@ def set_namespace(cls, _, info: ValidationInfo): return info.data["kedro_obj"].namespace +# pylint: disable=missing-function-docstring class DataNode(GraphNode): - """Represent a graph node of type data.""" + """Represent a graph node of type data + + Args: + layer (Optional[str]): The layer that this data node belongs to. Defaults to `None`. + is_free_input (bool): Determines whether the data node is a free input. Defaults to `False`. + stats (Optional[Dict]): Statistics for the data node. Defaults to `None`. 
+ + Raises: + AssertionError: If kedro_obj, name are not supplied during instantiation + """ layer: Optional[str] = Field( None, description="The layer that this data node belongs to" @@ -183,17 +280,22 @@ class DataNode(GraphNode): False, description="Determines whether the data node is a free input" ) stats: Optional[Dict] = Field(None, description="The statistics for the data node.") + dataset_type: Optional[str] = Field( default=None, validate_default=True, description="The concrete type of the underlying kedro_obj", ) + viz_metadata: Optional[Dict] = Field( default=None, validate_default=True, description="The metadata for data node" ) + run_command: Optional[str] = Field( None, description="The command to run the pipeline to this node" ) + + # The type for data node type: str = GraphNodeType.DATA.value @model_validator(mode="before") @@ -219,16 +321,30 @@ def set_viz_metadata(cls, _, info: ValidationInfo): return None def get_preview_args(self): + """Gets the preview arguments for a dataset""" return self.viz_metadata.get("preview_args", None) def is_preview_enabled(self): + """Checks if the dataset has a preview enabled at the node level.""" return ( self.viz_metadata is None or self.viz_metadata.get("preview") is not False ) class TranscodedDataNode(GraphNode): - """Represent a graph node of type data for transcoded datasets.""" + """Represent a graph node of type data + + Args: + layer (Optional[str]): The layer that this transcoded data + node belongs to. Defaults to `None`. + is_free_input (bool): Determines whether the transcoded data + node is a free input. Defaults to `False`. 
+ stats (Optional[Dict]): Statistics for the data node + + Raises: + AssertionError: If name is not supplied during instantiation + + """ layer: Optional[str] = Field( None, description="The layer that this transcoded data node belongs to" @@ -244,12 +360,16 @@ class TranscodedDataNode(GraphNode): original_name: Optional[str] = Field( None, description="The original name for the generated run command" ) + run_command: Optional[str] = Field( None, description="The command to run the pipeline to this node" ) + # The transcoded versions of the transcoded data nodes. transcoded_versions: Set[AbstractDataset] = Field( - set(), description="The transcoded versions of the data nodes" + set(), description="The transcoded versions of the transcoded data nodes" ) + + # The type for data node type: str = GraphNodeType.DATA.value def has_metadata(self) -> bool: @@ -257,11 +377,19 @@ def has_metadata(self) -> bool: class ParametersNode(GraphNode): - """Represent a graph node of type parameters.""" + """Represent a graph node of type parameters + Args: + layer (Optional[str]): The layer that this parameters node belongs to. Defaults to `None`. 
+ + Raises: + AssertionError: If kedro_obj, name are not supplied during instantiation + """ layer: Optional[str] = Field( None, description="The layer that this parameters node belongs to" ) + + # The type for Parameters Node type: str = GraphNodeType.PARAMETERS.value @model_validator(mode="before") @@ -272,26 +400,42 @@ def check_kedro_obj_and_name_exists(cls, values): return values def is_all_parameters(self) -> bool: + """Check whether the graph node represent all parameters in the pipeline""" return self.name == "parameters" def is_single_parameter(self) -> bool: + """Check whether the graph node represent a single parameter in the pipeline""" return not self.is_all_parameters() @property def parameter_name(self) -> str: + """Get a normalised parameter name without the "params:" prefix""" return self.name.replace("params:", "") @property def parameter_value(self) -> Any: + """Load the parameter value from the underlying dataset""" if not (self.kedro_obj and hasattr(self.kedro_obj, "load")): return None try: actual_parameter_value = self.kedro_obj.load() + # Return only json serializable value return jsonable_encoder(actual_parameter_value) except (TypeError, ValueError, RecursionError): + # In case the parameter is not JSON serializable, + # return the string representation return str(actual_parameter_value) - except Exception as exc: + except (AttributeError, DatasetError): + # This except clause triggers if the user passes a parameter that is not + # defined in the catalog (DatasetError) it also catches any case where + # the kedro_obj is None (AttributeError) -- GH#1231 + logger.warning( + "Cannot find parameter `%s` in the catalog.", self.parameter_name + ) + return None + # pylint: disable=broad-exception-caught + except Exception as exc: # pragma: no cover logger.error( "An error occurred when loading parameter `%s` in the catalog :: %s", self.parameter_name, @@ -301,16 +445,30 @@ def parameter_value(self) -> Any: class ModularPipelineNode(GraphNode): - 
"""Represent a modular pipeline node in the graph.""" + """Represent a modular pipeline node in the graph""" + # A modular pipeline doesn't belong to any other modular pipeline, + # in the same sense as other types of GraphNode do. + # Therefore, it's default to None. + # The parent-child relationship between modular pipeline themselves is modelled explicitly. modular_pipelines: Optional[Set[str]] = None + + # Model the modular pipelines tree using a child-references representation of a tree. + # See: https://docs.mongodb.com/manual/tutorial/model-tree-structures-with-child-references/ + # for more details. + # For example, if a node namespace is "uk.data_science", + # the "uk" modular pipeline node's children are ["uk.data_science"] children: Set[ModularPipelineChild] = Field( set(), description="The children for the modular pipeline node" ) + inputs: Set[str] = Field( set(), description="The input datasets to the modular pipeline node" ) + outputs: Set[str] = Field( set(), description="The output datasets from the modular pipeline node" ) + + # The type for Modular Pipeline Node type: str = GraphNodeType.MODULAR_PIPELINE.value diff --git a/package/kedro_viz/models/flowchart/pipelines.py b/package/kedro_viz/models/flowchart/pipelines.py index a569c3bb83..ce023c7102 100644 --- a/package/kedro_viz/models/flowchart/pipelines.py +++ b/package/kedro_viz/models/flowchart/pipelines.py @@ -6,8 +6,17 @@ from .nodes import GraphNodeType -class ModularPipelineChild(BaseModel): - """Represent a child of a modular pipeline.""" +class RegisteredPipeline(NamedEntity): + """Represent a registered pipeline in a Kedro project.""" + + +class ModularPipelineChild(BaseModel, frozen=True): + """Represent a child of a modular pipeline. 
+ + Args: + id (str): Id of the modular pipeline child + type (GraphNodeType): Type of modular pipeline child + """ id: str type: GraphNodeType @@ -29,7 +38,3 @@ class ModularPipelineNode(BaseModel): set(), description="The output datasets from the modular pipeline node" ) type: str = GraphNodeType.MODULAR_PIPELINE.value - - -class RegisteredPipeline(NamedEntity): - """Represent a registered pipeline in a Kedro project.""" From dea5dcaf381f37491be64fed822458b94bc4e878 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Thu, 24 Oct 2024 15:30:08 +0100 Subject: [PATCH 08/18] fix imports Signed-off-by: Sajid Alam --- package/kedro_viz/api/rest/responses.py | 6 ++---- package/kedro_viz/data_access/managers.py | 10 ++++++---- .../kedro_viz/data_access/repositories/graph.py | 3 ++- .../repositories/modular_pipelines.py | 4 ++-- .../repositories/registered_pipelines.py | 2 +- .../kedro_viz/data_access/repositories/tags.py | 2 +- package/kedro_viz/models/flowchart/edge.py | 2 ++ .../kedro_viz/models/flowchart/model_utils.py | 6 +++--- .../kedro_viz/models/flowchart/node_metadata.py | 6 ++++-- package/kedro_viz/models/flowchart/nodes.py | 17 +++++++++-------- package/kedro_viz/models/flowchart/pipelines.py | 2 ++ package/kedro_viz/models/flowchart/tag.py | 2 ++ package/kedro_viz/services/layers.py | 2 +- package/tests/conftest.py | 3 ++- .../tests/test_api/test_rest/test_responses.py | 2 +- package/tests/test_data_access/test_managers.py | 6 +++--- .../test_repositories/test_graph.py | 2 +- .../test_repositories/test_modular_pipelines.py | 7 ++----- package/tests/test_models/test_flowchart.py | 14 ++++++++------ package/tests/test_services/test_layers.py | 2 +- 20 files changed, 55 insertions(+), 45 deletions(-) diff --git a/package/kedro_viz/api/rest/responses.py b/package/kedro_viz/api/rest/responses.py index 2f59d33b16..9dafe8a5a3 100644 --- a/package/kedro_viz/api/rest/responses.py +++ b/package/kedro_viz/api/rest/responses.py @@ -13,15 +13,13 @@ from 
kedro_viz.api.rest.utils import get_package_compatibilities from kedro_viz.data_access import data_access_manager -from kedro_viz.models.flowchart import ( - DataNode, +from kedro_viz.models.flowchart.node_metadata import ( DataNodeMetadata, ParametersNodeMetadata, - TaskNode, TaskNodeMetadata, - TranscodedDataNode, TranscodedDataNodeMetadata, ) +from kedro_viz.models.flowchart.nodes import DataNode, TaskNode, TranscodedDataNode from kedro_viz.models.metadata import Metadata, PackageCompatibility logger = logging.getLogger(__name__) diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 40e00ebe55..41634dc9f5 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -14,18 +14,20 @@ from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.nodes import ( DataNode, - GraphEdge, GraphNode, GraphNodeType, - ModularPipelineChild, ModularPipelineNode, ParametersNode, - RegisteredPipeline, TaskNode, TranscodedDataNode, ) +from kedro_viz.models.flowchart.pipelines import ( + ModularPipelineChild, + RegisteredPipeline, +) from kedro_viz.services import layers_services from kedro_viz.utils import _strip_transcoding, is_dataset_param diff --git a/package/kedro_viz/data_access/repositories/graph.py b/package/kedro_viz/data_access/repositories/graph.py index 90f734ec1d..1ebfb19533 100644 --- a/package/kedro_viz/data_access/repositories/graph.py +++ b/package/kedro_viz/data_access/repositories/graph.py @@ -3,7 +3,8 @@ # pylint: disable=missing-class-docstring,missing-function-docstring from typing import Dict, Generator, List, Optional, Set -from kedro_viz.models.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart.edge import GraphEdge +from 
kedro_viz.models.flowchart.nodes import GraphNode class GraphNodesRepository: diff --git a/package/kedro_viz/data_access/repositories/modular_pipelines.py b/package/kedro_viz/data_access/repositories/modular_pipelines.py index 25b7645ff4..6c5d48ef4f 100644 --- a/package/kedro_viz/data_access/repositories/modular_pipelines.py +++ b/package/kedro_viz/data_access/repositories/modular_pipelines.py @@ -9,12 +9,12 @@ from kedro.pipeline.node import Node as KedroNode from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.nodes import ( GraphNode, GraphNodeType, - ModularPipelineChild, ModularPipelineNode, ) +from kedro_viz.models.flowchart.pipelines import ModularPipelineChild from kedro_viz.utils import _hash, _hash_input_output, is_dataset_param diff --git a/package/kedro_viz/data_access/repositories/registered_pipelines.py b/package/kedro_viz/data_access/repositories/registered_pipelines.py index 16cdd98adf..08018c3a3d 100644 --- a/package/kedro_viz/data_access/repositories/registered_pipelines.py +++ b/package/kedro_viz/data_access/repositories/registered_pipelines.py @@ -4,7 +4,7 @@ from collections import OrderedDict, defaultdict from typing import Dict, List, Optional, Set -from kedro_viz.models.flowchart import RegisteredPipeline +from kedro_viz.models.flowchart.pipelines import RegisteredPipeline class RegisteredPipelinesRepository: diff --git a/package/kedro_viz/data_access/repositories/tags.py b/package/kedro_viz/data_access/repositories/tags.py index eae5c68bb0..96ada994ff 100644 --- a/package/kedro_viz/data_access/repositories/tags.py +++ b/package/kedro_viz/data_access/repositories/tags.py @@ -3,7 +3,7 @@ # pylint: disable=missing-class-docstring,missing-function-docstring from typing import Iterable, List, Set -from kedro_viz.models.flowchart import Tag +from kedro_viz.models.flowchart.tag import Tag class TagsRepository: diff --git a/package/kedro_viz/models/flowchart/edge.py 
b/package/kedro_viz/models/flowchart/edge.py index 9f9757501f..439cafc782 100644 --- a/package/kedro_viz/models/flowchart/edge.py +++ b/package/kedro_viz/models/flowchart/edge.py @@ -1,3 +1,5 @@ +"""`kedro_viz.models.flowchart.edge` defines data models to represent Kedro edges in a viz graph.""" + from pydantic import BaseModel diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index 52098d9ac8..c23291b352 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -1,8 +1,8 @@ -import inspect +"""`kedro_viz.models.flowchart.model_utils` defines utils for Kedro entities in a viz graph.""" + import logging -from pathlib import Path from types import FunctionType -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from pydantic import BaseModel, Field, ValidationInfo, field_validator diff --git a/package/kedro_viz/models/flowchart/node_metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py index 6129a62aaf..acdcbda4a3 100644 --- a/package/kedro_viz/models/flowchart/node_metadata.py +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -1,11 +1,13 @@ +"""`kedro_viz.models.flowchart.node_metadata` defines data models to represent Kedro metadata in a viz graph.""" + import inspect import logging from abc import ABC from pathlib import Path -from typing import Any, ClassVar, Dict, List, Optional, Union, cast +from typing import ClassVar, Dict, List, Optional, Union, cast from kedro.pipeline.node import Node as KedroNode -from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator try: # kedro 0.18.11 onwards diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index ff9e2408e8..08da784eb4 100644 --- a/package/kedro_viz/models/flowchart/nodes.py 
+++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,9 +1,12 @@ +"""`kedro_viz.models.flowchart.nodes` defines data models to represent Kedro nodes in a viz graph.""" + import logging from abc import ABC from enum import Enum -from typing import Any, ClassVar, Dict, List, Optional, Set, Union, cast +from typing import Any, Dict, Optional, Set, Union, cast from fastapi.encoders import jsonable_encoder +from kedro.pipeline.node import Node as KedroNode from pydantic import ( BaseModel, ConfigDict, @@ -15,11 +18,10 @@ try: # kedro 0.18.11 onwards - from kedro.pipeline.node import Node as KedroNode + from kedro.io.core import DatasetError except ImportError: # pragma: no cover - # Handle older versions or custom implementations - KedroNode = Any # Replace with appropriate import or definition - + # older versions + from kedro.io.core import DataSetError as DatasetError # type: ignore try: # kedro 0.18.12 onwards from kedro.io.core import AbstractDataset @@ -29,9 +31,8 @@ from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding -from .entities import NamedEntity -from .modular_pipelines import ModularPipelineChild -from .utils import _extract_wrapped_func, _parse_filepath, get_dataset_type +from .model_utils import get_dataset_type +from .pipelines import ModularPipelineChild logger = logging.getLogger(__name__) diff --git a/package/kedro_viz/models/flowchart/pipelines.py b/package/kedro_viz/models/flowchart/pipelines.py index ce023c7102..ba343dcf2b 100644 --- a/package/kedro_viz/models/flowchart/pipelines.py +++ b/package/kedro_viz/models/flowchart/pipelines.py @@ -1,3 +1,5 @@ +"""`kedro_viz.models.flowchart.pipelines` represent Kedro pipelines in a viz graph.""" + from typing import Optional, Set from pydantic import BaseModel, Field diff --git a/package/kedro_viz/models/flowchart/tag.py b/package/kedro_viz/models/flowchart/tag.py index e87a692feb..357f0137a9 100644 --- a/package/kedro_viz/models/flowchart/tag.py +++ 
b/package/kedro_viz/models/flowchart/tag.py @@ -1,3 +1,5 @@ +"""`kedro_viz.models.tag` defines data models to represent Kedro tags in a viz graph.""" + from .model_utils import NamedEntity diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 4eab727e80..3d6852c7ee 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -4,7 +4,7 @@ from graphlib import CycleError, TopologicalSorter from typing import Dict, List, Set -from kedro_viz.models.flowchart import GraphNode +from kedro_viz.models.flowchart.nodes import GraphNode logger = logging.getLogger(__name__) diff --git a/package/tests/conftest.py b/package/tests/conftest.py index d63fca7fd3..7294379fee 100644 --- a/package/tests/conftest.py +++ b/package/tests/conftest.py @@ -21,7 +21,8 @@ ) from kedro_viz.integrations.kedro.hooks import DatasetStatsHook from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore -from kedro_viz.models.flowchart import DataNodeMetadata, GraphNode +from kedro_viz.models.flowchart.node_metadata import DataNodeMetadata +from kedro_viz.models.flowchart.nodes import GraphNode from kedro_viz.server import populate_data diff --git a/package/tests/test_api/test_rest/test_responses.py b/package/tests/test_api/test_rest/test_responses.py index 3f75904404..44b69ac53a 100644 --- a/package/tests/test_api/test_rest/test_responses.py +++ b/package/tests/test_api/test_rest/test_responses.py @@ -20,7 +20,7 @@ save_api_responses_to_fs, write_api_response_to_fs, ) -from kedro_viz.models.flowchart import TaskNode +from kedro_viz.models.flowchart.nodes import TaskNode from kedro_viz.models.metadata import Metadata diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index 66bd08f1e9..b12997d67e 100644 --- a/package/tests/test_data_access/test_managers.py +++ b/package/tests/test_data_access/test_managers.py @@ -15,14 +15,14 @@ ModularPipelinesRepository, ) from 
kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.nodes import ( DataNode, - GraphEdge, ParametersNode, - Tag, TaskNode, TranscodedDataNode, ) +from kedro_viz.models.flowchart.tag import Tag def identity(x): diff --git a/package/tests/test_data_access/test_repositories/test_graph.py b/package/tests/test_data_access/test_repositories/test_graph.py index c45232ebd1..fb6bbd10d6 100644 --- a/package/tests/test_data_access/test_repositories/test_graph.py +++ b/package/tests/test_data_access/test_repositories/test_graph.py @@ -4,7 +4,7 @@ GraphEdgesRepository, GraphNodesRepository, ) -from kedro_viz.models.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart.edge import GraphEdge class TestGraphNodeRepository: diff --git a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py index 5b5a5e783b..5decc6863e 100644 --- a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py +++ b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py @@ -6,11 +6,8 @@ from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID from kedro_viz.data_access.repositories import ModularPipelinesRepository -from kedro_viz.models.flowchart import ( - GraphNodeType, - ModularPipelineChild, - ModularPipelineNode, -) +from kedro_viz.models.flowchart.nodes import GraphNodeType, ModularPipelineNode +from kedro_viz.models.flowchart.pipelines import ModularPipelineChild @pytest.fixture diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart.py index 01238f286d..f030eeea58 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart.py @@ -9,18 +9,20 @@ from kedro_datasets.pandas import CSVDataset, ParquetDataset from 
kedro_datasets.partitions.partitioned_dataset import PartitionedDataset -from kedro_viz.models.flowchart import ( - DataNode, +from kedro_viz.models.flowchart.node_metadata import ( DataNodeMetadata, + ParametersNodeMetadata, + TaskNodeMetadata, + TranscodedDataNodeMetadata, +) +from kedro_viz.models.flowchart.nodes import ( + DataNode, GraphNode, ParametersNode, - ParametersNodeMetadata, - RegisteredPipeline, TaskNode, - TaskNodeMetadata, TranscodedDataNode, - TranscodedDataNodeMetadata, ) +from kedro_viz.models.flowchart.pipelines import RegisteredPipeline def identity(x): diff --git a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index 80d76fae5a..c949a9f98b 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -1,6 +1,6 @@ import pytest -from kedro_viz.models.flowchart import GraphNode +from kedro_viz.models.flowchart.nodes import GraphNode from kedro_viz.services.layers import sort_layers From 4d0941714f065126631568d4641b74e7e0f113fa Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 25 Oct 2024 10:54:08 +0100 Subject: [PATCH 09/18] resolve circular dependency Signed-off-by: Sajid Alam --- .../kedro_viz/models/flowchart/model_utils.py | 30 +++++++++++++++++++ package/kedro_viz/models/flowchart/nodes.py | 16 +--------- .../kedro_viz/models/flowchart/pipelines.py | 15 +--------- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index c23291b352..d135a77cb6 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -1,6 +1,7 @@ """`kedro_viz.models.flowchart.model_utils` defines utils for Kedro entities in a viz graph.""" import logging +from enum import Enum from types import FunctionType from typing import Any, Dict, Optional @@ -38,6 +39,11 @@ def get_dataset_type(dataset: 
AbstractDataset) -> str: return f"{dataset.__class__.__module__}.{dataset.__class__.__qualname__}" +# ============================================================================= +# Shared base classes and enumerations for model components +# ============================================================================= + + class NamedEntity(BaseModel): """Represent a named entity (Tag/Registered Pipeline) in a Kedro project Args: @@ -59,3 +65,27 @@ class NamedEntity(BaseModel): def set_name(cls, _, info: ValidationInfo): assert "id" in info.data return info.data["id"] + + +class GraphNodeType(str, Enum): + """Represent all possible node types in the graph representation of a Kedro pipeline. + The type needs to inherit from str as well so FastAPI can serialise it. See: + https://fastapi.tiangolo.com/tutorial/path-params/#working-with-python-enumerations + """ + + TASK = "task" + DATA = "data" + PARAMETERS = "parameters" + MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility + + +class ModularPipelineChild(BaseModel, frozen=True): + """Represent a child of a modular pipeline. 
+ + Args: + id (str): Id of the modular pipeline child + type (GraphNodeType): Type of modular pipeline child + """ + + id: str + type: GraphNodeType diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index 08da784eb4..39f015fcef 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -2,7 +2,6 @@ import logging from abc import ABC -from enum import Enum from typing import Any, Dict, Optional, Set, Union, cast from fastapi.encoders import jsonable_encoder @@ -31,24 +30,11 @@ from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding -from .model_utils import get_dataset_type -from .pipelines import ModularPipelineChild +from .model_utils import GraphNodeType, ModularPipelineChild, get_dataset_type logger = logging.getLogger(__name__) -class GraphNodeType(str, Enum): - """Represent all possible node types in the graph representation of a Kedro pipeline. - The type needs to inherit from str as well so FastAPI can serialise it. See: - https://fastapi.tiangolo.com/tutorial/path-params/#working-with-python-enumerations - """ - - TASK = "task" - DATA = "data" - PARAMETERS = "parameters" - MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility - - class GraphNode(BaseModel, ABC): """Represent a node in the graph representation of a Kedro pipeline. 
All node models except the metadata node models should inherit from this class diff --git a/package/kedro_viz/models/flowchart/pipelines.py b/package/kedro_viz/models/flowchart/pipelines.py index ba343dcf2b..0ea7b95815 100644 --- a/package/kedro_viz/models/flowchart/pipelines.py +++ b/package/kedro_viz/models/flowchart/pipelines.py @@ -4,26 +4,13 @@ from pydantic import BaseModel, Field -from .model_utils import NamedEntity -from .nodes import GraphNodeType +from .model_utils import GraphNodeType, ModularPipelineChild, NamedEntity class RegisteredPipeline(NamedEntity): """Represent a registered pipeline in a Kedro project.""" -class ModularPipelineChild(BaseModel, frozen=True): - """Represent a child of a modular pipeline. - - Args: - id (str): Id of the modular pipeline child - type (GraphNodeType): Type of modular pipeline child - """ - - id: str - type: GraphNodeType - - class ModularPipelineNode(BaseModel): """Represent a modular pipeline node in the graph.""" From 79f51408c9323754c9506239dcf38b8ab4909e93 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 25 Oct 2024 11:47:35 +0100 Subject: [PATCH 10/18] fix tests Signed-off-by: Sajid Alam --- package/kedro_viz/data_access/managers.py | 7 ++----- .../data_access/repositories/modular_pipelines.py | 8 ++------ package/kedro_viz/models/flowchart/model_utils.py | 7 ++----- package/kedro_viz/models/flowchart/node_metadata.py | 7 ------- package/kedro_viz/models/flowchart/nodes.py | 2 +- .../test_data_access/test_repositories/test_graph.py | 1 + .../test_repositories/test_modular_pipelines.py | 4 ++-- 7 files changed, 10 insertions(+), 26 deletions(-) diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 41634dc9f5..3561801a15 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -15,19 +15,16 @@ from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID from 
kedro_viz.integrations.utils import UnavailableDataset from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.model_utils import GraphNodeType, ModularPipelineChild from kedro_viz.models.flowchart.nodes import ( DataNode, GraphNode, - GraphNodeType, ModularPipelineNode, ParametersNode, TaskNode, TranscodedDataNode, ) -from kedro_viz.models.flowchart.pipelines import ( - ModularPipelineChild, - RegisteredPipeline, -) +from kedro_viz.models.flowchart.pipelines import RegisteredPipeline from kedro_viz.services import layers_services from kedro_viz.utils import _strip_transcoding, is_dataset_param diff --git a/package/kedro_viz/data_access/repositories/modular_pipelines.py b/package/kedro_viz/data_access/repositories/modular_pipelines.py index 6c5d48ef4f..d3686217d1 100644 --- a/package/kedro_viz/data_access/repositories/modular_pipelines.py +++ b/package/kedro_viz/data_access/repositories/modular_pipelines.py @@ -9,12 +9,8 @@ from kedro.pipeline.node import Node as KedroNode from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID -from kedro_viz.models.flowchart.nodes import ( - GraphNode, - GraphNodeType, - ModularPipelineNode, -) -from kedro_viz.models.flowchart.pipelines import ModularPipelineChild +from kedro_viz.models.flowchart.model_utils import GraphNodeType, ModularPipelineChild +from kedro_viz.models.flowchart.nodes import GraphNode, ModularPipelineNode from kedro_viz.utils import _hash, _hash_input_output, is_dataset_param diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index d135a77cb6..7425788389 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -14,6 +14,8 @@ # older versions from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore +from kedro_viz.models.utils import get_dataset_type + logger = logging.getLogger(__name__) @@ -34,11 +36,6 @@ def 
_extract_wrapped_func(func: FunctionType) -> FunctionType: return func if wrapped_func is None else wrapped_func -def get_dataset_type(dataset: AbstractDataset) -> str: - """Utility function to get the dataset type.""" - return f"{dataset.__class__.__module__}.{dataset.__class__.__qualname__}" - - # ============================================================================= # Shared base classes and enumerations for model components # ============================================================================= diff --git a/package/kedro_viz/models/flowchart/node_metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py index acdcbda4a3..6ce7f54b6e 100644 --- a/package/kedro_viz/models/flowchart/node_metadata.py +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -9,13 +9,6 @@ from kedro.pipeline.node import Node as KedroNode from pydantic import BaseModel, Field, field_validator, model_validator -try: - # kedro 0.18.11 onwards - from kedro.io.core import DatasetError -except ImportError: # pragma: no cover - # older versions - from kedro.io.core import DataSetError as DatasetError # type: ignore - try: # kedro 0.18.12 onwards from kedro.io.core import AbstractDataset diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index 39f015fcef..b68b241b30 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,4 +1,4 @@ -"""`kedro_viz.models.flowchart.nodes` defines data models to represent Kedro nodes in a viz graph.""" +"""`kedro_viz.models.flowchart.nodes` defines models to represent Kedro nodes in a viz graph.""" import logging from abc import ABC diff --git a/package/tests/test_data_access/test_repositories/test_graph.py b/package/tests/test_data_access/test_repositories/test_graph.py index fb6bbd10d6..51f8684368 100644 --- a/package/tests/test_data_access/test_repositories/test_graph.py +++ 
b/package/tests/test_data_access/test_repositories/test_graph.py @@ -5,6 +5,7 @@ GraphNodesRepository, ) from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.nodes import GraphNode class TestGraphNodeRepository: diff --git a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py index 5decc6863e..ccb05c0d0a 100644 --- a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py +++ b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py @@ -6,8 +6,8 @@ from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID from kedro_viz.data_access.repositories import ModularPipelinesRepository -from kedro_viz.models.flowchart.nodes import GraphNodeType, ModularPipelineNode -from kedro_viz.models.flowchart.pipelines import ModularPipelineChild +from kedro_viz.models.flowchart.model_utils import GraphNodeType, ModularPipelineChild +from kedro_viz.models.flowchart.nodes import ModularPipelineNode @pytest.fixture From 0f9d2cd3bf7bd3acdc8a0a4e316a56c65d7244a4 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Fri, 25 Oct 2024 12:32:53 +0100 Subject: [PATCH 11/18] lint Signed-off-by: Sajid Alam --- .../kedro_viz/models/flowchart/model_utils.py | 13 +++----- .../models/flowchart/node_metadata.py | 32 +++++++++++-------- package/kedro_viz/models/flowchart/nodes.py | 5 ++- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index 7425788389..df4c9fdf44 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -7,19 +7,13 @@ from pydantic import BaseModel, Field, ValidationInfo, field_validator -try: - # kedro 0.18.12 onwards - from kedro.io.core import AbstractDataset -except ImportError: # pragma: no cover - # older versions - from kedro.io.core 
import AbstractDataSet as AbstractDataset # type: ignore - -from kedro_viz.models.utils import get_dataset_type - logger = logging.getLogger(__name__) def _parse_filepath(dataset_description: Dict[str, Any]) -> Optional[str]: + """ + Extract the file path from a dataset description dictionary. + """ filepath = dataset_description.get("filepath") or dataset_description.get("path") return str(filepath) if filepath else None @@ -60,6 +54,7 @@ class NamedEntity(BaseModel): @field_validator("name") @classmethod def set_name(cls, _, info: ValidationInfo): + """Ensures that the 'name' field is set to the value of 'id' if 'name' is not provided.""" assert "id" in info.data return info.data["id"] diff --git a/package/kedro_viz/models/flowchart/node_metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py index 6ce7f54b6e..680610ed2a 100644 --- a/package/kedro_viz/models/flowchart/node_metadata.py +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -1,4 +1,9 @@ -"""`kedro_viz.models.flowchart.node_metadata` defines data models to represent Kedro metadata in a viz graph.""" +""" +`kedro_viz.models.flowchart.node_metadata` defines data models to represent +Kedro metadata in a visualization graph. 
+""" + +# pylint: disable=protected-access, missing-function-docstring import inspect import logging @@ -16,24 +21,26 @@ # older versions from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore -from .model_utils import _extract_wrapped_func, _parse_filepath, get_dataset_type +from kedro_viz.models.utils import get_dataset_type + +from .model_utils import _extract_wrapped_func, _parse_filepath from .nodes import DataNode, ParametersNode, TaskNode, TranscodedDataNode logger = logging.getLogger(__name__) class GraphNodeMetadata(BaseModel, ABC): - """Represent a graph node's metadata""" + """Represent a graph node's metadata.""" class TaskNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TaskNode + """Represent the metadata of a TaskNode. Args: task_node (TaskNode): Task node to which this metadata belongs to. Raises: - AssertionError: If task_node is not supplied during instantiation + AssertionError: If task_node is not supplied during instantiation. """ task_node: TaskNode = Field(..., exclude=True) @@ -133,7 +140,7 @@ def set_outputs(cls, _): class DataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a DataNode + """Represent the metadata of a DataNode. Args: data_node (DataNode): Data node to which this metadata belongs to. @@ -144,7 +151,7 @@ class DataNodeMetadata(GraphNodeMetadata): or UI to manage the preview settings. Raises: - AssertionError: If data_node is not supplied during instantiation + AssertionError: If data_node is not supplied during instantiation. """ data_node: DataNode = Field(..., exclude=True) @@ -287,13 +294,12 @@ def set_stats(cls, _): class TranscodedDataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TranscodedDataNode + """Represent the metadata of a TranscodedDataNode. Args: - transcoded_data_node (TranscodedDataNode): The underlying transcoded - data node to which this metadata belongs to. 
+ transcoded_data_node: The transcoded data node to which this metadata belongs. Raises: - AssertionError: If transcoded_data_node is not supplied during instantiation + AssertionError: If `transcoded_data_node` is not supplied during instantiation. """ transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) @@ -368,14 +374,14 @@ def set_stats(cls, _): class ParametersNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a ParametersNode + """Represent the metadata of a ParametersNode. Args: parameters_node (ParametersNode): The underlying parameters node for the parameters metadata node. Raises: - AssertionError: If parameters_node is not supplied during instantiation + AssertionError: If parameters_node is not supplied during instantiation. """ parameters_node: ParametersNode = Field(..., exclude=True) diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index b68b241b30..ca20077749 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,5 +1,7 @@ """`kedro_viz.models.flowchart.nodes` defines models to represent Kedro nodes in a viz graph.""" +# pylint: disable=protected-access, missing-function-docstring + import logging from abc import ABC from typing import Any, Dict, Optional, Set, Union, cast @@ -28,9 +30,10 @@ # older versions from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore +from kedro_viz.models.utils import get_dataset_type from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding -from .model_utils import GraphNodeType, ModularPipelineChild, get_dataset_type +from .model_utils import GraphNodeType, ModularPipelineChild logger = logging.getLogger(__name__) From a5d23728840f50df4ee5a3c90d951e5a9f23ffcf Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 28 Oct 2024 11:25:32 +0000 Subject: [PATCH 12/18] changes based on review Signed-off-by: Sajid Alam --- 
package/kedro_viz/data_access/managers.py | 5 ++-- .../repositories/modular_pipelines.py | 8 +++-- .../repositories/registered_pipelines.py | 2 +- .../kedro_viz/models/flowchart/model_utils.py | 12 -------- package/kedro_viz/models/flowchart/nodes.py | 14 ++++++++- .../kedro_viz/models/flowchart/pipeline.py | 7 +++++ .../kedro_viz/models/flowchart/pipelines.py | 29 ------------------- .../test_modular_pipelines.py | 4 +-- package/tests/test_models/test_flowchart.py | 2 +- 9 files changed, 33 insertions(+), 50 deletions(-) create mode 100644 package/kedro_viz/models/flowchart/pipeline.py delete mode 100644 package/kedro_viz/models/flowchart/pipelines.py diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 3561801a15..8f0f1473ee 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -15,16 +15,17 @@ from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID from kedro_viz.integrations.utils import UnavailableDataset from kedro_viz.models.flowchart.edge import GraphEdge -from kedro_viz.models.flowchart.model_utils import GraphNodeType, ModularPipelineChild +from kedro_viz.models.flowchart.model_utils import GraphNodeType from kedro_viz.models.flowchart.nodes import ( DataNode, GraphNode, + ModularPipelineChild, ModularPipelineNode, ParametersNode, TaskNode, TranscodedDataNode, ) -from kedro_viz.models.flowchart.pipelines import RegisteredPipeline +from kedro_viz.models.flowchart.pipeline import RegisteredPipeline from kedro_viz.services import layers_services from kedro_viz.utils import _strip_transcoding, is_dataset_param diff --git a/package/kedro_viz/data_access/repositories/modular_pipelines.py b/package/kedro_viz/data_access/repositories/modular_pipelines.py index d3686217d1..79c3b07df1 100644 --- a/package/kedro_viz/data_access/repositories/modular_pipelines.py +++ b/package/kedro_viz/data_access/repositories/modular_pipelines.py 
@@ -9,8 +9,12 @@ from kedro.pipeline.node import Node as KedroNode from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID -from kedro_viz.models.flowchart.model_utils import GraphNodeType, ModularPipelineChild -from kedro_viz.models.flowchart.nodes import GraphNode, ModularPipelineNode +from kedro_viz.models.flowchart.model_utils import GraphNodeType +from kedro_viz.models.flowchart.nodes import ( + GraphNode, + ModularPipelineChild, + ModularPipelineNode, +) from kedro_viz.utils import _hash, _hash_input_output, is_dataset_param diff --git a/package/kedro_viz/data_access/repositories/registered_pipelines.py b/package/kedro_viz/data_access/repositories/registered_pipelines.py index 08018c3a3d..62a372cebd 100644 --- a/package/kedro_viz/data_access/repositories/registered_pipelines.py +++ b/package/kedro_viz/data_access/repositories/registered_pipelines.py @@ -4,7 +4,7 @@ from collections import OrderedDict, defaultdict from typing import Dict, List, Optional, Set -from kedro_viz.models.flowchart.pipelines import RegisteredPipeline +from kedro_viz.models.flowchart.pipeline import RegisteredPipeline class RegisteredPipelinesRepository: diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index df4c9fdf44..228d1bb6b6 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -69,15 +69,3 @@ class GraphNodeType(str, Enum): DATA = "data" PARAMETERS = "parameters" MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility - - -class ModularPipelineChild(BaseModel, frozen=True): - """Represent a child of a modular pipeline. 
- - Args: - id (str): Id of the modular pipeline child - type (GraphNodeType): Type of modular pipeline child - """ - - id: str - type: GraphNodeType diff --git a/package/kedro_viz/models/flowchart/nodes.py b/package/kedro_viz/models/flowchart/nodes.py index ca20077749..e9242d27a1 100644 --- a/package/kedro_viz/models/flowchart/nodes.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -33,7 +33,7 @@ from kedro_viz.models.utils import get_dataset_type from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding -from .model_utils import GraphNodeType, ModularPipelineChild +from .model_utils import GraphNodeType logger = logging.getLogger(__name__) @@ -218,6 +218,18 @@ def has_metadata(self) -> bool: return self.kedro_obj is not None +class ModularPipelineChild(BaseModel, frozen=True): + """Represent a child of a modular pipeline. + + Args: + id (str): Id of the modular pipeline child + type (GraphNodeType): Type of modular pipeline child + """ + + id: str + type: GraphNodeType + + class TaskNode(GraphNode): """Represent a graph node of type task diff --git a/package/kedro_viz/models/flowchart/pipeline.py b/package/kedro_viz/models/flowchart/pipeline.py new file mode 100644 index 0000000000..f19f86c445 --- /dev/null +++ b/package/kedro_viz/models/flowchart/pipeline.py @@ -0,0 +1,7 @@ +"""`kedro_viz.models.flowchart.pipelines` represent Kedro pipelines in a viz graph.""" + +from .model_utils import NamedEntity + + +class RegisteredPipeline(NamedEntity): + """Represent a registered pipeline in a Kedro project.""" diff --git a/package/kedro_viz/models/flowchart/pipelines.py b/package/kedro_viz/models/flowchart/pipelines.py deleted file mode 100644 index 0ea7b95815..0000000000 --- a/package/kedro_viz/models/flowchart/pipelines.py +++ /dev/null @@ -1,29 +0,0 @@ -"""`kedro_viz.models.flowchart.pipelines` represent Kedro pipelines in a viz graph.""" - -from typing import Optional, Set - -from pydantic import BaseModel, Field - -from .model_utils import 
GraphNodeType, ModularPipelineChild, NamedEntity - - -class RegisteredPipeline(NamedEntity): - """Represent a registered pipeline in a Kedro project.""" - - -class ModularPipelineNode(BaseModel): - """Represent a modular pipeline node in the graph.""" - - id: str - name: str - modular_pipelines: Optional[Set[str]] = None - children: Set[ModularPipelineChild] = Field( - set(), description="The children for the modular pipeline node" - ) - inputs: Set[str] = Field( - set(), description="The input datasets to the modular pipeline node" - ) - outputs: Set[str] = Field( - set(), description="The output datasets from the modular pipeline node" - ) - type: str = GraphNodeType.MODULAR_PIPELINE.value diff --git a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py index ccb05c0d0a..ef6058ca8b 100644 --- a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py +++ b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py @@ -6,8 +6,8 @@ from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID from kedro_viz.data_access.repositories import ModularPipelinesRepository -from kedro_viz.models.flowchart.model_utils import GraphNodeType, ModularPipelineChild -from kedro_viz.models.flowchart.nodes import ModularPipelineNode +from kedro_viz.models.flowchart.model_utils import GraphNodeType +from kedro_viz.models.flowchart.nodes import ModularPipelineChild, ModularPipelineNode @pytest.fixture diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart.py index f030eeea58..a60fd9304f 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart.py @@ -22,7 +22,7 @@ TaskNode, TranscodedDataNode, ) -from kedro_viz.models.flowchart.pipelines import RegisteredPipeline +from kedro_viz.models.flowchart.pipeline import RegisteredPipeline def identity(x): From 
8dbc77d833f77c5c5752d48000601cd0e5acaebb Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 28 Oct 2024 13:05:13 +0000 Subject: [PATCH 13/18] split flowchart test file Signed-off-by: Sajid Alam --- ...est_flowchart.py => test_node_metadata.py} | 268 +----------------- package/tests/test_models/test_nodes.py | 248 ++++++++++++++++ package/tests/test_models/test_pipeline.py | 32 +++ 3 files changed, 281 insertions(+), 267 deletions(-) rename package/tests/test_models/{test_flowchart.py => test_node_metadata.py} (55%) create mode 100644 package/tests/test_models/test_nodes.py create mode 100644 package/tests/test_models/test_pipeline.py diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_node_metadata.py similarity index 55% rename from package/tests/test_models/test_flowchart.py rename to package/tests/test_models/test_node_metadata.py index a60fd9304f..f8ebd4f8ec 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_node_metadata.py @@ -1,7 +1,6 @@ from functools import partial from pathlib import Path from textwrap import dedent -from unittest.mock import call, patch import pytest from kedro.io import MemoryDataset @@ -15,14 +14,7 @@ TaskNodeMetadata, TranscodedDataNodeMetadata, ) -from kedro_viz.models.flowchart.nodes import ( - DataNode, - GraphNode, - ParametersNode, - TaskNode, - TranscodedDataNode, -) -from kedro_viz.models.flowchart.pipeline import RegisteredPipeline +from kedro_viz.models.flowchart.nodes import GraphNode def identity(x): @@ -58,264 +50,6 @@ def full_func(a, b, c, x): partial_func = partial(full_func, 3, 1, 4) -class TestGraphNodeCreation: - @pytest.mark.parametrize( - "namespace,expected_modular_pipelines", - [ - (None, set()), - ( - "uk.data_science.model_training", - set( - [ - "uk", - "uk.data_science", - "uk.data_science.model_training", - ] - ), - ), - ], - ) - def test_create_task_node(self, namespace, expected_modular_pipelines): - kedro_node = node( - 
identity, - inputs="x", - outputs="y", - name="identity_node", - tags={"tag"}, - namespace=namespace, - ) - task_node = GraphNode.create_task_node( - kedro_node, "identity_node", expected_modular_pipelines - ) - assert isinstance(task_node, TaskNode) - assert task_node.kedro_obj is kedro_node - assert task_node.name == "identity_node" - assert task_node.tags == {"tag"} - assert task_node.pipelines == set() - assert task_node.modular_pipelines == expected_modular_pipelines - assert task_node.namespace == namespace - - @pytest.mark.parametrize( - "dataset_name, expected_modular_pipelines", - [ - ("dataset", set()), - ( - "uk.data_science.model_training.dataset", - set( - [ - "uk", - "uk.data_science", - "uk.data_science.model_training", - ] - ), - ), - ], - ) - def test_create_data_node(self, dataset_name, expected_modular_pipelines): - kedro_dataset = CSVDataset(filepath="foo.csv") - data_node = GraphNode.create_data_node( - dataset_id=dataset_name, - dataset_name=dataset_name, - layer="raw", - tags=set(), - dataset=kedro_dataset, - stats={"rows": 10, "columns": 5, "file_size": 1024}, - modular_pipelines=set(expected_modular_pipelines), - ) - assert isinstance(data_node, DataNode) - assert data_node.kedro_obj is kedro_dataset - assert data_node.id == dataset_name - assert data_node.name == dataset_name - assert data_node.layer == "raw" - assert data_node.tags == set() - assert data_node.pipelines == set() - assert data_node.modular_pipelines == expected_modular_pipelines - assert data_node.stats["rows"] == 10 - assert data_node.stats["columns"] == 5 - assert data_node.stats["file_size"] == 1024 - - @pytest.mark.parametrize( - "transcoded_dataset_name, original_name", - [ - ("dataset@pandas2", "dataset"), - ( - "uk.data_science.model_training.dataset@pandas2", - "uk.data_science.model_training.dataset", - ), - ], - ) - def test_create_transcoded_data_node(self, transcoded_dataset_name, original_name): - kedro_dataset = CSVDataset(filepath="foo.csv") - data_node = 
GraphNode.create_data_node( - dataset_id=original_name, - dataset_name=transcoded_dataset_name, - layer="raw", - tags=set(), - dataset=kedro_dataset, - stats={"rows": 10, "columns": 2, "file_size": 1048}, - modular_pipelines=set(), - ) - assert isinstance(data_node, TranscodedDataNode) - assert data_node.id == original_name - assert data_node.name == original_name - assert data_node.layer == "raw" - assert data_node.tags == set() - assert data_node.pipelines == set() - assert data_node.stats["rows"] == 10 - assert data_node.stats["columns"] == 2 - assert data_node.stats["file_size"] == 1048 - - def test_create_parameters_all_parameters(self): - parameters_dataset = MemoryDataset( - data={"test_split_ratio": 0.3, "num_epochs": 1000} - ) - parameters_node = GraphNode.create_parameters_node( - dataset_id="parameters", - dataset_name="parameters", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert parameters_node.id == "parameters" - assert parameters_node.is_all_parameters() - assert not parameters_node.is_single_parameter() - assert parameters_node.parameter_value == { - "test_split_ratio": 0.3, - "num_epochs": 1000, - } - assert not parameters_node.modular_pipelines - - @pytest.mark.parametrize( - "dataset_name,expected_modular_pipelines", - [ - ("params:test_split_ratio", set()), - ( - "params:uk.data_science.model_training.test_split_ratio", - set(["uk", "uk.data_science", "uk.data_science.model_training"]), - ), - ], - ) - def test_create_parameters_node_single_parameter( - self, dataset_name, expected_modular_pipelines - ): - parameters_dataset = MemoryDataset(data=0.3) - parameters_node = GraphNode.create_parameters_node( - dataset_id=dataset_name, - dataset_name=dataset_name, - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=expected_modular_pipelines, - ) - assert 
isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert not parameters_node.is_all_parameters() - assert parameters_node.is_single_parameter() - assert parameters_node.parameter_value == 0.3 - assert parameters_node.modular_pipelines == expected_modular_pipelines - - def test_create_single_parameter_with_complex_type(self): - parameters_dataset = MemoryDataset(data=object()) - parameters_node = GraphNode.create_parameters_node( - dataset_id="params:test_split_ratio", - dataset_name="params:test_split_ratio", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert not parameters_node.is_all_parameters() - assert parameters_node.is_single_parameter() - assert isinstance(parameters_node.parameter_value, str) - - def test_create_all_parameters_with_complex_type(self): - mock_object = object() - parameters_dataset = MemoryDataset( - data={ - "test_split_ratio": 0.3, - "num_epochs": 1000, - "complex_param": mock_object, - } - ) - parameters_node = GraphNode.create_parameters_node( - dataset_id="parameters", - dataset_name="parameters", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert parameters_node.id == "parameters" - assert parameters_node.is_all_parameters() - assert not parameters_node.is_single_parameter() - assert isinstance(parameters_node.parameter_value, str) - - def test_create_non_existing_parameter_node(self): - """Test the case where ``parameters`` is equal to None""" - parameters_node = GraphNode.create_parameters_node( - dataset_id="non_existing", - dataset_name="non_existing", - layer=None, - tags=set(), - parameters=None, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, 
ParametersNode) - assert parameters_node.parameter_value is None - - @patch("logging.Logger.warning") - def test_create_non_existing_parameter_node_empty_dataset(self, patched_warning): - """Test the case where ``parameters`` is equal to a MemoryDataset with no data""" - parameters_dataset = MemoryDataset() - parameters_node = GraphNode.create_parameters_node( - dataset_id="non_existing", - dataset_name="non_existing", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert parameters_node.parameter_value is None - patched_warning.assert_has_calls( - [call("Cannot find parameter `%s` in the catalog.", "non_existing")] - ) - - -class TestGraphNodePipelines: - def test_registered_pipeline_name(self): - pipeline = RegisteredPipeline(id="__default__") - assert pipeline.name == "__default__" - - def test_modular_pipeline_name(self): - pipeline = GraphNode.create_modular_pipeline_node("data_engineering") - assert pipeline.name == "data_engineering" - - def test_add_node_to_pipeline(self): - default_pipeline = RegisteredPipeline(id="__default__") - another_pipeline = RegisteredPipeline(id="testing") - kedro_dataset = CSVDataset(filepath="foo.csv") - data_node = GraphNode.create_data_node( - dataset_id="dataset@transcoded", - dataset_name="dataset@transcoded", - layer="raw", - tags=set(), - dataset=kedro_dataset, - stats={"rows": 10, "columns": 2, "file_size": 1048}, - modular_pipelines=set(), - ) - assert data_node.pipelines == set() - data_node.add_pipeline(default_pipeline.id) - assert data_node.belongs_to_pipeline(default_pipeline.id) - assert not data_node.belongs_to_pipeline(another_pipeline.id) - - class TestGraphNodeMetadata: @pytest.mark.parametrize( "dataset,has_metadata", [(MemoryDataset(data=1), True), (None, False)] diff --git a/package/tests/test_models/test_nodes.py b/package/tests/test_models/test_nodes.py new file mode 100644 index 0000000000..2d7a59d338 --- /dev/null +++ 
b/package/tests/test_models/test_nodes.py @@ -0,0 +1,248 @@ +from unittest.mock import call, patch + +import pytest +from kedro.io import MemoryDataset +from kedro.pipeline.node import node +from kedro_datasets.pandas import CSVDataset + +from kedro_viz.models.flowchart.nodes import ( + DataNode, + GraphNode, + ParametersNode, + TaskNode, + TranscodedDataNode, +) + + +def identity(x): + return x + + +class TestGraphNodeCreation: + @pytest.mark.parametrize( + "namespace,expected_modular_pipelines", + [ + (None, set()), + ( + "uk.data_science.model_training", + set( + [ + "uk", + "uk.data_science", + "uk.data_science.model_training", + ] + ), + ), + ], + ) + def test_create_task_node(self, namespace, expected_modular_pipelines): + kedro_node = node( + identity, + inputs="x", + outputs="y", + name="identity_node", + tags={"tag"}, + namespace=namespace, + ) + task_node = GraphNode.create_task_node( + kedro_node, "identity_node", expected_modular_pipelines + ) + assert isinstance(task_node, TaskNode) + assert task_node.kedro_obj is kedro_node + assert task_node.name == "identity_node" + assert task_node.tags == {"tag"} + assert task_node.pipelines == set() + assert task_node.modular_pipelines == expected_modular_pipelines + assert task_node.namespace == namespace + + @pytest.mark.parametrize( + "dataset_name, expected_modular_pipelines", + [ + ("dataset", set()), + ( + "uk.data_science.model_training.dataset", + set( + [ + "uk", + "uk.data_science", + "uk.data_science.model_training", + ] + ), + ), + ], + ) + def test_create_data_node(self, dataset_name, expected_modular_pipelines): + kedro_dataset = CSVDataset(filepath="foo.csv") + data_node = GraphNode.create_data_node( + dataset_id=dataset_name, + dataset_name=dataset_name, + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 5, "file_size": 1024}, + modular_pipelines=set(expected_modular_pipelines), + ) + assert isinstance(data_node, DataNode) + assert data_node.kedro_obj is 
kedro_dataset + assert data_node.id == dataset_name + assert data_node.name == dataset_name + assert data_node.layer == "raw" + assert data_node.tags == set() + assert data_node.pipelines == set() + assert data_node.modular_pipelines == expected_modular_pipelines + assert data_node.stats["rows"] == 10 + assert data_node.stats["columns"] == 5 + assert data_node.stats["file_size"] == 1024 + + @pytest.mark.parametrize( + "transcoded_dataset_name, original_name", + [ + ("dataset@pandas2", "dataset"), + ( + "uk.data_science.model_training.dataset@pandas2", + "uk.data_science.model_training.dataset", + ), + ], + ) + def test_create_transcoded_data_node(self, transcoded_dataset_name, original_name): + kedro_dataset = CSVDataset(filepath="foo.csv") + data_node = GraphNode.create_data_node( + dataset_id=original_name, + dataset_name=transcoded_dataset_name, + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 2, "file_size": 1048}, + modular_pipelines=set(), + ) + assert isinstance(data_node, TranscodedDataNode) + assert data_node.id == original_name + assert data_node.name == original_name + assert data_node.layer == "raw" + assert data_node.tags == set() + assert data_node.pipelines == set() + assert data_node.stats["rows"] == 10 + assert data_node.stats["columns"] == 2 + assert data_node.stats["file_size"] == 1048 + + def test_create_parameters_all_parameters(self): + parameters_dataset = MemoryDataset( + data={"test_split_ratio": 0.3, "num_epochs": 1000} + ) + parameters_node = GraphNode.create_parameters_node( + dataset_id="parameters", + dataset_name="parameters", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert parameters_node.id == "parameters" + assert parameters_node.is_all_parameters() + assert not parameters_node.is_single_parameter() + assert 
parameters_node.parameter_value == { + "test_split_ratio": 0.3, + "num_epochs": 1000, + } + assert not parameters_node.modular_pipelines + + @pytest.mark.parametrize( + "dataset_name,expected_modular_pipelines", + [ + ("params:test_split_ratio", set()), + ( + "params:uk.data_science.model_training.test_split_ratio", + set(["uk", "uk.data_science", "uk.data_science.model_training"]), + ), + ], + ) + def test_create_parameters_node_single_parameter( + self, dataset_name, expected_modular_pipelines + ): + parameters_dataset = MemoryDataset(data=0.3) + parameters_node = GraphNode.create_parameters_node( + dataset_id=dataset_name, + dataset_name=dataset_name, + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=expected_modular_pipelines, + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert not parameters_node.is_all_parameters() + assert parameters_node.is_single_parameter() + assert parameters_node.parameter_value == 0.3 + assert parameters_node.modular_pipelines == expected_modular_pipelines + + def test_create_single_parameter_with_complex_type(self): + parameters_dataset = MemoryDataset(data=object()) + parameters_node = GraphNode.create_parameters_node( + dataset_id="params:test_split_ratio", + dataset_name="params:test_split_ratio", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert not parameters_node.is_all_parameters() + assert parameters_node.is_single_parameter() + assert isinstance(parameters_node.parameter_value, str) + + def test_create_all_parameters_with_complex_type(self): + mock_object = object() + parameters_dataset = MemoryDataset( + data={ + "test_split_ratio": 0.3, + "num_epochs": 1000, + "complex_param": mock_object, + } + ) + parameters_node = GraphNode.create_parameters_node( + 
dataset_id="parameters", + dataset_name="parameters", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert parameters_node.id == "parameters" + assert parameters_node.is_all_parameters() + assert not parameters_node.is_single_parameter() + assert isinstance(parameters_node.parameter_value, str) + + def test_create_non_existing_parameter_node(self): + """Test the case where ``parameters`` is equal to None""" + parameters_node = GraphNode.create_parameters_node( + dataset_id="non_existing", + dataset_name="non_existing", + layer=None, + tags=set(), + parameters=None, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.parameter_value is None + + @patch("logging.Logger.warning") + def test_create_non_existing_parameter_node_empty_dataset(self, patched_warning): + """Test the case where ``parameters`` is equal to a MemoryDataset with no data""" + parameters_dataset = MemoryDataset() + parameters_node = GraphNode.create_parameters_node( + dataset_id="non_existing", + dataset_name="non_existing", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert parameters_node.parameter_value is None + patched_warning.assert_has_calls( + [call("Cannot find parameter `%s` in the catalog.", "non_existing")] + ) diff --git a/package/tests/test_models/test_pipeline.py b/package/tests/test_models/test_pipeline.py new file mode 100644 index 0000000000..fcd9e08375 --- /dev/null +++ b/package/tests/test_models/test_pipeline.py @@ -0,0 +1,32 @@ +from kedro_datasets.pandas import CSVDataset + +from kedro_viz.models.flowchart.nodes import GraphNode +from kedro_viz.models.flowchart.pipeline import RegisteredPipeline + + +class TestGraphNodePipelines: + def test_registered_pipeline_name(self): + pipeline = 
RegisteredPipeline(id="__default__") + assert pipeline.name == "__default__" + + def test_modular_pipeline_name(self): + pipeline = GraphNode.create_modular_pipeline_node("data_engineering") + assert pipeline.name == "data_engineering" + + def test_add_node_to_pipeline(self): + default_pipeline = RegisteredPipeline(id="__default__") + another_pipeline = RegisteredPipeline(id="testing") + kedro_dataset = CSVDataset(filepath="foo.csv") + data_node = GraphNode.create_data_node( + dataset_id="dataset@transcoded", + dataset_name="dataset@transcoded", + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 2, "file_size": 1048}, + modular_pipelines=set(), + ) + assert data_node.pipelines == set() + data_node.add_pipeline(default_pipeline.id) + assert data_node.belongs_to_pipeline(default_pipeline.id) + assert not data_node.belongs_to_pipeline(another_pipeline.id) From 8a479e5673802ca77e1b7608e8e1fd9e07f93c4a Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 28 Oct 2024 13:18:39 +0000 Subject: [PATCH 14/18] Update node_metadata.py Signed-off-by: Sajid Alam --- package/kedro_viz/models/flowchart/node_metadata.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/package/kedro_viz/models/flowchart/node_metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py index 680610ed2a..20940a9b3a 100644 --- a/package/kedro_viz/models/flowchart/node_metadata.py +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -3,8 +3,6 @@ Kedro metadata in a visualization graph. """ -# pylint: disable=protected-access, missing-function-docstring - import inspect import logging from abc import ABC @@ -248,7 +246,7 @@ def set_preview(cls, _): return cls.dataset.preview() return cls.dataset.preview(**preview_args) - except Exception as exc: # pylint: disable=broad-except + except Exception as exc: # noqa: BLE001 logger.warning( "'%s' could not be previewed. 
Full exception: %s: %s", cls.data_node.name, @@ -278,7 +276,7 @@ def set_preview_type(cls, _): ) return preview_type_name - except Exception as exc: # pylint: disable=broad-except # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover logger.warning( "'%s' did not have preview type. Full exception: %s: %s", cls.data_node.name, From b387df76b6fc9fd98a30d6bcee594973cc529d01 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 28 Oct 2024 13:54:50 +0000 Subject: [PATCH 15/18] Update ruff.toml Signed-off-by: Sajid Alam --- ruff.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ruff.toml b/ruff.toml index 52a1d6c8f3..d5a27e00ee 100644 --- a/ruff.toml +++ b/ruff.toml @@ -45,7 +45,8 @@ ignore = [ "package/features/steps/sh_run.py" = ["PLW1510"] # `subprocess.run` without explicit `check` argument "*/tests/*.py" = ["SLF", "D", "ARG"] "package/kedro_viz/models/experiment_tracking.py" = ["SLF"] -"package/kedro_viz/models/flowchart.py" = ["SLF"] +"package/kedro_viz/models/nodes.py" = ["SLF"] +"package/kedro_viz/models/node_metadata.py" = ["SLF"] "package/kedro_viz/integrations/kedro/hooks.py" = ["SLF", "BLE"] "package/kedro_viz/integrations/kedro/sqlite_store.py" = ["BLE"] "package/kedro_viz/integrations/kedro/data_loader.py" = ["SLF"] From acf88f081b1cf41e15ccd7c9df39703fea3ddff4 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 28 Oct 2024 14:04:22 +0000 Subject: [PATCH 16/18] lint Signed-off-by: Sajid Alam --- ruff.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ruff.toml b/ruff.toml index d5a27e00ee..166d54a4a7 100644 --- a/ruff.toml +++ b/ruff.toml @@ -45,8 +45,8 @@ ignore = [ "package/features/steps/sh_run.py" = ["PLW1510"] # `subprocess.run` without explicit `check` argument "*/tests/*.py" = ["SLF", "D", "ARG"] "package/kedro_viz/models/experiment_tracking.py" = ["SLF"] -"package/kedro_viz/models/nodes.py" = ["SLF"] -"package/kedro_viz/models/node_metadata.py" = ["SLF"] 
+"package/kedro_viz/models/flowchart/nodes.py" = ["SLF"] +"package/kedro_viz/models/flowchart/node_metadata.py" = ["SLF"] "package/kedro_viz/integrations/kedro/hooks.py" = ["SLF", "BLE"] "package/kedro_viz/integrations/kedro/sqlite_store.py" = ["BLE"] "package/kedro_viz/integrations/kedro/data_loader.py" = ["SLF"] From e621e981e2bbafc94b42565ad92ee8f014df930f Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Mon, 28 Oct 2024 14:54:09 +0000 Subject: [PATCH 17/18] move test files Signed-off-by: Sajid Alam --- package/tests/test_models/test_flowchart/__init__.py | 0 .../tests/test_models/{ => test_flowchart}/test_node_metadata.py | 0 package/tests/test_models/{ => test_flowchart}/test_nodes.py | 0 package/tests/test_models/{ => test_flowchart}/test_pipeline.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 package/tests/test_models/test_flowchart/__init__.py rename package/tests/test_models/{ => test_flowchart}/test_node_metadata.py (100%) rename package/tests/test_models/{ => test_flowchart}/test_nodes.py (100%) rename package/tests/test_models/{ => test_flowchart}/test_pipeline.py (100%) diff --git a/package/tests/test_models/test_flowchart/__init__.py b/package/tests/test_models/test_flowchart/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/package/tests/test_models/test_node_metadata.py b/package/tests/test_models/test_flowchart/test_node_metadata.py similarity index 100% rename from package/tests/test_models/test_node_metadata.py rename to package/tests/test_models/test_flowchart/test_node_metadata.py diff --git a/package/tests/test_models/test_nodes.py b/package/tests/test_models/test_flowchart/test_nodes.py similarity index 100% rename from package/tests/test_models/test_nodes.py rename to package/tests/test_models/test_flowchart/test_nodes.py diff --git a/package/tests/test_models/test_pipeline.py b/package/tests/test_models/test_flowchart/test_pipeline.py similarity index 100% rename from 
package/tests/test_models/test_pipeline.py rename to package/tests/test_models/test_flowchart/test_pipeline.py From 63e4b57fb96492fff1f18d1828f80f53dde326ca Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 30 Oct 2024 10:26:29 +0000 Subject: [PATCH 18/18] moved to named_entities.py Signed-off-by: Sajid Alam --- package/kedro_viz/data_access/managers.py | 2 +- .../repositories/registered_pipelines.py | 2 +- .../data_access/repositories/tags.py | 2 +- .../kedro_viz/models/flowchart/model_utils.py | 26 ------------ .../models/flowchart/named_entities.py | 41 +++++++++++++++++++ .../kedro_viz/models/flowchart/pipeline.py | 7 ---- package/kedro_viz/models/flowchart/tag.py | 10 ----- .../tests/test_data_access/test_managers.py | 2 +- .../test_flowchart/test_pipeline.py | 2 +- 9 files changed, 46 insertions(+), 48 deletions(-) create mode 100644 package/kedro_viz/models/flowchart/named_entities.py delete mode 100644 package/kedro_viz/models/flowchart/pipeline.py delete mode 100644 package/kedro_viz/models/flowchart/tag.py diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index c87f40696b..4468804c77 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -22,6 +22,7 @@ from kedro_viz.integrations.utils import UnavailableDataset from kedro_viz.models.flowchart.edge import GraphEdge from kedro_viz.models.flowchart.model_utils import GraphNodeType +from kedro_viz.models.flowchart.named_entities import RegisteredPipeline from kedro_viz.models.flowchart.nodes import ( DataNode, GraphNode, @@ -31,7 +32,6 @@ TaskNode, TranscodedDataNode, ) -from kedro_viz.models.flowchart.pipeline import RegisteredPipeline from kedro_viz.services import layers_services from kedro_viz.utils import _strip_transcoding, is_dataset_param diff --git a/package/kedro_viz/data_access/repositories/registered_pipelines.py b/package/kedro_viz/data_access/repositories/registered_pipelines.py index 
ef665dc63d..1309548fac 100644 --- a/package/kedro_viz/data_access/repositories/registered_pipelines.py +++ b/package/kedro_viz/data_access/repositories/registered_pipelines.py @@ -4,7 +4,7 @@ from collections import OrderedDict, defaultdict from typing import Dict, List, Optional, Set -from kedro_viz.models.flowchart.pipeline import RegisteredPipeline +from kedro_viz.models.flowchart.named_entities import RegisteredPipeline class RegisteredPipelinesRepository: diff --git a/package/kedro_viz/data_access/repositories/tags.py b/package/kedro_viz/data_access/repositories/tags.py index 43a770a05e..a7bd33e31f 100644 --- a/package/kedro_viz/data_access/repositories/tags.py +++ b/package/kedro_viz/data_access/repositories/tags.py @@ -3,7 +3,7 @@ from typing import Iterable, List, Set -from kedro_viz.models.flowchart.tag import Tag +from kedro_viz.models.flowchart.named_entities import Tag class TagsRepository: diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py index 228d1bb6b6..f12e94b669 100644 --- a/package/kedro_viz/models/flowchart/model_utils.py +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -5,8 +5,6 @@ from types import FunctionType from typing import Any, Dict, Optional -from pydantic import BaseModel, Field, ValidationInfo, field_validator - logger = logging.getLogger(__name__) @@ -35,30 +33,6 @@ def _extract_wrapped_func(func: FunctionType) -> FunctionType: # ============================================================================= -class NamedEntity(BaseModel): - """Represent a named entity (Tag/Registered Pipeline) in a Kedro project - Args: - id (str): Id of the registered pipeline - - Raises: - AssertionError: If id is not supplied during instantiation - """ - - id: str - name: Optional[str] = Field( - default=None, - validate_default=True, - description="The name of the entity", - ) - - @field_validator("name") - @classmethod - def set_name(cls, _, info: ValidationInfo): - 
"""Ensures that the 'name' field is set to the value of 'id' if 'name' is not provided.""" - assert "id" in info.data - return info.data["id"] - - class GraphNodeType(str, Enum): """Represent all possible node types in the graph representation of a Kedro pipeline. The type needs to inherit from str as well so FastAPI can serialise it. See: diff --git a/package/kedro_viz/models/flowchart/named_entities.py b/package/kedro_viz/models/flowchart/named_entities.py new file mode 100644 index 0000000000..65944c0764 --- /dev/null +++ b/package/kedro_viz/models/flowchart/named_entities.py @@ -0,0 +1,41 @@ +"""kedro_viz.models.flowchart.named_entities` defines data models for representing named entities +such as tags and registered pipelines within a Kedro visualization graph.""" + +from typing import Optional + +from pydantic import BaseModel, Field, ValidationInfo, field_validator + + +class NamedEntity(BaseModel): + """Represent a named entity (Tag/Registered Pipeline) in a Kedro project + Args: + id (str): Id of the registered pipeline + + Raises: + AssertionError: If id is not supplied during instantiation + """ + + id: str + name: Optional[str] = Field( + default=None, + validate_default=True, + description="The name of the entity", + ) + + @field_validator("name") + @classmethod + def set_name(cls, _, info: ValidationInfo): + """Ensures that the 'name' field is set to the value of 'id' if 'name' is not provided.""" + assert "id" in info.data + return info.data["id"] + + +class RegisteredPipeline(NamedEntity): + """Represent a registered pipeline in a Kedro project.""" + + +class Tag(NamedEntity): + """Represent a tag in a Kedro project.""" + + def __hash__(self) -> int: + return hash(self.id) diff --git a/package/kedro_viz/models/flowchart/pipeline.py b/package/kedro_viz/models/flowchart/pipeline.py deleted file mode 100644 index f19f86c445..0000000000 --- a/package/kedro_viz/models/flowchart/pipeline.py +++ /dev/null @@ -1,7 +0,0 @@ 
-"""`kedro_viz.models.flowchart.pipelines` represent Kedro pipelines in a viz graph.""" - -from .model_utils import NamedEntity - - -class RegisteredPipeline(NamedEntity): - """Represent a registered pipeline in a Kedro project.""" diff --git a/package/kedro_viz/models/flowchart/tag.py b/package/kedro_viz/models/flowchart/tag.py deleted file mode 100644 index 357f0137a9..0000000000 --- a/package/kedro_viz/models/flowchart/tag.py +++ /dev/null @@ -1,10 +0,0 @@ -"""`kedro_viz.models.tag` defines data models to represent Kedro tags in a viz graph.""" - -from .model_utils import NamedEntity - - -class Tag(NamedEntity): - """Represent a tag in a Kedro project.""" - - def __hash__(self) -> int: - return hash(self.id) diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index b12997d67e..abb8df9be5 100644 --- a/package/tests/test_data_access/test_managers.py +++ b/package/tests/test_data_access/test_managers.py @@ -16,13 +16,13 @@ ) from kedro_viz.integrations.utils import UnavailableDataset from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.named_entities import Tag from kedro_viz.models.flowchart.nodes import ( DataNode, ParametersNode, TaskNode, TranscodedDataNode, ) -from kedro_viz.models.flowchart.tag import Tag def identity(x): diff --git a/package/tests/test_models/test_flowchart/test_pipeline.py b/package/tests/test_models/test_flowchart/test_pipeline.py index fcd9e08375..520aff01d9 100644 --- a/package/tests/test_models/test_flowchart/test_pipeline.py +++ b/package/tests/test_models/test_flowchart/test_pipeline.py @@ -1,7 +1,7 @@ from kedro_datasets.pandas import CSVDataset +from kedro_viz.models.flowchart.named_entities import RegisteredPipeline from kedro_viz.models.flowchart.nodes import GraphNode -from kedro_viz.models.flowchart.pipeline import RegisteredPipeline class TestGraphNodePipelines: