From 48ad39229820a081693f6f87781d4973e7a01661 Mon Sep 17 00:00:00 2001 From: rashidakanchwala <37628668+rashidakanchwala@users.noreply.github.com> Date: Thu, 8 Jun 2023 19:19:34 +0100 Subject: [PATCH] Add preview to datasets as specified in the Kedro catalog under metadata (#1374) This ticket is part 2 of the preview dataset feature development. In the first part, we enabled preview functionality for CSV and Excel datasets in the Kedro Framework. In Kedro-viz, we showed previews for all CSV and Excel datasets when the user turned on the preview option under the settings panel. In this ticket, we are removing the preview option from the settings panel. Now, if the user wants to preview a dataset, they need to enable it for that dataset in the catalog.yml file, for example: type: pandas.CSVDataSet filepath: /filepath/to/dataset metadata: kedro-viz: layer: raw preview_args: nrows: 3 In the example above, the user wants to see the first 3 rows of the dataset. Previously, we always showed the first 40 rows, but now we show the number of rows specified by the user in the catalog.yml file. --- RELEASE.md | 3 ++- demo-project/conf/base/catalog_01_raw.yml | 12 ++++++++-- package/kedro_viz/models/flowchart.py | 20 ++++++++++++++-- package/tests/test_models/test_flowchart.py | 24 +++++++++++++++++++ .../metadata-modal/metadata-modal.js | 2 +- src/components/metadata/metadata.js | 4 +--- src/config.js | 6 ----- 7 files changed, 56 insertions(+), 15 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 0e77545632..bd12f42646 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -15,7 +15,8 @@ Please follow the established format: ## Major features and improvements -- Add support for new kedro-viz layer configuration in kedro-datasets. (#1373) +- Add support for new kedro-viz layer configuration in kedro-datasets version 1.3.0 or more. +- Add support for new kedro-viz preview_args configuration in kedro-datasets version 1.3.0 or more. 
# Release 6.2.0 diff --git a/demo-project/conf/base/catalog_01_raw.yml b/demo-project/conf/base/catalog_01_raw.yml index 706393a56f..d355439a04 100644 --- a/demo-project/conf/base/catalog_01_raw.yml +++ b/demo-project/conf/base/catalog_01_raw.yml @@ -1,15 +1,21 @@ companies: type: pandas.CSVDataSet filepath: ${base_location}/01_raw/companies.csv - layer: raw + metadata: + kedro-viz: + layer: raw + preview_args: + nrows: 5 reviews: type: pandas.CSVDataSet filepath: ${base_location}/01_raw/reviews.csv metadata: kedro-viz: + layer: raw preview_args: - nrows: 4 + nrows: 10 + shuttles: type: pandas.ExcelDataSet @@ -17,5 +23,7 @@ shuttles: metadata: kedro-viz: layer: raw + preview_args: + nrows: 15 diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py index e67272110a..fccd1ad2c0 100644 --- a/package/kedro_viz/models/flowchart.py +++ b/package/kedro_viz/models/flowchart.py @@ -449,6 +449,8 @@ class DataNode(GraphNode): # the list of modular pipelines this data node belongs to modular_pipelines: List[str] = field(init=False) + viz_metadata: Optional[Dict] = field(init=False) + # command to run the pipeline to this node run_command: Optional[str] = field(init=False, default=None) @@ -465,6 +467,12 @@ def __post_init__(self): self.modular_pipelines = self._expand_namespaces( self._get_namespace(self.full_name) ) + metadata = getattr(self.kedro_obj, "metadata", None) + if metadata: + try: + self.viz_metadata = metadata["kedro-viz"] + except (AttributeError, KeyError): # pragma: no cover + logger.debug("Kedro-viz metadata not found for %s", self.full_name) # TODO: improve this scheme. 
def is_plot_node(self): @@ -495,7 +503,15 @@ def is_tracking_node(self): def is_preview_node(self): """Checks if the current node has a preview""" - return hasattr(self.kedro_obj, "_preview") + try: + is_preview = bool(self.viz_metadata["preview_args"]) + except (AttributeError, KeyError): + return False + return is_preview + + def get_preview_args(self): + """Gets the preview arguments for a dataset""" + return self.viz_metadata["preview_args"] @dataclass @@ -602,7 +618,7 @@ def __post_init__(self, data_node: DataNode): self.tracking_data = dataset.load() elif data_node.is_preview_node(): try: - self.preview = dataset._preview() # type: ignore + self.preview = dataset._preview(**data_node.get_preview_args()) # type: ignore except Exception as exc: # pylint: disable=broad-except # pragma: no cover logger.warning( "'%s' could not be previewed. Full exception: %s: %s", diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart.py index abddef5841..6dec2e9968 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart.py @@ -1,3 +1,4 @@ +# pylint: disable=too-many-public-methods import base64 from functools import partial from pathlib import Path @@ -368,6 +369,29 @@ def test_data_node_metadata(self): assert data_node_metadata.filepath == "/tmp/dataset.csv" assert data_node_metadata.run_command == "kedro run --to-outputs=dataset" + def test_preview_args_not_exist(self): + metadata = {"kedro-viz": {"something": 3}} + dataset = CSVDataSet(filepath="test.csv", metadata=metadata) + data_node = GraphNode.create_data_node( + full_name="dataset", + tags=set(), + layer=None, + dataset=dataset, + ) + assert not data_node.is_preview_node() + + def test_get_preview_args(self): + metadata = {"kedro-viz": {"preview_args": {"nrows": 3}}} + dataset = CSVDataSet(filepath="test.csv", metadata=metadata) + data_node = GraphNode.create_data_node( + full_name="dataset", + tags=set(), + layer=None, + 
dataset=dataset, + ) + assert data_node.is_preview_node() + assert data_node.get_preview_args() == {"nrows": 3} + def test_preview_data_node_metadata(self): mock_preview_data = { "columns": ["id", "company_rating", "company_location"], diff --git a/src/components/metadata-modal/metadata-modal.js b/src/components/metadata-modal/metadata-modal.js index 51c1614fae..98dc744657 100644 --- a/src/components/metadata-modal/metadata-modal.js +++ b/src/components/metadata-modal/metadata-modal.js @@ -46,7 +46,7 @@ const MetadataModal = ({ metadata, onToggle, visible }) => { {hasPreview && (