Skip to content

Commit

Permalink
Add preview to datasets as specified in the Kedro catalog under metad…
Browse files Browse the repository at this point in the history
…ata (#1374)

This ticket is part 2 of the preview dataset feature development. In the first part, we enabled preview functionality for CSV and Excel datasets in the Kedro Framework. In Kedro-viz, we showed previews for all CSV and Excel datasets when the user turned on the preview option under the settings panel.

In this ticket, we are removing the preview option from the settings panel. Now, if the user wants to preview a dataset, they need to specify it via the catalog.yml file.

type: pandas.CSVDataSet
filepath: /filepath/to/dataset
metadata:
  kedro-viz:
    layer: raw
    preview_args:
      nrows: 3
In the example above, the user wants to see the first 3 rows of the dataset. Previously we always showed the first 40 rows; now we show the number of rows the user specifies in the catalog.yml file.
  • Loading branch information
rashidakanchwala authored Jun 8, 2023
1 parent a26e304 commit 48ad392
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 15 deletions.
3 changes: 2 additions & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ Please follow the established format:

## Major features and improvements

- Add support for new kedro-viz layer configuration in kedro-datasets. (#1373)
- Add support for new kedro-viz layer configuration in kedro-datasets version 1.3.0 or later.
- Add support for new kedro-viz preview_args configuration in kedro-datasets version 1.3.0 or later.

# Release 6.2.0

Expand Down
12 changes: 10 additions & 2 deletions demo-project/conf/base/catalog_01_raw.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
companies:
type: pandas.CSVDataSet
filepath: ${base_location}/01_raw/companies.csv
layer: raw
metadata:
kedro-viz:
layer: raw
preview_args:
nrows: 5

reviews:
type: pandas.CSVDataSet
filepath: ${base_location}/01_raw/reviews.csv
metadata:
kedro-viz:
layer: raw
preview_args:
nrows: 4
nrows: 10


shuttles:
type: pandas.ExcelDataSet
filepath: ${base_location}/01_raw/shuttles.xlsx
metadata:
kedro-viz:
layer: raw
preview_args:
nrows: 15


20 changes: 18 additions & 2 deletions package/kedro_viz/models/flowchart.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,8 @@ class DataNode(GraphNode):
# the list of modular pipelines this data node belongs to
modular_pipelines: List[str] = field(init=False)

viz_metadata: Optional[Dict] = field(init=False)

# command to run the pipeline to this node
run_command: Optional[str] = field(init=False, default=None)

Expand All @@ -465,6 +467,12 @@ def __post_init__(self):
self.modular_pipelines = self._expand_namespaces(
self._get_namespace(self.full_name)
)
metadata = getattr(self.kedro_obj, "metadata", None)
if metadata:
try:
self.viz_metadata = metadata["kedro-viz"]
except (AttributeError, KeyError): # pragma: no cover
logger.debug("Kedro-viz metadata not found for %s", self.full_name)

# TODO: improve this scheme.
def is_plot_node(self):
Expand Down Expand Up @@ -495,7 +503,15 @@ def is_tracking_node(self):

def is_preview_node(self):
    """Checks if the current node has a preview.

    A node is previewable only when its kedro-viz metadata contains a
    non-empty ``preview_args`` entry.

    Returns:
        True if ``self.viz_metadata["preview_args"]`` exists and is truthy,
        False otherwise.
    """
    try:
        return bool(self.viz_metadata["preview_args"])
    except (AttributeError, KeyError, TypeError):
        # AttributeError: viz_metadata was never assigned on this node.
        # KeyError: the metadata has no "preview_args" entry.
        # TypeError: viz_metadata is not subscriptable, e.g. it is None
        # because the catalog declared an empty "kedro-viz" section.
        return False

def get_preview_args(self):
    """Gets the preview arguments for a dataset.

    Returns:
        The value stored under ``"preview_args"`` in ``self.viz_metadata``.
    """
    preview_args = self.viz_metadata["preview_args"]
    return preview_args


@dataclass
Expand Down Expand Up @@ -602,7 +618,7 @@ def __post_init__(self, data_node: DataNode):
self.tracking_data = dataset.load()
elif data_node.is_preview_node():
try:
self.preview = dataset._preview() # type: ignore
self.preview = dataset._preview(**data_node.get_preview_args()) # type: ignore
except Exception as exc: # pylint: disable=broad-except # pragma: no cover
logger.warning(
"'%s' could not be previewed. Full exception: %s: %s",
Expand Down
24 changes: 24 additions & 0 deletions package/tests/test_models/test_flowchart.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=too-many-public-methods
import base64
from functools import partial
from pathlib import Path
Expand Down Expand Up @@ -368,6 +369,29 @@ def test_data_node_metadata(self):
assert data_node_metadata.filepath == "/tmp/dataset.csv"
assert data_node_metadata.run_command == "kedro run --to-outputs=dataset"

def test_preview_args_not_exist(self):
    """A data node whose kedro-viz metadata has no ``preview_args`` key
    must not be reported as a preview node."""
    dataset = CSVDataSet(
        filepath="test.csv",
        metadata={"kedro-viz": {"something": 3}},
    )
    node_kwargs = {
        "full_name": "dataset",
        "tags": set(),
        "layer": None,
        "dataset": dataset,
    }
    data_node = GraphNode.create_data_node(**node_kwargs)
    assert not data_node.is_preview_node()

def test_get_preview_args(self):
    """A data node whose kedro-viz metadata carries ``preview_args`` is a
    preview node and returns those arguments unchanged."""
    dataset = CSVDataSet(
        filepath="test.csv",
        metadata={"kedro-viz": {"preview_args": {"nrows": 3}}},
    )
    node_kwargs = {
        "full_name": "dataset",
        "tags": set(),
        "layer": None,
        "dataset": dataset,
    }
    data_node = GraphNode.create_data_node(**node_kwargs)
    assert data_node.is_preview_node()
    assert data_node.get_preview_args() == {"nrows": 3}

def test_preview_data_node_metadata(self):
mock_preview_data = {
"columns": ["id", "company_rating", "company_location"],
Expand Down
2 changes: 1 addition & 1 deletion src/components/metadata-modal/metadata-modal.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const MetadataModal = ({ metadata, onToggle, visible }) => {
</div>
{hasPreview && (
<div className="pipeline-metadata-modal__preview-text">
Previewing first 40 rows only
Previewing first {metadata.preview.data.length} rows
</div>
)}
</div>
Expand Down
4 changes: 1 addition & 3 deletions src/components/metadata/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import './styles/metadata.css';
* Shows node meta data
*/
const MetaData = ({
flags,
isPrettyNameOn,
metadata,
onToggleCode,
Expand Down Expand Up @@ -58,7 +57,7 @@ const MetaData = ({
const hasPlot = Boolean(metadata?.plot);
const hasImage = Boolean(metadata?.image);
const hasTrackingData = Boolean(metadata?.trackingData);
const hasPreviewData = Boolean(metadata?.preview) && flags.previewDataSet;
const hasPreviewData = Boolean(metadata?.preview);
const isMetricsTrackingDataset = nodeTypeIcon === 'metricsTracking';
const hasCode = Boolean(metadata?.code);
const isTranscoded = Boolean(metadata?.originalType);
Expand Down Expand Up @@ -326,7 +325,6 @@ const MetaData = ({
};

export const mapStateToProps = (state, ownProps) => ({
flags: state.flags,
isPrettyNameOn: state.prettyName,
metadata: getClickedNodeMetaData(state),
theme: state.theme,
Expand Down
6 changes: 0 additions & 6 deletions src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,6 @@ export const flags = {
default: false,
icon: '🔛',
},
previewDataSet: {
name: 'Preview datasets',
description: 'Enable dataset previews in the metadata panel',
default: true,
icon: '🗂',
},
};

export const settings = {
Expand Down

0 comments on commit 48ad392

Please sign in to comment.