Refactor datasets #1698

Merged 36 commits from refactor-datasets into main on Feb 21, 2024. The changes shown below are from all commits.

Commits (36)
08d83f9  fixed (rashidakanchwala, Nov 20, 2023)
cf6135d  fix lint (rashidakanchwala, Nov 20, 2023)
d7b9c67  Update package/kedro_viz/models/flowchart.py (rashidakanchwala, Nov 22, 2023)
213ab62  WIP (rashidakanchwala, Nov 22, 2023)
ddfbb1a  Merge branch 'main' into datasets-refactor (rashidakanchwala, Dec 15, 2023)
f48d6b7  Merge branch 'main' into datasets-refactor (rashidakanchwala, Jan 4, 2024)
0f67404  WIP (rashidakanchwala, Jan 5, 2024)
68e2d2a  Merge branch 'main' into refactor-datasets (rashidakanchwala, Jan 15, 2024)
895a55b  revert tracking_datasets func (rashidakanchwala, Jan 16, 2024)
73080f5  change _preview to preview (rashidakanchwala, Jan 17, 2024)
73f01b7  WIP (rashidakanchwala, Jan 22, 2024)
f0d1117  some fixes (rashidakanchwala, Jan 26, 2024)
30111ef  Merge branch 'main' into refactor-datasets (rashidakanchwala, Jan 30, 2024)
fcb148c  fix tests and lint (rashidakanchwala, Feb 1, 2024)
6fe8940  add tests (rashidakanchwala, Feb 1, 2024)
008f922  fix tests (rashidakanchwala, Feb 5, 2024)
430d41b  all npm tests passing (rashidakanchwala, Feb 5, 2024)
aab248f  fix tests (rashidakanchwala, Feb 6, 2024)
100880f  remove unnecessary spacing (rashidakanchwala, Feb 6, 2024)
c50475b  Merge branch 'main' into refactor-datasets (SajidAlamQB, Feb 8, 2024)
5769d26  pin main branch of kedro-datasets temporarily (SajidAlamQB, Feb 8, 2024)
6074646  update demo-project reqs (SajidAlamQB, Feb 8, 2024)
fdce29d  modify Dataframe to table for consistency (ravi-kumar-pilla, Feb 8, 2024)
325900c  Update continue_config.yml (SajidAlamQB, Feb 9, 2024)
b8c71f3  changes based on review (SajidAlamQB, Feb 9, 2024)
f0bbc03  lint (SajidAlamQB, Feb 9, 2024)
6cddf34  revert kedro-datasets (ravi-kumar-pilla, Feb 9, 2024)
2cc8a38  Merge branch 'refactor-datasets' of https://github.com/kedro-org/kedr… (ravi-kumar-pilla, Feb 9, 2024)
fe9ea27  revert dataset requirements (ravi-kumar-pilla, Feb 9, 2024)
e31b3e6  Merge branch 'main' into refactor-datasets (SajidAlamQB, Feb 13, 2024)
5366069  fix changes based on review (rashidakanchwala, Feb 19, 2024)
ef9edff  Update package/kedro_viz/models/flowchart.py (rashidakanchwala, Feb 20, 2024)
3f0e4ac  is_preview_disabled returns bool (rashidakanchwala, Feb 20, 2024)
ed90035  make bool more clear (rashidakanchwala, Feb 20, 2024)
c9e1764  discard demo changes (rashidakanchwala, Feb 20, 2024)
bd8cdcc  Merge branch 'main' into refactor-datasets (ravi-kumar-pilla, Feb 20, 2024)
2 changes: 1 addition & 1 deletion demo-project/src/demo_project/requirements.in
@@ -6,7 +6,7 @@
jupyter~=1.0
jupyter_client>=5.1, <7.0
jupyterlab~=3.0
kedro~=0.18.0
-kedro-datasets[pandas.CSVDataset,pandas.ExcelDataset, pandas.ParquetDataset, plotly.PlotlyDataset]<=2.0.0
+git+https://github.com/kedro-org/kedro-plugins.git@main#egg=kedro-datasets[pandas.ParquetDataset,pandas.CSVDataset,pandas.ExcelDataset,plotly.JSONDataset]&subdirectory=kedro-datasets # temporary pin until the next release of kedro-datasets
nbstripout~=0.4
pytest-cov~=2.5
pytest-mock>=1.7.1, <2.0
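Note: the replacement line uses pip's VCS requirement syntax to install kedro-datasets straight from the kedro-plugins monorepo until the next release is published. The general form (a sketch, not an exact spec) is:

git+https://<host>/<repo>.git@<ref>#egg=<distribution>[<extras>]&subdirectory=<folder-in-repo>

where `@main` pins the branch, `egg` names the distribution together with its extras, and `subdirectory` points pip at the kedro-datasets folder inside the monorepo.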
2 changes: 1 addition & 1 deletion demo-project/src/docker_requirements.txt
@@ -1,5 +1,5 @@
kedro>=0.18.0
-kedro-datasets[pandas.CSVDataset,pandas.ExcelDataset, pandas.ParquetDataset, plotly.PlotlyDataset, matplotlib.MatplotlibWriter]<=2.0.0
+git+https://github.com/kedro-org/kedro-plugins.git@main#egg=kedro-datasets[pandas.ParquetDataset,pandas.CSVDataset,pandas.ExcelDataset,plotly.JSONDataset]&subdirectory=kedro-datasets # temporary pin until the next release of kedro-datasets
scikit-learn~=1.0
pillow~=9.0
seaborn~=0.11.2
6 changes: 2 additions & 4 deletions package/kedro_viz/api/rest/responses.py
@@ -131,11 +131,9 @@ class Config:
class DataNodeMetadataAPIResponse(BaseAPIResponse):
filepath: Optional[str]
type: str
-    plot: Optional[Dict]
-    image: Optional[str]
-    tracking_data: Optional[Dict]
run_command: Optional[str]
-    preview: Optional[Dict]
+    preview: Optional[Union[Dict, str]]
+    preview_type: Optional[str]
stats: Optional[Dict]

class Config:
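Note: under the new schema, a node-metadata response might look like the following (a hypothetical payload; the dict shape of `preview` follows pandas' to_dict(orient="split") layout commonly used for table previews, and string previews cover cases such as base64-encoded images):

{
  "filepath": "raw_data.csv",
  "type": "pandas.csv_dataset.CSVDataset",
  "run_command": "kedro run --to-outputs=raw_data",
  "preview": {
    "index": [0, 1],
    "columns": ["id", "company_rating"],
    "data": [["35029", "100%"], ["30292", "67%"]]
  },
  "preview_type": "TablePreview",
  "stats": {"rows": 2, "columns": 2}
}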
@@ -8,8 +8,8 @@
TRACKING_DATASET_GROUPS,
TrackingDatasetGroup,
TrackingDatasetModel,
-    get_dataset_type,
)
+from kedro_viz.models.utils import get_dataset_type

if TYPE_CHECKING:
try:
66 changes: 0 additions & 66 deletions package/kedro_viz/integrations/kedro/data_loader.py
@@ -3,9 +3,7 @@
load data from projects created in a range of Kedro versions.
"""
# pylint: disable=import-outside-toplevel, protected-access
-# pylint: disable=missing-function-docstring

-import base64
import json
import logging
from pathlib import Path
@@ -14,24 +12,7 @@
from kedro import __version__
from kedro.framework.session import KedroSession
from kedro.framework.session.store import BaseSessionStore

-try:
-    from kedro_datasets import (  # isort:skip
-        json as json_dataset,
-        matplotlib,
-        plotly,
-        tracking,
-    )
-except ImportError:  # kedro_datasets is not installed.
-    from kedro.extras.datasets import (  # Safe since ImportErrors are suppressed within kedro.
-        json as json_dataset,
-        matplotlib,
-        plotly,
-        tracking,
-    )

from kedro.io import DataCatalog
-from kedro.io.core import get_filepath_str
from kedro.pipeline import Pipeline

logger = logging.getLogger(__name__)
@@ -129,50 +110,3 @@ def load_data(
stats_dict = _get_dataset_stats(project_path)

return catalog, pipelines_dict, session_store, stats_dict


-# Try to access the attribute to trigger the import of dependencies, only modify the _load
-# if dependencies are installed.
-# These datasets do not have _load methods defined (tracking and matplotlib) or do not
-# load to json (plotly), hence the need to define _load here.
-try:
-    getattr(matplotlib, "MatplotlibWriter")  # Trigger the lazy import
-
-    def matplotlib_writer_load(dataset: matplotlib.MatplotlibWriter) -> str:
-        load_path = get_filepath_str(dataset._get_load_path(), dataset._protocol)
-        with dataset._fs.open(load_path, mode="rb") as img_file:
-            base64_bytes = base64.b64encode(img_file.read())
-        return base64_bytes.decode("utf-8")
-
-    matplotlib.MatplotlibWriter._load = matplotlib_writer_load
-except (ImportError, AttributeError):
-    pass
-
-try:
-    getattr(plotly, "JSONDataset")  # Trigger import
-    plotly.JSONDataset._load = json_dataset.JSONDataset._load
-except (ImportError, AttributeError):
-    getattr(plotly, "JSONDataSet")  # Trigger import
-    plotly.JSONDataSet._load = json_dataset.JSONDataSet._load
-
-
-try:
-    getattr(plotly, "PlotlyDataset")  # Trigger import
-    plotly.PlotlyDataset._load = json_dataset.JSONDataset._load
-except (ImportError, AttributeError):
-    getattr(plotly, "PlotlyDataSet")  # Trigger import
-    plotly.PlotlyDataSet._load = json_dataset.JSONDataSet._load
-
-try:
-    getattr(tracking, "JSONDataset")  # Trigger import
-    tracking.JSONDataset._load = json_dataset.JSONDataset._load
-except (ImportError, AttributeError):
-    getattr(tracking, "JSONDataSet")  # Trigger import
-    tracking.JSONDataSet._load = json_dataset.JSONDataSet._load
-
-try:
-    getattr(tracking, "MetricsDataset")  # Trigger import
-    tracking.MetricsDataset._load = json_dataset.JSONDataset._load
-except (ImportError, AttributeError):
-    getattr(tracking, "MetricsDataSet")  # Trigger import
-    tracking.MetricsDataSet._load = json_dataset.JSONDataSet._load
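Note: the deleted block above existed only to monkey-patch `_load` onto plotly, matplotlib and tracking datasets so kedro-viz could render them. After this refactor the datasets themselves expose a public `preview()` method, discovered with `hasattr(dataset, "preview")`. A minimal, self-contained sketch of that protocol (`TablePreview` here is an assumed stand-in for the typed previews in kedro-datasets; kedro-viz reads the return annotation's `__name__` to fill `preview_type`):

from typing import Any, Dict, List, NewType

import pandas as pd

# Assumed stand-in for kedro-datasets' typed table preview.
TablePreview = NewType("TablePreview", Dict[str, List[Any]])


class PreviewableCSV:
    """Hypothetical dataset exposing the public preview() protocol."""

    def __init__(self, filepath: str):
        self._filepath = filepath

    def preview(self, nrows: int = 5) -> TablePreview:
        # Read only the first rows and emit a JSON-serialisable table
        # in pandas' to_dict(orient="split") layout: index/columns/data.
        data = pd.read_csv(self._filepath, nrows=nrows)
        return TablePreview(data.to_dict(orient="split"))

With a public method like this in place, none of the per-dataset `_load` patching is needed any more.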
8 changes: 5 additions & 3 deletions package/kedro_viz/models/experiment_tracking.py
@@ -63,7 +63,7 @@ class TrackingDatasetGroup(str, Enum):
JSON = "json"


-# Map dataset types (as produced by get_dataset_type) to their group
+# Map dataset types to their group
TRACKING_DATASET_GROUPS = {
"plotly.plotly_dataset.PlotlyDataset": TrackingDatasetGroup.PLOT,
"plotly.json_dataset.JSONDataset": TrackingDatasetGroup.PLOT,
@@ -110,9 +110,11 @@ def load_tracking_data(self, run_id: str):

try:
if TRACKING_DATASET_GROUPS[self.dataset_type] is TrackingDatasetGroup.PLOT:
-                self.runs[run_id] = {self.dataset._filepath.name: self.dataset.load()}
+                self.runs[run_id] = {
+                    self.dataset._filepath.name: self.dataset.preview()  # type: ignore
+                }
else:
-                self.runs[run_id] = self.dataset.load()
+                self.runs[run_id] = self.dataset.preview()  # type: ignore
except Exception as exc: # pylint: disable=broad-except # pragma: no cover
logger.warning(
"'%s' with version '%s' could not be loaded. Full exception: %s: %s",
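Note: concretely, the two branches above produce run entries shaped roughly like this (values are hypothetical; a PLOT-group preview is keyed by its file name, while METRIC/JSON previews are stored as returned):

runs = {}
# PLOT group (e.g. plotly.json_dataset.JSONDataset), keyed by file name:
runs["2024-02-20T10.00.00.000Z"] = {
    "scatter_plot.json": {"data": [{"x": [1, 2], "y": [3, 4]}], "layout": {}}
}
# METRIC / JSON groups (e.g. tracking.metrics_dataset.MetricsDataset):
runs["2024-02-20T10.00.00.000Z"] = {"accuracy": 0.92, "f1_score": 0.88}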
114 changes: 37 additions & 77 deletions package/kedro_viz/models/flowchart.py
@@ -1,4 +1,5 @@
"""`kedro_viz.models.flowchart` defines data models to represent Kedro entities in a viz graph."""

# pylint: disable=protected-access, missing-function-docstring
import abc
import hashlib
@@ -586,52 +587,16 @@ def set_viz_metadata(cls, _, values):

return None

-    # TODO: improve this scheme.
-    def is_plot_node(self):
-        """Check if the current node is a plot node.
-        Currently it only recognises one underlying dataset as a plot node.
-        In the future, we might want to make this generic.
-        """
-        return self.dataset_type in (
-            "plotly.plotly_dataset.PlotlyDataset",
-            "plotly.json_dataset.JSONDataset",
-            "plotly.plotly_dataset.PlotlyDataSet",
-            "plotly.json_dataset.JSONDataSet",
-        )
-
-    def is_image_node(self):
-        """Check if the current node is a matplotlib image node."""
-        return self.dataset_type == "matplotlib.matplotlib_writer.MatplotlibWriter"
-
-    def is_metric_node(self):
-        """Check if the current node is a metrics node."""
-        return self.dataset_type in (
-            "tracking.metrics_dataset.MetricsDataset",
-            "tracking.metrics_dataset.MetricsDataSet",
-        )
-
-    def is_json_node(self):
-        """Check if the current node is a JSONDataset node."""
-        return self.dataset_type in (
-            "tracking.json_dataset.JSONDataset",
-            "tracking.json_dataset.JSONDataSet",
-        )
-
-    def is_tracking_node(self):
-        """Checks if the current node is a tracking data node"""
-        return self.is_json_node() or self.is_metric_node()
-
-    def is_preview_node(self):
-        """Checks if the current node has a preview"""
-        if not (self.viz_metadata and self.viz_metadata.get("preview_args", None)):
-            return False
-
-        return True

def get_preview_args(self):
"""Gets the preview arguments for a dataset"""
return self.viz_metadata.get("preview_args", None)

+    def is_preview_disabled(self):
+        """Checks if the dataset has a preview disabled"""
+        return (
+            self.viz_metadata is not None and self.viz_metadata.get("preview") is False
+        )


class TranscodedDataNode(GraphNode):
"""Represent a graph node of type data
@@ -718,24 +683,15 @@ class DataNodeMetadata(GraphNodeMetadata):
# The path to the actual data file for the underlying dataset
filepath: Optional[str]

-    plot: Optional[Dict] = Field(
-        None, description="The optional plot data if the underlying dataset has a plot"
-    )
-
-    # The image data if the underlying dataset has a image
-    # currently only applicable for matplotlib.MatplotlibWriter
-    image: Optional[str] = Field(
-        None, description="The image data if the underlying dataset has a image"
-    )
-    tracking_data: Optional[Dict] = Field(
-        None,
-        description="The tracking data if the underlying dataset has a tracking dataset",
-    )
run_command: Optional[str] = Field(
None, description="Command to run the pipeline to this node"
)
-    preview: Optional[Dict] = Field(
-        None, description="Preview data for the underlying datanode"
+    preview: Optional[Union[Dict, str]] = Field(
+        None, description="Preview data for the underlying data node"
)

+    preview_type: Optional[str] = Field(
+        None, description="Type of preview for the dataset"
+    )
stats: Optional[Dict] = Field(None, description="The statistics for the data node.")

@@ -769,35 +725,39 @@ def set_run_command(cls, _):
return f"kedro run --to-outputs={cls.data_node.name}"
return None

@validator("plot", always=True)
def set_plot(cls, _):
if cls.data_node.is_plot_node():
return cls.data_node.kedro_obj.load()
return None
@validator("preview", always=True)
def set_preview(cls, _):
if cls.data_node.is_preview_disabled() or not hasattr(cls.dataset, "preview"):
return None

@validator("image", always=True)
def set_image(cls, _):
if cls.data_node.is_image_node():
return cls.data_node.kedro_obj.load()
return None
try:
preview_args = (
cls.data_node.get_preview_args() if cls.data_node.viz_metadata else None
)
if preview_args is None:
return cls.dataset.preview()
return cls.dataset.preview(**preview_args)

@validator("tracking_data", always=True)
def set_tracking_data(cls, _):
if cls.data_node.is_tracking_node():
return cls.data_node.kedro_obj.load()
return None
except Exception as exc: # pylint: disable=broad-except
logger.warning(
"'%s' could not be previewed. Full exception: %s: %s",
cls.data_node.name,
type(exc).__name__,
exc,
)
return None

@validator("preview", always=True)
def set_preview(cls, _):
if not (cls.data_node.is_preview_node() and hasattr(cls.dataset, "_preview")):
@validator("preview_type", always=True)
def set_preview_type(cls, _):
if cls.data_node.is_preview_disabled() or not hasattr(cls.dataset, "preview"):
return None

try:
-            return cls.dataset._preview(**cls.data_node.get_preview_args())
+            return inspect.signature(cls.dataset.preview).return_annotation.__name__

except Exception as exc: # pylint: disable=broad-except # pragma: no cover
logger.warning(
"'%s' could not be previewed. Full exception: %s: %s",
"'%s' did not have preview type. Full exception: %s: %s",
cls.data_node.name,
type(exc).__name__,
exc,
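Note: as a usage sketch, both validators above are driven purely by the dataset's `metadata["kedro-viz"]` block, mirroring the conftest fixtures further down (file paths here are illustrative):

from kedro_datasets.pandas import CSVDataset

# preview_args are forwarded as dataset.preview(**preview_args):
previewed = CSVDataset(
    filepath="data/01_raw/companies.csv",
    metadata={"kedro-viz": {"preview_args": {"nrows": 3}}},
)

# "preview": False makes is_preview_disabled() return True, so both
# set_preview and set_preview_type short-circuit to None:
not_previewed = CSVDataset(
    filepath="data/01_raw/companies.csv",
    metadata={"kedro-viz": {"preview": False}},
)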
2 changes: 1 addition & 1 deletion package/test_requirements.txt
@@ -1,7 +1,7 @@
-r requirements.txt

kedro >=0.18.0
-kedro-datasets[pandas.ParquetDataset, pandas.CSVDataset, pandas.ExcelDataset, plotly.JSONDataset]<=2.0.0
+git+https://github.com/kedro-org/kedro-plugins.git@main#egg=kedro-datasets[pandas.ParquetDataset,pandas.CSVDataset,pandas.ExcelDataset,plotly.JSONDataset]&subdirectory=kedro-datasets # temporary pin until the next release of kedro-datasets
kedro-telemetry>=0.1.1 # for testing telemetry integration
bandit~=1.7
behave~=1.2
42 changes: 35 additions & 7 deletions package/tests/conftest.py
@@ -267,11 +267,17 @@ def json(self):
@pytest.fixture
def example_data_frame():
data = {
"id": ["35029", "30292"],
"company_rating": ["100%", "67%"],
"company_location": ["Niue", "Anguilla"],
"total_fleet_count": ["4.0", "6.0"],
"iata_approved": ["f", "f"],
"id": ["35029", "30292", "12345", "67890", "54321", "98765", "11111"],
"company_rating": ["100%", "67%", "80%", "95%", "72%", "88%", "75%"],
"company_location": [
"Niue",
"Anguilla",
"Barbados",
"Fiji",
"Grenada",
"Jamaica",
"Trinidad and Tobago",
],
}
yield pd.DataFrame(data)

@@ -292,10 +298,32 @@ def example_csv_dataset(tmp_path, example_data_frame):


@pytest.fixture
-def example_data_node():
+def example_csv_filepath(tmp_path, example_data_frame):
+    csv_file_path = tmp_path / "temporary_test_data.csv"
+    example_data_frame.to_csv(csv_file_path, index=False)
+    yield csv_file_path
+
+
+@pytest.fixture
+def example_data_node(example_csv_filepath):
dataset_name = "uk.data_science.model_training.dataset"
metadata = {"kedro-viz": {"preview_args": {"nrows": 3}}}
kedro_dataset = CSVDataset(filepath="test.csv", metadata=metadata)
kedro_dataset = CSVDataset(filepath=example_csv_filepath, metadata=metadata)
data_node = GraphNode.create_data_node(
dataset_name=dataset_name,
layer="raw",
tags=set(),
dataset=kedro_dataset,
stats={"rows": 10, "columns": 5, "file_size": 1024},
)

yield data_node


+@pytest.fixture
+def example_data_node_without_viz_metadata(example_csv_filepath):
+    dataset_name = "uk.data_science.model_training.dataset"
+    kedro_dataset = CSVDataset(filepath=example_csv_filepath)
data_node = GraphNode.create_data_node(
dataset_name=dataset_name,
layer="raw",
6 changes: 5 additions & 1 deletion package/tests/test_api/test_apps.py
@@ -82,7 +82,11 @@ class TestNodeMetadataEndpoint:
(
"13399a82",
200,
{"filepath": "raw_data.csv", "type": "pandas.csv_dataset.CSVDataset"},
{
"filepath": "raw_data.csv",
"preview_type": "TablePreview",
"type": "pandas.csv_dataset.CSVDataset",
},
),
],
)
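Note: outside the parametrised test, the same endpoint can be exercised directly with FastAPI's test client; the route and app factory below are assumed from kedro-viz's REST layout, and the project path is a placeholder:

from pathlib import Path

from fastapi.testclient import TestClient

from kedro_viz.api.apps import create_api_app_from_project  # assumed factory

project_path = Path("path/to/kedro/project")  # placeholder
client = TestClient(create_api_app_from_project(project_path))

response = client.get("/api/nodes/13399a82")
assert response.status_code == 200
assert response.json()["preview_type"] == "TablePreview"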