Lightweight Kedro Viz Experimentation using AST (#1966)
* merge main from remote
* partially working parser - WIP
* partial working commit
* testing show code
* adjust file permissions
* update comments and rename parser file
* remove gitignore
* handle func lambda case
* mocking working draft proposal
* reuse session with mock modules
* wip integration tests
* sporadic working needs testing
* update sys modules with patch
* fix lint and pytests
* add dataset factories test
* add e2e test
* fix CI
* dataset factory pattern support in lite mode
* add doc strings
* add e2e test and clear unused func
* testing relative to absolute imports
* testing relative imports
* working draft for relative imports multi-level
* remove resolving relative dependencies
* test
* working draft
* modify test and standalone support for lite
* improve readability
* fix lint and pytest
* revert link redirect
* remove side effects
* pr suggestions addressed
* fix dict issue
* moved package check under dirs and add exception block

Signed-off-by: ravi-kumar-pilla <[email protected]>
1 parent 8620181 · commit 023a05b
Showing 12 changed files with 1,071 additions and 46 deletions.
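For context on the approach described in the commit message: lite mode statically parses the project's Python files with the standard-library ast module to find imports that cannot be resolved in the current environment, then mocks them before loading the project. A minimal sketch of the import-scanning idea, assuming a single file and top-level absolute imports only (find_unresolved_imports is a hypothetical helper, not the actual LiteParser API):

import ast
import importlib.util
from pathlib import Path
from typing import Set


def find_unresolved_imports(file_path: Path) -> Set[str]:
    """Return top-level module names imported by the file but not installed."""
    unresolved: Set[str] = set()
    tree = ast.parse(file_path.read_text())
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            names = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            names = [node.module]
        else:
            continue
        for name in names:
            root = name.split(".")[0]
            # find_spec returns None when the module cannot be located,
            # without actually importing any project code
            if importlib.util.find_spec(root) is None:
                unresolved.add(root)
    return unresolved

The LiteParser in this commit additionally handles relative and multi-level imports, which the sketch above deliberately ignores.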
kedro_viz/integrations/kedro/data_catalog_lite.py (new file)

@@ -0,0 +1,76 @@
"""``DataCatalogLite`` is a custom implementation of Kedro's ``DataCatalog``
to provide a MemoryDataset instance when running Kedro-Viz in lite mode.
"""

import copy
from typing import Any, Optional

from kedro.io.core import AbstractDataset, DatasetError, generate_timestamp
from kedro.io.data_catalog import DataCatalog, _resolve_credentials
from kedro.io.memory_dataset import MemoryDataset


class DataCatalogLite(DataCatalog):
    """``DataCatalogLite`` is a custom implementation of Kedro's ``DataCatalog``
    to provide a MemoryDataset instance by overriding ``from_config`` of ``DataCatalog``
    when running Kedro-Viz in lite mode.
    """

    @classmethod
    def from_config(
        cls,
        catalog: Optional[dict[str, dict[str, Any]]],
        credentials: Optional[dict[str, dict[str, Any]]] = None,
        load_versions: Optional[dict[str, str]] = None,
        save_version: Optional[str] = None,
    ) -> DataCatalog:
        datasets = {}
        dataset_patterns = {}
        catalog = copy.deepcopy(catalog) or {}
        credentials = copy.deepcopy(credentials) or {}
        save_version = save_version or generate_timestamp()
        load_versions = copy.deepcopy(load_versions) or {}
        user_default = {}

        for ds_name, ds_config in catalog.items():
            if not isinstance(ds_config, dict):
                raise DatasetError(
                    f"Catalog entry '{ds_name}' is not a valid dataset configuration. "
                    "\nHint: If this catalog entry is intended for variable interpolation, "
                    "make sure that the key is preceded by an underscore."
                )

            try:
                ds_config = _resolve_credentials(ds_config, credentials)  # noqa: PLW2901
                if cls._is_pattern(ds_name):
                    # Add each factory to the dataset_patterns dict.
                    dataset_patterns[ds_name] = ds_config
                else:
                    try:
                        datasets[ds_name] = AbstractDataset.from_config(
                            ds_name, ds_config, load_versions.get(ds_name), save_version
                        )
                    except DatasetError:
                        # pylint: disable=abstract-class-instantiated
                        datasets[ds_name] = MemoryDataset()  # type: ignore[abstract]
            except KeyError:
                # pylint: disable=abstract-class-instantiated
                datasets[ds_name] = MemoryDataset()  # type: ignore[abstract]

        sorted_patterns = cls._sort_patterns(dataset_patterns)
        if sorted_patterns:
            # If the last pattern is a catch-all pattern, pop it and set it as the default
            if cls._specificity(list(sorted_patterns.keys())[-1]) == 0:
                last_pattern = sorted_patterns.popitem()
                user_default = {last_pattern[0]: last_pattern[1]}

        return cls(
            datasets=datasets,
            dataset_patterns=sorted_patterns,
            load_versions=load_versions,
            save_version=save_version,
            default_pattern=user_default,
        )
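A hypothetical usage sketch of the fallback behaviour (entry names and paths below are made up): if pandas is not installed in the lite environment, instantiating the CSV dataset raises DatasetError inside from_config, so the entry silently becomes a MemoryDataset instead of failing the whole catalog; the catch-all "{default}" factory pattern has specificity 0 and is popped into default_pattern.

catalog_config = {
    "companies": {
        "type": "pandas.CSVDataset",  # falls back to MemoryDataset if pandas is missing
        "filepath": "data/01_raw/companies.csv",
    },
    # catch-all dataset factory pattern, promoted to the default pattern
    "{default}": {"type": "MemoryDataset"},
}

catalog = DataCatalogLite.from_config(catalog_config)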
@@ -7,18 +7,22 @@

 import json
 import logging
+import sys
 from pathlib import Path
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Set, Tuple
+from unittest.mock import patch

 from kedro import __version__
-from kedro.framework.project import configure_project, pipelines
+from kedro.framework.project import configure_project, pipelines, settings
 from kedro.framework.session import KedroSession
 from kedro.framework.session.store import BaseSessionStore
 from kedro.framework.startup import bootstrap_project
 from kedro.io import DataCatalog
 from kedro.pipeline import Pipeline

 from kedro_viz.constants import VIZ_METADATA_ARGS
+from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite
+from kedro_viz.integrations.kedro.lite_parser import LiteParser

 logger = logging.getLogger(__name__)
@@ -69,33 +73,29 @@ def _get_dataset_stats(project_path: Path) -> Dict:
     return {}


-def load_data(
+def _load_data_helper(
     project_path: Path,
     env: Optional[str] = None,
     include_hooks: bool = False,
-    package_name: Optional[str] = None,
     extra_params: Optional[Dict[str, Any]] = None,
-) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]:
-    """Load data from a Kedro project.
+    is_lite: bool = False,
+):
+    """Helper to load data from a Kedro project.
     Args:
         project_path: the path where the Kedro project is located.
         env: the Kedro environment to load the data. If not provided,
             it will use the Kedro default, which is local.
         include_hooks: A flag to include all registered hooks in your Kedro Project.
-        package_name: The name of the current package
         extra_params: Optional dictionary containing extra project parameters
             for underlying KedroContext. If specified, will update (and therefore
             take precedence over) the parameters retrieved from the project
             configuration.
+        is_lite: A flag to run Kedro-Viz in lite mode.
     Returns:
-        A tuple containing the data catalog and the pipeline dictionary
-        and the session store.
+        A tuple containing the data catalog, pipeline dictionary, session store
+        and dataset stats dictionary.
     """
-    if package_name:
-        configure_project(package_name)
-    else:
-        # bootstrap project when viz is run in dev mode
-        bootstrap_project(project_path)
-
     with KedroSession.create(
         project_path=project_path,
@@ -109,12 +109,81 @@ def load_data(

     context = session.load_context()
     session_store = session._store

+    # Update the DataCatalog class for a custom implementation
+    # to handle kedro.io.core.DatasetError from
+    # `settings.DATA_CATALOG_CLASS.from_config`
+    if is_lite:
+        settings.DATA_CATALOG_CLASS = DataCatalogLite
+
     catalog = context.catalog

     # Pipelines is a lazy dict-like object, so we force it to populate here
     # in case user doesn't have an active session down the line when it's first accessed.
     # Useful for users who have `get_current_session` in their `register_pipelines()`.
     pipelines_dict = dict(pipelines)
     stats_dict = _get_dataset_stats(project_path)

     return catalog, pipelines_dict, session_store, stats_dict


+def load_data(
+    project_path: Path,
+    env: Optional[str] = None,
+    include_hooks: bool = False,
+    package_name: Optional[str] = None,
+    extra_params: Optional[Dict[str, Any]] = None,
+    is_lite: bool = False,
+) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]:
+    """Load data from a Kedro project.
+    Args:
+        project_path: the path where the Kedro project is located.
+        env: the Kedro environment to load the data. If not provided,
+            it will use the Kedro default, which is local.
+        include_hooks: A flag to include all registered hooks in your Kedro Project.
+        package_name: The name of the current package
+        extra_params: Optional dictionary containing extra project parameters
+            for underlying KedroContext. If specified, will update (and therefore
+            take precedence over) the parameters retrieved from the project
+            configuration.
+        is_lite: A flag to run Kedro-Viz in lite mode.
+    Returns:
+        A tuple containing the data catalog, pipeline dictionary, session store
+        and dataset stats dictionary.
+    """
+    if package_name:
+        configure_project(package_name)
+    else:
+        # bootstrap project when viz is run in dev mode
+        bootstrap_project(project_path)
+
+    if is_lite:
+        lite_parser = LiteParser(package_name)
+        unresolved_imports = lite_parser.parse(project_path)
+        sys_modules_patch = sys.modules.copy()
+
+        if unresolved_imports and len(unresolved_imports) > 0:
+            modules_to_mock: Set[str] = set()
+
+            for unresolved_module_set in unresolved_imports.values():
+                modules_to_mock = modules_to_mock.union(unresolved_module_set)
+
+            mocked_modules = lite_parser.create_mock_modules(modules_to_mock)
+            sys_modules_patch.update(mocked_modules)
+
+            logger.warning(
+                "Kedro-Viz has mocked the following dependencies for lite-mode.\n"
+                "%s \n"
+                "In order to get a complete experience of Viz, "
+                "please install the missing Kedro project dependencies\n",
+                list(mocked_modules.keys()),
+            )
+
+        # Patch actual sys modules
+        with patch.dict("sys.modules", sys_modules_patch):
+            return _load_data_helper(
+                project_path, env, include_hooks, extra_params, is_lite
+            )
+    else:
+        return _load_data_helper(
+            project_path, env, include_hooks, extra_params, is_lite
+        )
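The patching above is the core of lite mode: sys.modules is copied, mocks for the unresolved imports are merged in, and patch.dict temporarily installs the patched mapping so the project's own imports succeed while the session loads. A standalone sketch of the mechanism (module names invented for illustration; create_mock_modules is assumed to return a dict of module name to mock):

import sys
from unittest.mock import MagicMock, patch

# Pretend these project dependencies are missing from the Viz environment.
mocked_modules = {"tensorflow": MagicMock(), "torch": MagicMock()}

sys_modules_patch = sys.modules.copy()
sys_modules_patch.update(mocked_modules)

# Inside the context manager, `import tensorflow` binds the MagicMock because
# the import system consults sys.modules before searching for the real module.
with patch.dict("sys.modules", sys_modules_patch):
    import tensorflow

Once the context manager exits, sys.modules is restored, so the mocks only exist for the duration of the project load.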
@rashidakanchwala Not a blocker for release, but I don't know how I feel about having non-test code make use of unittest.mock. Stack Overflow, Reddit, and ChatGPT all agree that it's not a good practice... do you think we should open an issue to explore alternative approaches? cc @ravi-kumar-pilla
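For reference, one mock-free direction such an issue could explore (an illustrative sketch only, not something the team has agreed on): install bare placeholder modules built with types.ModuleType instead of MagicMock. Note the trade-off: unlike MagicMock, a bare module does not fabricate attributes, so a statement like "from missing_dep import thing" would still fail.

import sys
import types
from contextlib import contextmanager


@contextmanager
def stub_modules(names):
    """Temporarily install empty placeholder modules for missing dependencies."""
    installed = [name for name in names if name not in sys.modules]
    for name in installed:
        sys.modules[name] = types.ModuleType(name)
    try:
        yield
    finally:
        # Remove only the stubs we installed, leaving real modules untouched.
        for name in installed:
            sys.modules.pop(name, None)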