From 160fd6bc66d4d780652dd77499abeb23e13f1c09 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 21 Jun 2023 10:00:06 -0400 Subject: [PATCH] Deprecate dataset errors with module `__getattr__` (#2673) * Replace and deprecate `DataSet` use in class names * Replace another format string with an f-string * Perform deprecations for cached, lambda, and partitioned datasets * Deprecated `MemoryDataSet` in favor of `MemoryDataset` Signed-off-by: Deepyaman Datta * Fix keyword argument to specify metaclass on `CachedDataSet` * Fix reference to `PartitionedDataset` * Keep `AbstractDataSet` subscriptable * Update __init__.py files, __all__ definitions, etc * Warn of impending Kedro 0.19 (not abstract future) * Update `VideoDataSet` to `VideoDataset` (and refs) * Add missing `kedro.utils.DeprecatedClassMeta` imps * Change deprecated references to `AbstractDataSet` Signed-off-by: Deepyaman Datta * Warn of impending Kedro 0.19 (not abstract future) * Rename `pandas.CSVDataSet` to `pandas.CSVDataset` * Fix some pylint errors and blacken code * Update `dask.ParquetDataSet` * Undo changes for `VideoDataSet`, inherit from new base * Undo changes to `APIDataSet`, inherit from new base * Fix some imports and missed references * Undo changes to `BioSequenceDataSet`, inherit from new base * Undo changes to Dask and Pandas datasets, inherit from new bases * Remove the `AbstractDataset` and `AbstractVersionedDataset` alias, update `kedro.io.core` * Undo changes in `kedro/extras/datasets` * Update branch * Change `DataSetError` to `DatasetError` * Remove deprecated aliases for Abstract*DataSet * Change `DataSetError` to `DatasetError` in tests/ * Change DataSet*Error to Dataset*Error in tests/ * Fix references to DataSet in a lot of tests * Change `CSVDataset` back to `CSBVDataSet` * Rename core datasets used across `tests` directory * Fix "Saving 'None' to a 'DataSet' is not allowed." messages * Fix `test_http_filesystem_no_versioning` everywhere * Fix removal of "data" * Deprecate `_SharedMemoryDataSet` in favor of `_SharedMemoryDataset` * Fix tests/pipeline/test_pipeline_from_missing.py * Fix list datasets test * Change patched IncrementalDataSet to IncrementalDataset * Fix default checkpoint dataset * Fix data catalog tests * Fix error message * Use `MemoryDataset`, not `MemoryDataSet`, by default * Use `MemoryDataset`, not `MemoryDataSet`, for missing datasets in data catalog * Rename DefaultDataSet key to DefaultDataset * Change `LambdaDataSet` to `LambdaDataset` in `test_node_run.py` * Update error message--but should I? * Update error message--but should I? * Update error message in kedro/io/core.py--but should I? * Update RELEASE.md * Fix remaining tests * Fix lint issues * Align capitalization * Add `DeprecatedClassMeta` tests from StackOverflow * Blacken kedro/utils.py * Ignore "No value for argument 'subclass' in unbound method call" * Rename 'foo' to 'value' to satisfy linter * Disable pylint messages on deprecated class definitions * Blacken kedro/utils.py * Wrap `kedro.io.core` to fix error deprecation * Simplify deprecation of error names to try to fix docs * Undo attempt to make docs pass Revert "Simplify deprecation of error names to try to fix docs" This reverts commit e9294bef0e42bbf9473024cb31444efd6b84025c. Revert "Wrap `kedro.io.core` to fix error deprecation" This reverts commit db9b7ac03c3f03c7a1d250fefc93f3d028c06494. * Wrap `kedro.io.core` to fix error deprecation * Leverage PEP 562 for less hacky error deprecations * Test old `DataSetError` in `kedro.extras.datasets` * Fix missing and unnecessary imports/other mistakes * Blacken kedro/io/core.py * Don't raise `DeprecationWarning` each time import from `kedro.io` * Replace `DataSetError` with `DatasetError` in test * Add missing "in" to a `DeprecationWarning` message * Add "Dataset" versions of errors to `kedro.io` doc * Add updated "Dataset" names to `kedro.io.rst` and sort the entries * Add `_SharedMemoryDataset` to type targets in conf * Revert all changes to `DatasetError` in `kedro/extras/datasets` The idea is so that we're not making changes not in Kedro-Datasets, and we can be confident that Kedro-Datasets won't break. * docs(datasets): fix a ref to `GeoJSONLocalDataset` * Consistently use `DatasetError` in dataset tests * Remove unused import * Remove "DataSet" imports from kedro/io/__init__.py * chore(sdk): use type hints to skirt linting errors * Implement workaround in `kedro/io/__init__.py` too * Import future annotation in `kedro/io/__init__.py` * test(datasets): cover raising a DeprecationWarning * Ignore pylint warnings (working as intended) * Replace `globals()` in `kedro.io.core.__getattr__` * Remove use of `exec` (use `import_module` instead) * Rename the parameters passed to `test_deprecation` --------- Signed-off-by: Deepyaman Datta --- kedro/extras/datasets/geopandas/__init__.py | 2 +- kedro/io/__init__.py | 20 +++++++++-- kedro/io/core.py | 39 ++++++++++++--------- tests/io/test_core.py | 15 +++++++- 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/kedro/extras/datasets/geopandas/__init__.py b/kedro/extras/datasets/geopandas/__init__.py index 8a8f9e8101..966577fc37 100644 --- a/kedro/extras/datasets/geopandas/__init__.py +++ b/kedro/extras/datasets/geopandas/__init__.py @@ -1,4 +1,4 @@ -"""``GeoJSONLocalDataset`` is an ``AbstractVersionedDataSet`` to save and load GeoJSON files. +"""``GeoJSONDataSet`` is an ``AbstractVersionedDataSet`` to save and load GeoJSON files. """ __all__ = ["GeoJSONDataSet"] diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 112d90d809..5899f904c6 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -1,16 +1,14 @@ """``kedro.io`` provides functionality to read and write to a number of data sets. At the core of the library is the ``AbstractDataSet`` class. """ +from __future__ import annotations from .cached_dataset import CachedDataSet, CachedDataset from .core import ( AbstractDataSet, AbstractVersionedDataSet, - DataSetAlreadyExistsError, DatasetAlreadyExistsError, - DataSetError, DatasetError, - DataSetNotFoundError, DatasetNotFoundError, Version, ) @@ -24,6 +22,22 @@ PartitionedDataset, ) +# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901 +DataSetError: type[Exception] +DataSetNotFoundError: type[DatasetError] +DataSetAlreadyExistsError: type[DatasetError] + + +def __getattr__(name): + import kedro.io.core # pylint: disable=import-outside-toplevel + + if name in ( + kedro.io.core._DEPRECATED_ERROR_CLASSES # pylint: disable=protected-access + ): + return getattr(kedro.io.core, name) + raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}") + + __all__ = [ "AbstractDataSet", "AbstractVersionedDataSet", diff --git a/kedro/io/core.py b/kedro/io/core.py index 1387812eef..fd90d25b8f 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -20,7 +20,7 @@ from cachetools import Cache, cachedmethod from cachetools.keys import hashkey -from kedro.utils import DeprecatedClassMeta, load_obj +from kedro.utils import load_obj warnings.simplefilter("default", DeprecationWarning) @@ -31,6 +31,11 @@ PROTOCOL_DELIMITER = "://" CLOUD_PROTOCOLS = ("s3", "s3n", "s3a", "gcs", "gs", "adl", "abfs", "abfss", "gdrive") +# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901 +DataSetError: type[Exception] +DataSetNotFoundError: type[DatasetError] +DataSetAlreadyExistsError: type[DatasetError] + class DatasetError(Exception): """``DatasetError`` raised by ``AbstractDataSet`` implementations @@ -43,12 +48,6 @@ class DatasetError(Exception): pass -class DataSetError(metaclass=DeprecatedClassMeta): - # pylint: disable=missing-class-docstring, too-few-public-methods - - _DeprecatedClassMeta__alias = DatasetError - - class DatasetNotFoundError(DatasetError): """``DatasetNotFoundError`` raised by ``DataCatalog`` class in case of trying to use a non-existing data set. @@ -57,12 +56,6 @@ class DatasetNotFoundError(DatasetError): pass -class DataSetNotFoundError(metaclass=DeprecatedClassMeta): - # pylint: disable=missing-class-docstring, too-few-public-methods - - _DeprecatedClassMeta__alias = DatasetNotFoundError - - class DatasetAlreadyExistsError(DatasetError): """``DatasetAlreadyExistsError`` raised by ``DataCatalog`` class in case of trying to add a data set which already exists in the ``DataCatalog``. @@ -71,10 +64,24 @@ class DatasetAlreadyExistsError(DatasetError): pass -class DataSetAlreadyExistsError(metaclass=DeprecatedClassMeta): - # pylint: disable=missing-class-docstring, too-few-public-methods +_DEPRECATED_ERROR_CLASSES = { + "DataSetError": DatasetError, + "DataSetNotFoundError": DatasetNotFoundError, + "DataSetAlreadyExistsError": DatasetAlreadyExistsError, +} - _DeprecatedClassMeta__alias = DatasetAlreadyExistsError + +def __getattr__(name): + if name in _DEPRECATED_ERROR_CLASSES: + alias = _DEPRECATED_ERROR_CLASSES[name] + warnings.warn( + f"{repr(name)} has been renamed to {repr(alias.__name__)}, " + f"and the alias will be removed in Kedro 0.19.0", + DeprecationWarning, + stacklevel=2, + ) + return alias + raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}") class VersionNotFoundError(DatasetError): diff --git a/tests/io/test_core.py b/tests/io/test_core.py index 64e0f3d6dd..05a3204639 100644 --- a/tests/io/test_core.py +++ b/tests/io/test_core.py @@ -1,5 +1,6 @@ from __future__ import annotations +import importlib from decimal import Decimal from fractions import Fraction from pathlib import PurePosixPath @@ -7,7 +8,12 @@ import pytest -from kedro.io.core import AbstractDataSet, _parse_filepath, get_filepath_str +from kedro.io.core import ( + _DEPRECATED_ERROR_CLASSES, + AbstractDataSet, + _parse_filepath, + get_filepath_str, +) # List sourced from https://docs.python.org/3/library/stdtypes.html#truth-value-testing. # Excludes None, as None values are not shown in the str representation. @@ -27,6 +33,13 @@ ] +@pytest.mark.parametrize("module_name", ["kedro.io", "kedro.io.core"]) +@pytest.mark.parametrize("class_name", _DEPRECATED_ERROR_CLASSES) +def test_deprecation(module_name, class_name): + with pytest.warns(DeprecationWarning, match=f"{repr(class_name)} has been renamed"): + getattr(importlib.import_module(module_name), class_name) + + class MyDataSet(AbstractDataSet): def __init__(self, var=None): self.var = var