diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py b/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py index 27b43a58530b1..09a2371329c72 100644 --- a/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py +++ b/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py @@ -1,16 +1,12 @@ from abc import abstractmethod from dataclasses import dataclass, field -from enum import Enum from pathlib import Path from typing import Dict, List, Literal from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec from datahub.ingestion.api.report import Report from datahub.utilities.lossy_collections import LossyDict, LossyList - - -class StrEnum(str, Enum): - pass +from datahub.utilities.str_enum import StrEnum class CompileResultArtifactType(StrEnum): diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py index 771efd1f2aa51..bf521ded5dbf3 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py +++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py @@ -1,6 +1,5 @@ import time from dataclasses import dataclass, field -from enum import Enum from typing import Callable, Dict, Iterable, List, Optional, Union, cast from datahub.api.entities.datajob import DataFlow, DataJob @@ -21,6 +20,7 @@ DataProcessRunStatusClass, DataProcessTypeClass, ) +from datahub.utilities.str_enum import StrEnum from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn @@ -33,7 +33,7 @@ class DataProcessInstanceKey(DatahubKey): id: str -class InstanceRunResult(str, Enum): +class InstanceRunResult(StrEnum): SUCCESS = RunResultType.SUCCESS SKIPPED = 
RunResultType.SKIPPED FAILURE = RunResultType.FAILURE diff --git a/metadata-ingestion/src/datahub/configuration/time_window_config.py b/metadata-ingestion/src/datahub/configuration/time_window_config.py index f20ab85be0585..b3cc031609117 100644 --- a/metadata-ingestion/src/datahub/configuration/time_window_config.py +++ b/metadata-ingestion/src/datahub/configuration/time_window_config.py @@ -9,10 +9,11 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.datetimes import parse_absolute_time, parse_relative_timespan from datahub.metadata.schema_classes import CalendarIntervalClass +from datahub.utilities.str_enum import StrEnum @enum.unique -class BucketDuration(str, enum.Enum): +class BucketDuration(StrEnum): DAY = CalendarIntervalClass.DAY HOUR = CalendarIntervalClass.HOUR diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py index 94a65d887efbc..50268768d0ce9 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py @@ -1,4 +1,3 @@ -from enum import Enum from typing import Any, Dict, List, Optional from datahub_classify.helper_classes import ColumnInfo @@ -10,6 +9,7 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 from datahub.ingestion.glossary.classifier import Classifier +from datahub.utilities.str_enum import StrEnum class NameFactorConfig(ConfigModel): @@ -33,7 +33,7 @@ class DataTypeFactorConfig(ConfigModel): ) -class ValuePredictionType(str, Enum): +class ValuePredictionType(StrEnum): REGEX = "regex" LIBRARY = "library" diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 55bd0b3cf0afc..234d7e5e255d7 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -1,5 +1,4 @@ import contextlib -import enum import functools import json import logging @@ -67,6 +66,7 @@ TelemetryClientIdClass, ) from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.str_enum import StrEnum from datahub.utilities.urns.urn import Urn, guess_entity_type if TYPE_CHECKING: @@ -1138,9 +1138,7 @@ def execute_graphql( return result["data"] - class RelationshipDirection(str, enum.Enum): - # FIXME: Upgrade to enum.StrEnum when we drop support for Python 3.10 - + class RelationshipDirection(StrEnum): INCOMING = "INCOMING" OUTGOING = "OUTGOING" diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 4d335779fe49b..fb22f0b6edde2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -1,7 +1,7 @@ -from enum import Enum +from datahub.utilities.str_enum import StrEnum -class DatasetSubTypes(str, Enum): +class DatasetSubTypes(StrEnum): # Generic SubTypes TABLE = "Table" VIEW = "View" @@ -26,7 +26,7 @@ class DatasetSubTypes(str, Enum): NOTEBOOK = "Notebook" -class DatasetContainerSubTypes(str, Enum): +class DatasetContainerSubTypes(StrEnum): # Generic SubTypes DATABASE = "Database" SCHEMA = "Schema" @@ -41,7 +41,7 @@ class DatasetContainerSubTypes(str, Enum): ABS_CONTAINER = "ABS container" -class BIContainerSubTypes(str, Enum): +class BIContainerSubTypes(StrEnum): LOOKER_FOLDER = "Folder" LOOKML_PROJECT = "LookML Project" LOOKML_MODEL = "LookML Model" @@ -55,11 +55,11 @@ class BIContainerSubTypes(str, Enum): MODE_COLLECTION = "Collection" -class JobContainerSubTypes(str, Enum): +class JobContainerSubTypes(StrEnum): NIFI_PROCESS_GROUP = "Process Group" -class BIAssetSubTypes(str, Enum): +class BIAssetSubTypes(StrEnum): # 
Generic SubTypes REPORT = "Report" diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 0d718e509d5c5..a757250a0d6c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -2,7 +2,6 @@ import json import logging from dataclasses import dataclass, field -from enum import Enum from typing import Any, Dict, Iterable, List, Optional, Type, cast import avro.schema @@ -73,11 +72,12 @@ ) from datahub.utilities.mapping import Constants, OperationProcessor from datahub.utilities.registries.domain_registry import DomainRegistry +from datahub.utilities.str_enum import StrEnum logger = logging.getLogger(__name__) -class KafkaTopicConfigKeys(str, Enum): +class KafkaTopicConfigKeys(StrEnum): MIN_INSYNC_REPLICAS_CONFIG = "min.insync.replicas" RETENTION_SIZE_CONFIG = "retention.bytes" RETENTION_TIME_CONFIG = "retention.ms" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py index b3002828ceeff..7ed46c8f7084c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py @@ -1,13 +1,9 @@ from dataclasses import dataclass, field -from enum import Enum from typing import Dict, List, cast from looker_sdk.sdk.api40.models import WriteQuery - -# Enum whose value is string and compatible with dictionary having string value as key -class StrEnum(str, Enum): - pass +from datahub.utilities.str_enum import StrEnum class LookerModel(StrEnum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py index 3ce684b29cf39..fc464cc5ea9bb 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py @@ -1,7 +1,7 @@ -from enum import Enum +from datahub.utilities.str_enum import StrEnum -class SnowflakeCloudProvider(str, Enum): +class SnowflakeCloudProvider(StrEnum): AWS = "aws" GCP = "gcp" AZURE = "azure" @@ -10,7 +10,7 @@ class SnowflakeCloudProvider(str, Enum): SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS -class SnowflakeEdition(str, Enum): +class SnowflakeEdition(StrEnum): STANDARD = "Standard" # We use this to represent Enterprise Edition or higher @@ -44,7 +44,7 @@ class SnowflakeEdition(str, Enum): # We will always compare with lowercase # Complete list for objectDomain - https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html -class SnowflakeObjectDomain(str, Enum): +class SnowflakeObjectDomain(StrEnum): TABLE = "table" EXTERNAL_TABLE = "external table" VIEW = "view" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 9e74fb8b496aa..229c0e292fbaf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -1,7 +1,6 @@ import logging from collections import defaultdict from dataclasses import dataclass -from enum import Enum from typing import Dict, List, Optional, Set, cast import pydantic @@ -31,6 +30,7 @@ ) from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.utilities.global_warning_util import add_global_warning +from datahub.utilities.str_enum import StrEnum logger = logging.Logger(__name__) @@ -48,7 +48,7 @@ ] -class TagOption(str, Enum): +class TagOption(StrEnum): with_lineage = "with_lineage" without_lineage = "without_lineage" skip = "skip" diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index 9da6c29488124..adb171d4ad54b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -2,7 +2,6 @@ import json import logging from collections import namedtuple -from enum import Enum from itertools import groupby from typing import Any, Dict, Iterable, List, Optional, Tuple, Union @@ -10,7 +9,6 @@ from pydantic.fields import Field # This import verifies that the dependencies are available. -from pyhive import hive # noqa: F401 from sqlalchemy import create_engine, text from sqlalchemy.engine.reflection import Inspector @@ -61,13 +59,14 @@ ViewPropertiesClass, ) from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column +from datahub.utilities.str_enum import StrEnum logger: logging.Logger = logging.getLogger(__name__) TableKey = namedtuple("TableKey", ["schema", "table"]) -class HiveMetastoreConfigMode(str, Enum): +class HiveMetastoreConfigMode(StrEnum): hive: str = "hive" # noqa: F811 presto: str = "presto" presto_on_hive: str = "presto-on-hive" diff --git a/metadata-ingestion/src/datahub/testing/check_str_enum.py b/metadata-ingestion/src/datahub/testing/check_str_enum.py new file mode 100644 index 0000000000000..2d1a84aa5f738 --- /dev/null +++ b/metadata-ingestion/src/datahub/testing/check_str_enum.py @@ -0,0 +1,33 @@ +import pathlib +from typing import List + + +def ensure_no_enum_mixin(dirs: List[pathlib.Path]) -> None: + # See the docs on the StrEnum implementation for why this is necessary. + + bad_lines = { + "(str, Enum)", + "(str, enum.Enum)", + # We don't have any int enums right now, but this will catch them if we add some. 
+ "(int, Enum)", + "(int, enum.Enum)", + } + + ignored_files = { + "datahub/utilities/str_enum.py", + "datahub/testing/check_str_enum.py", + } + + for dir in dirs: + for file in dir.rglob("*.py"): + if any(str(file).endswith(ignored_file) for ignored_file in ignored_files): + continue + + with file.open() as f: + for line in f: + if any(bad_line in line for bad_line in bad_lines): + raise ValueError( + f"Disallowed enum mixin found in {file}: `{line.rstrip()}`. " + "This enum mixin's behavior changed in Python 3.11, so it will work inconsistently across versions. " + "Use datahub.utilities.str_enum.StrEnum instead." + ) diff --git a/metadata-ingestion/src/datahub/utilities/str_enum.py b/metadata-ingestion/src/datahub/utilities/str_enum.py new file mode 100644 index 0000000000000..b8392f40770e4 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/str_enum.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class StrEnum(str, Enum): + """String Enum class.""" + + # This is required for compatibility with Python 3.11+, which changed the + # behavior of enums in format() and f-strings. + # Once we're using only Python 3.11+, we can replace this with enum.StrEnum. + # See https://blog.pecar.me/python-enum for more details. 
+ + def __str__(self) -> str: + """Return the string representation of the enum.""" + return str(self.value) diff --git a/metadata-ingestion/tests/performance/data_model.py b/metadata-ingestion/tests/performance/data_model.py index 728bb6ddde215..99b8820b45fba 100644 --- a/metadata-ingestion/tests/performance/data_model.py +++ b/metadata-ingestion/tests/performance/data_model.py @@ -2,11 +2,12 @@ from collections import OrderedDict from dataclasses import dataclass, field from datetime import datetime -from enum import Enum from typing import Dict, List, Optional, Union from typing_extensions import Literal +from datahub.utilities.str_enum import StrEnum + StatementType = Literal[ # SELECT + values from OperationTypeClass "SELECT", "INSERT", @@ -26,7 +27,7 @@ class Container: parent: Optional["Container"] = None -class ColumnType(str, Enum): +class ColumnType(StrEnum): # Can add types that take parameters in the future INTEGER = "INTEGER" diff --git a/metadata-ingestion/tests/unit/test_packages.py b/metadata-ingestion/tests/unit/test_packages.py index 69fe42e9f1e8a..f4045bac6e6ef 100644 --- a/metadata-ingestion/tests/unit/test_packages.py +++ b/metadata-ingestion/tests/unit/test_packages.py @@ -2,6 +2,7 @@ import setuptools from datahub.testing.check_imports import ensure_no_indirect_model_imports +from datahub.testing.check_str_enum import ensure_no_enum_mixin def test_package_list_match_inits(): @@ -15,3 +16,9 @@ def test_check_import_paths(pytestconfig: pytest.Config) -> None: root = pytestconfig.rootpath ensure_no_indirect_model_imports([root / "src", root / "tests"]) + + +def test_check_str_enum_usage(pytestconfig: pytest.Config) -> None: + root = pytestconfig.rootpath + + ensure_no_enum_mixin([root / "src", root / "tests"])