Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest): add custom StrEnum type #11270

Merged
merged 1 commit into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
from abc import abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Dict, List, Literal

from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec
from datahub.ingestion.api.report import Report
from datahub.utilities.lossy_collections import LossyDict, LossyList


class StrEnum(str, Enum):
pass
from datahub.utilities.str_enum import StrEnum


class CompileResultArtifactType(StrEnum):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Callable, Dict, Iterable, List, Optional, Union, cast

from datahub.api.entities.datajob import DataFlow, DataJob
Expand All @@ -21,6 +20,7 @@
DataProcessRunStatusClass,
DataProcessTypeClass,
)
from datahub.utilities.str_enum import StrEnum
from datahub.utilities.urns.data_flow_urn import DataFlowUrn
from datahub.utilities.urns.data_job_urn import DataJobUrn
from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn
Expand All @@ -33,7 +33,7 @@ class DataProcessInstanceKey(DatahubKey):
id: str


class InstanceRunResult(str, Enum):
class InstanceRunResult(StrEnum):
SUCCESS = RunResultType.SUCCESS
SKIPPED = RunResultType.SKIPPED
FAILURE = RunResultType.FAILURE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from datahub.configuration.common import ConfigModel
from datahub.configuration.datetimes import parse_absolute_time, parse_relative_timespan
from datahub.metadata.schema_classes import CalendarIntervalClass
from datahub.utilities.str_enum import StrEnum


@enum.unique
class BucketDuration(str, enum.Enum):
class BucketDuration(StrEnum):
DAY = CalendarIntervalClass.DAY
HOUR = CalendarIntervalClass.HOUR

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from enum import Enum
from typing import Any, Dict, List, Optional

from datahub_classify.helper_classes import ColumnInfo
Expand All @@ -10,6 +9,7 @@
from datahub.configuration.common import ConfigModel
from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
from datahub.ingestion.glossary.classifier import Classifier
from datahub.utilities.str_enum import StrEnum


class NameFactorConfig(ConfigModel):
Expand All @@ -33,7 +33,7 @@ class DataTypeFactorConfig(ConfigModel):
)


class ValuePredictionType(str, Enum):
class ValuePredictionType(StrEnum):
REGEX = "regex"
LIBRARY = "library"

Expand Down
6 changes: 2 additions & 4 deletions metadata-ingestion/src/datahub/ingestion/graph/client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import contextlib
import enum
import functools
import json
import logging
Expand Down Expand Up @@ -67,6 +66,7 @@
TelemetryClientIdClass,
)
from datahub.utilities.perf_timer import PerfTimer
from datahub.utilities.str_enum import StrEnum
from datahub.utilities.urns.urn import Urn, guess_entity_type

if TYPE_CHECKING:
Expand Down Expand Up @@ -1138,9 +1138,7 @@ def execute_graphql(

return result["data"]

class RelationshipDirection(str, enum.Enum):
# FIXME: Upgrade to enum.StrEnum when we drop support for Python 3.10

class RelationshipDirection(StrEnum):
INCOMING = "INCOMING"
OUTGOING = "OUTGOING"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import Enum
from datahub.utilities.str_enum import StrEnum


class DatasetSubTypes(str, Enum):
class DatasetSubTypes(StrEnum):
# Generic SubTypes
TABLE = "Table"
VIEW = "View"
Expand All @@ -26,7 +26,7 @@ class DatasetSubTypes(str, Enum):
NOTEBOOK = "Notebook"


class DatasetContainerSubTypes(str, Enum):
class DatasetContainerSubTypes(StrEnum):
# Generic SubTypes
DATABASE = "Database"
SCHEMA = "Schema"
Expand All @@ -41,7 +41,7 @@ class DatasetContainerSubTypes(str, Enum):
ABS_CONTAINER = "ABS container"


class BIContainerSubTypes(str, Enum):
class BIContainerSubTypes(StrEnum):
LOOKER_FOLDER = "Folder"
LOOKML_PROJECT = "LookML Project"
LOOKML_MODEL = "LookML Model"
Expand All @@ -55,11 +55,11 @@ class BIContainerSubTypes(str, Enum):
MODE_COLLECTION = "Collection"


class JobContainerSubTypes(str, Enum):
class JobContainerSubTypes(StrEnum):
NIFI_PROCESS_GROUP = "Process Group"


class BIAssetSubTypes(str, Enum):
class BIAssetSubTypes(StrEnum):
# Generic SubTypes
REPORT = "Report"

Expand Down
4 changes: 2 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/kafka.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Iterable, List, Optional, Type, cast

import avro.schema
Expand Down Expand Up @@ -73,11 +72,12 @@
)
from datahub.utilities.mapping import Constants, OperationProcessor
from datahub.utilities.registries.domain_registry import DomainRegistry
from datahub.utilities.str_enum import StrEnum

logger = logging.getLogger(__name__)


class KafkaTopicConfigKeys(str, Enum):
class KafkaTopicConfigKeys(StrEnum):
MIN_INSYNC_REPLICAS_CONFIG = "min.insync.replicas"
RETENTION_SIZE_CONFIG = "retention.bytes"
RETENTION_TIME_CONFIG = "retention.ms"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, cast

from looker_sdk.sdk.api40.models import WriteQuery


# Enum whose value is string and compatible with dictionary having string value as key
class StrEnum(str, Enum):
pass
from datahub.utilities.str_enum import StrEnum


class LookerModel(StrEnum):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import Enum
from datahub.utilities.str_enum import StrEnum


class SnowflakeCloudProvider(str, Enum):
class SnowflakeCloudProvider(StrEnum):
AWS = "aws"
GCP = "gcp"
AZURE = "azure"
Expand All @@ -10,7 +10,7 @@ class SnowflakeCloudProvider(str, Enum):
SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS


class SnowflakeEdition(str, Enum):
class SnowflakeEdition(StrEnum):
STANDARD = "Standard"

# We use this to represent Enterprise Edition or higher
Expand Down Expand Up @@ -44,7 +44,7 @@ class SnowflakeEdition(str, Enum):

# We will always compare with lowercase
# Complete list for objectDomain - https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html
class SnowflakeObjectDomain(str, Enum):
class SnowflakeObjectDomain(StrEnum):
TABLE = "table"
EXTERNAL_TABLE = "external table"
VIEW = "view"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional, Set, cast

import pydantic
Expand Down Expand Up @@ -31,6 +30,7 @@
)
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
from datahub.utilities.global_warning_util import add_global_warning
from datahub.utilities.str_enum import StrEnum

logger = logging.Logger(__name__)

Expand All @@ -48,7 +48,7 @@
]


class TagOption(str, Enum):
class TagOption(StrEnum):
with_lineage = "with_lineage"
without_lineage = "without_lineage"
skip = "skip"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
import json
import logging
from collections import namedtuple
from enum import Enum
from itertools import groupby
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from pydantic.dataclasses import dataclass
from pydantic.fields import Field

# This import verifies that the dependencies are available.
from pyhive import hive # noqa: F401
from sqlalchemy import create_engine, text
from sqlalchemy.engine.reflection import Inspector

Expand Down Expand Up @@ -61,13 +59,14 @@
ViewPropertiesClass,
)
from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
from datahub.utilities.str_enum import StrEnum

logger: logging.Logger = logging.getLogger(__name__)

TableKey = namedtuple("TableKey", ["schema", "table"])


class HiveMetastoreConfigMode(str, Enum):
class HiveMetastoreConfigMode(StrEnum):
hive: str = "hive" # noqa: F811
presto: str = "presto"
presto_on_hive: str = "presto-on-hive"
Expand Down
33 changes: 33 additions & 0 deletions metadata-ingestion/src/datahub/testing/check_str_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pathlib
from typing import List


def ensure_no_enum_mixin(dirs: List[pathlib.Path]) -> None:
    """Fail if any Python file under ``dirs`` declares a str/int enum mixin.

    Every ``*.py`` file found recursively below each directory is scanned line
    by line for a disallowed mixin base list such as ``(str, Enum)``.

    Args:
        dirs: Directories to search recursively for ``*.py`` files.

    Raises:
        ValueError: On the first line that contains a disallowed enum mixin.
    """
    # See the docs on the StrEnum implementation for why this is necessary.

    bad_lines = (
        "(str, Enum)",
        "(str, enum.Enum)",
        # We don't have any int enums right now, but this will catch them if we add some.
        "(int, Enum)",
        "(int, enum.Enum)",
    )

    # These files legitimately contain the patterns above (the StrEnum
    # definition itself and this checker), so they are never scanned.
    ignored_files = (
        "datahub/utilities/str_enum.py",
        "datahub/testing/check_str_enum.py",
    )

    for directory in dirs:
        for path in directory.rglob("*.py"):
            # Compare POSIX-style paths so the "/"-separated suffixes also
            # match on platforms whose native separator is a backslash.
            if path.as_posix().endswith(ignored_files):
                continue

            # Python source is UTF-8 by default; do not rely on the locale.
            with path.open(encoding="utf-8") as f:
                for line in f:
                    if any(bad_line in line for bad_line in bad_lines):
                        raise ValueError(
                            f"Disallowed enum mixin found in {path}: `{line.rstrip()}`. "
                            "This enum mixin's behavior changed in Python 3.11, so it will work inconsistently across versions. "
                            "Use datahub.utilities.str_enum.StrEnum instead."
                        )
14 changes: 14 additions & 0 deletions metadata-ingestion/src/datahub/utilities/str_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from enum import Enum


class StrEnum(str, Enum):
    """Enum base class whose members are strings and stringify to their value."""

    # Python 3.11+ changed how enums behave in format() and f-strings, so
    # __str__ is overridden here to keep the output the same on every
    # supported version.
    # Once only Python 3.11+ is supported, this class can be replaced with
    # enum.StrEnum.
    # Background: https://blog.pecar.me/python-enum

    def __str__(self) -> str:
        """Return the member's value converted to ``str``."""
        member_value = self.value
        return str(member_value)
5 changes: 3 additions & 2 deletions metadata-ingestion/tests/performance/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from collections import OrderedDict
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Union

from typing_extensions import Literal

from datahub.utilities.str_enum import StrEnum

StatementType = Literal[ # SELECT + values from OperationTypeClass
"SELECT",
"INSERT",
Expand All @@ -26,7 +27,7 @@ class Container:
parent: Optional["Container"] = None


class ColumnType(str, Enum):
class ColumnType(StrEnum):
# Can add types that take parameters in the future

INTEGER = "INTEGER"
Expand Down
7 changes: 7 additions & 0 deletions metadata-ingestion/tests/unit/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import setuptools

from datahub.testing.check_imports import ensure_no_indirect_model_imports
from datahub.testing.check_str_enum import ensure_no_enum_mixin


def test_package_list_match_inits():
Expand All @@ -15,3 +16,9 @@ def test_check_import_paths(pytestconfig: pytest.Config) -> None:
root = pytestconfig.rootpath

ensure_no_indirect_model_imports([root / "src", root / "tests"])


def test_check_str_enum_usage(pytestconfig: pytest.Config) -> None:
    """Check the ``src`` and ``tests`` trees for disallowed enum mixins."""
    root = pytestconfig.rootpath
    scan_dirs = [root / subdir for subdir in ("src", "tests")]

    ensure_no_enum_mixin(scan_dirs)
Loading