chore(ingest): start using explicit exports #11899

Merged: 9 commits, Nov 20, 2024
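
The change set below applies one idea across the ingestion codebase: with mypy's implicit_reexport turned off, a name merely imported into a module is private to it, and a module must opt names into its public surface explicitly. A minimal sketch of the two sanctioned forms (module and class names here are illustrative, not from this PR):

# pkg/__init__.py, form 1: a redundant alias marks an intentional re-export
from pkg.impl import Widget as Widget

# pkg/__init__.py, form 2: list the public names in __all__
from pkg.impl import Widget

__all__ = ["Widget"]

# Under mypy --no-implicit-reexport, a bare `from pkg.impl import Widget`
# in pkg/__init__.py would make `from pkg import Widget` a type error.
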
4 changes: 2 additions & 2 deletions metadata-ingestion/scripts/avro_codegen.py
@@ -769,7 +769,7 @@ def generate(
 import importlib
 from typing import TYPE_CHECKING

-from datahub._codegen.aspect import _Aspect
+from datahub._codegen.aspect import _Aspect as _Aspect
 from datahub.utilities.docs_build import IS_SPHINX_BUILD
 from datahub.utilities._custom_package_loader import get_custom_models_package

@@ -802,7 +802,7 @@ def generate(

 from datahub.utilities.docs_build import IS_SPHINX_BUILD
 from datahub.utilities._custom_package_loader import get_custom_urns_package
-from datahub.utilities.urns._urn_base import Urn  # noqa: F401
+from datahub.utilities.urns._urn_base import Urn as Urn  # noqa: F401

 _custom_package_path = get_custom_urns_package()

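Both hunks sit inside generate(), in the code templates that avro_codegen.py writes into the generated datahub.metadata modules, so the redundant as aliases propagate into generated files. Roughly what the alias buys downstream (the target module name is an assumption, not shown in this diff):

# illustrative consumer of a generated module
from datahub.metadata.schema_classes import _Aspect  # assumed generated target

# fine under strict mypy because the generated module re-exports _Aspect
# via the redundant alias; with the bare import it would fail with roughly:
#   error: Module "datahub.metadata.schema_classes" does not explicitly
#   export attribute "_Aspect"
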
10 changes: 9 additions & 1 deletion metadata-ingestion/setup.cfg
@@ -31,7 +31,7 @@ exclude =
     __pycache__
 per-file-ignores =
     # imported but unused
-    __init__.py: F401
+    __init__.py: F401, I250
 ban-relative-imports = true

 [mypy]
@@ -53,6 +53,14 @@ disallow_untyped_defs = no
 # try to be a bit more strict in certain areas of the codebase
 [mypy-datahub.*]
 ignore_missing_imports = no
+implicit_reexport = no
+[mypy-datahub.metadata.*]
+# TODO: Remove this once all the code has been updated.
+implicit_reexport = yes
+[mypy-datahub.ingestion.*]
+# TODO: Remove this once all the code has been updated.
+implicit_reexport = yes
+
 [mypy-datahub_provider.*]
 ignore_missing_imports = no
 [mypy-tests.*]
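
Two knobs work together here: I250 is flake8-tidy-imports' "unnecessary import alias" rule, which would otherwise flag the X as X idiom, so it joins F401 in the __init__.py ignore list; and implicit_reexport = no is the mypy switch that makes un-aliased imports module-private, enabled for datahub.* with the metadata and ingestion trees temporarily grandfathered. A minimal repro of the mypy behavior, with hypothetical file names:

# lib.py
class Thing:
    pass

# api.py -- without the redundant alias (or an __all__ entry),
# `mypy --no-implicit-reexport` rejects main.py below with roughly:
#   error: Module "api" does not explicitly export attribute "Thing"
from lib import Thing as Thing

# main.py
from api import Thing
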
@@ -12,3 +12,10 @@
 )

 requests_logger.setLevel(logging.WARNING)
+
+__all__ = [
+    "AssertionCircuitBreaker",
+    "AssertionCircuitBreakerConfig",
+    "OperationCircuitBreaker",
+    "OperationCircuitBreakerConfig",
+]
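
This hunk (the file path isn't captured in this view; its contents match a circuit-breaker package __init__.py) takes the __all__ route instead of aliasing. Besides satisfying the re-export check, __all__ pins down what a star import pulls in. Assuming the package path datahub.api.circuit_breaker:

# an explicit, type-checker-visible export
from datahub.api.circuit_breaker import AssertionCircuitBreaker

# a star import is now limited to exactly the four listed names
from datahub.api.circuit_breaker import *
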
@@ -6,7 +6,7 @@
 from gql.transport.requests import RequestsHTTPTransport
 from pydantic import Field

-from datahub.configuration import ConfigModel
+from datahub.configuration.common import ConfigModel

 logger = logging.getLogger(__name__)

@@ -1,2 +1,5 @@
 from datahub.api.entities.datajob.dataflow import DataFlow
 from datahub.api.entities.datajob.datajob import DataJob
+
+# TODO: Remove this and start importing directly from the inner files.
+__all__ = ["DataFlow", "DataJob"]
@@ -3,7 +3,6 @@
 from typing import Callable, Dict, Iterable, List, Optional, Set, cast

 import datahub.emitter.mce_builder as builder
-from datahub.configuration.source_common import ALL_ENV_TYPES
 from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.schema_classes import (
@@ -114,7 +113,7 @@ def generate_tags_aspect(self) -> List[GlobalTagsClass]:

     def _get_env(self) -> Optional[str]:
         env: Optional[str] = None
-        if self.env and self.env.upper() in ALL_ENV_TYPES:
+        if self.env and self.env.upper() in builder.ALL_ENV_TYPES:
             env = self.env.upper()
         else:
             logger.debug(
@@ -3,7 +3,6 @@
 from typing import Callable, Dict, Iterable, List, Optional, Set

 import datahub.emitter.mce_builder as builder
-from datahub.configuration.source_common import ALL_ENV_TYPES
 from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.schema_classes import (
@@ -109,7 +108,7 @@ def generate_mcp(
         self, materialize_iolets: bool = True
     ) -> Iterable[MetadataChangeProposalWrapper]:
         env: Optional[str] = None
-        if self.flow_urn.cluster.upper() in ALL_ENV_TYPES:
+        if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
             env = self.flow_urn.cluster.upper()
         else:
             logger.debug(
2 changes: 2 additions & 0 deletions metadata-ingestion/src/datahub/api/graphql/__init__.py
@@ -1,2 +1,4 @@
 from datahub.api.graphql.assertion import Assertion
 from datahub.api.graphql.operation import Operation
+
+__all__ = ["Assertion", "Operation"]
3 changes: 2 additions & 1 deletion metadata-ingestion/src/datahub/cli/put_cli.py
@@ -6,11 +6,12 @@

 from datahub.cli.cli_utils import post_entity
 from datahub.configuration.config_loader import load_config_file
-from datahub.emitter.mcp import MetadataChangeProposalWrapper, SystemMetadataClass
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.metadata.schema_classes import (
     DataPlatformInfoClass as DataPlatformInfo,
     PlatformTypeClass,
+    SystemMetadataClass,
 )
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
5 changes: 2 additions & 3 deletions metadata-ingestion/src/datahub/configuration/__init__.py
@@ -1,5 +1,4 @@
 from datahub.configuration.common import (
-    ConfigModel,
-    ConfigurationMechanism,
-    DynamicTypedConfig,
+    ConfigModel as ConfigModel,
+    DynamicTypedConfig as DynamicTypedConfig,
 )
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/configuration/common.py
@@ -21,7 +21,7 @@
 from pydantic.fields import Field
 from typing_extensions import Protocol

-from datahub.configuration._config_enum import ConfigEnum
+from datahub.configuration._config_enum import ConfigEnum as ConfigEnum  # noqa: I250
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
 from datahub.utilities.dedup_list import deduplicate_list

@@ -1,7 +1,7 @@
 import json
 from typing import IO

-from datahub.configuration import ConfigurationMechanism
+from datahub.configuration.common import ConfigurationMechanism


 class JsonConfigurationMechanism(ConfigurationMechanism):
8 changes: 2 additions & 6 deletions metadata-ingestion/src/datahub/configuration/source_common.py
@@ -1,14 +1,10 @@
-from typing import Dict, Optional, Set
+from typing import Dict, Optional

 from pydantic import validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigModel
-from datahub.emitter.enum_helpers import get_enum_options
-from datahub.metadata.schema_classes import FabricTypeClass
-
-DEFAULT_ENV = FabricTypeClass.PROD
-ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass))
+from datahub.emitter.mce_builder import ALL_ENV_TYPES, DEFAULT_ENV


 class PlatformInstanceConfigMixin(ConfigModel):
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/configuration/yaml.py
@@ -2,7 +2,7 @@

 import yaml

-from datahub.configuration import ConfigurationMechanism
+from datahub.configuration.common import ConfigurationMechanism


 class YamlConfigurationMechanism(ConfigurationMechanism):
6 changes: 5 additions & 1 deletion metadata-ingestion/src/datahub/emitter/mce_builder.py
@@ -13,6 +13,7 @@
     Any,
     List,
     Optional,
+    Set,
     Tuple,
     Type,
     TypeVar,
@@ -24,7 +25,6 @@
 import typing_inspect
 from avrogen.dict_wrapper import DictWrapper

-from datahub.configuration.source_common import DEFAULT_ENV
 from datahub.emitter.enum_helpers import get_enum_options
 from datahub.metadata.schema_classes import (
     AssertionKeyClass,
@@ -35,6 +35,7 @@
     DatasetKeyClass,
     DatasetLineageTypeClass,
     DatasetSnapshotClass,
+    FabricTypeClass,
     GlobalTagsClass,
     GlossaryTermAssociationClass,
     GlossaryTermsClass as GlossaryTerms,
@@ -56,6 +57,9 @@
 logger = logging.getLogger(__name__)
 Aspect = TypeVar("Aspect", bound=AspectAbstract)

+DEFAULT_ENV = FabricTypeClass.PROD
+ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass))
+
 DEFAULT_FLOW_CLUSTER = "prod"
 UNKNOWN_USER = "urn:li:corpuser:unknown"
 DATASET_URN_TO_LOWER: bool = (
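
DEFAULT_ENV and ALL_ENV_TYPES are now defined here in mce_builder and re-imported by configuration/source_common.py (see its hunk above), inverting the old dependency direction. Existing imports of the constants from source_common keep working at runtime, though strict mypy now steers callers to the new home:

# old location: still resolves at runtime via source_common's re-import,
# but no longer an explicit export there under strict mypy
from datahub.configuration.source_common import DEFAULT_ENV

# new canonical location per this PR
from datahub.emitter.mce_builder import ALL_ENV_TYPES, DEFAULT_ENV

# the same membership check dataflow.py/datajob.py now spell via
# builder.ALL_ENV_TYPES
assert DEFAULT_ENV.upper() in ALL_ENV_TYPES
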
7 changes: 2 additions & 5 deletions metadata-ingestion/src/datahub/entrypoints.py
@@ -13,13 +13,10 @@
     generate_access_token,
     make_shim_command,
 )
-from datahub.cli.config_utils import (
-    DATAHUB_CONFIG_PATH,
-    get_boolean_env_variable,
-    write_gms_config,
-)
+from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
 from datahub.cli.delete_cli import delete
 from datahub.cli.docker_cli import docker
+from datahub.cli.env_utils import get_boolean_env_variable
 from datahub.cli.exists_cli import exists
 from datahub.cli.get_cli import get
 from datahub.cli.ingest_cli import ingest
5 changes: 4 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/api/decorators.py
@@ -3,7 +3,10 @@
 from typing import Callable, Dict, Optional, Type

 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.api.source import Source, SourceCapability
+from datahub.ingestion.api.source import (  # noqa: I250
+    Source,
+    SourceCapability as SourceCapability,
+)


 def config_class(config_cls: Type) -> Callable[[Type], Type]:
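
In decorators.py only SourceCapability is re-exported; Source is consumed by the decorators themselves, so it carries no alias, and the # noqa: I250 on the parenthesized import silences the unnecessary-alias lint for the one aliased name. Schematically (mylib.source is a hypothetical module, for illustration):

from typing import Type

from mylib.source import (  # noqa: I250
    Source,                                # used below, stays module-private
    SourceCapability as SourceCapability,  # re-exported for callers
)

def supports(capability: SourceCapability):
    # hypothetical decorator: tags a Source subclass with a capability
    def decorator(cls: Type[Source]) -> Type[Source]:
        return cls
    return decorator
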
@@ -23,7 +23,7 @@
     RecordTypeClass,
     SchemaFieldClass as SchemaField,
     SchemaFieldDataTypeClass,
-    SchemaMetadataClass as SchemaMetadata,
+    SchemaMetadataClass,
     StringTypeClass,
     UnionTypeClass,
 )
@@ -665,13 +665,13 @@ def get_schema_metadata(
     name: str,
     json_schema: Dict[Any, Any],
     raw_schema_string: Optional[str] = None,
-) -> SchemaMetadata:
+) -> SchemaMetadataClass:
     json_schema_as_string = raw_schema_string or json.dumps(json_schema)
     md5_hash: str = md5(json_schema_as_string.encode()).hexdigest()

     schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(json_schema))

-    schema_metadata = SchemaMetadata(
+    schema_metadata = SchemaMetadataClass(
         schemaName=name,
         platform=f"urn:li:dataPlatform:{platform}",
         version=0,
@@ -32,7 +32,7 @@
     OneofDescriptor,
 )

-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
@@ -41,8 +41,8 @@
     MapTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     StringTypeClass,
     UnionTypeClass,
 )
4 changes: 3 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -33,7 +33,9 @@
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.emitter.serialization_helper import post_json_transform
-from datahub.ingestion.graph.config import DatahubClientConfig
+from datahub.ingestion.graph.config import (  # noqa: I250; TODO: Remove this alias
+    DatahubClientConfig as DatahubClientConfig,
+)
 from datahub.ingestion.graph.connections import (
     connections_gql,
     get_id_from_connection_urn,
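
DatahubClientConfig now lives in datahub.ingestion.graph.config, and the aliased import above keeps the long-standing import path through graph.client working until the TODO is paid down. Both spellings resolve to the same class:

from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.ingestion.graph.client import DatahubClientConfig as ViaClient

# the compatibility alias means these are one and the same object
assert DatahubClientConfig is ViaClient
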
@@ -11,9 +11,8 @@
     redact_raw_config,
 )
 from datahub.emitter.aspect import JSON_CONTENT_TYPE
-from datahub.emitter.mce_builder import datahub_guid
+from datahub.emitter.mce_builder import datahub_guid, make_data_platform_urn
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import make_data_platform_urn
 from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
 from datahub.ingestion.api.pipeline_run_listener import PipelineRunListener
 from datahub.ingestion.api.sink import NoopWriteCallback, Sink
@@ -117,9 +117,8 @@
     ViewPropertiesClass,
 )
 from datahub.metadata.urns import DatasetUrn
-from datahub.sql_parsing.schema_resolver import SchemaResolver
+from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
 from datahub.sql_parsing.sqlglot_lineage import (
-    SchemaInfo,
     SqlParsingDebugInfo,
     SqlParsingResult,
     infer_output_schema,
@@ -6,9 +6,8 @@
 from pydantic import Field
 from typing_extensions import Literal

-from datahub.configuration.common import AllowDenyPattern
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.configuration.source_common import (
-    ConfigModel,
     EnvConfigMixin,
     PlatformInstanceConfigMixin,
 )
@@ -52,24 +52,22 @@
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
+    DataPlatformInstanceClass,
+    DatasetPropertiesClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
+    SchemaFieldClass as SchemaField,
+    SchemaFieldDataTypeClass as SchemaFieldDataType,
     SchemalessClass,
-    SchemaMetadata,
+    SchemaMetadataClass,
     StringTypeClass,
     UnionTypeClass,
 )
-from datahub.metadata.schema_classes import (
-    DataPlatformInstanceClass,
-    DatasetPropertiesClass,
-)
 from datahub.utilities.registries.domain_registry import DomainRegistry

 MAX_ITEMS_TO_RETRIEVE = 100
@@ -448,7 +446,7 @@ def construct_schema_metadata(
         dataset_properties: DatasetPropertiesClass,
         schema: Dict[Tuple[str, ...], SchemaDescription],
         primary_key_dict: Dict[str, str],
-    ) -> SchemaMetadata:
+    ) -> SchemaMetadataClass:
        """ "
        To construct the schema metadata, it will first sort the schema by the occurrence of attribute names
        in descending order and truncate the schema by MAX_SCHEMA_SIZE, and then start to construct the
@@ -502,7 +500,7 @@ def construct_schema_metadata(
             canonical_schema.append(field)

         # create schema metadata object for table
-        schema_metadata = SchemaMetadata(
+        schema_metadata = SchemaMetadataClass(
             schemaName=table_name,
             platform=f"urn:li:dataPlatform:{self.platform}",
             version=0,
@@ -12,8 +12,9 @@
     ConfigModel,
     ConfigurationWarning,
 )
-from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
+from datahub.configuration.source_common import DatasetSourceConfigMixin
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
+from datahub.emitter.mce_builder import DEFAULT_ENV
 from datahub.ingestion.api.report import Report
 from datahub.ingestion.source.bigquery_v2.bigquery_config import (
     BigQueryConnectionConfig,