From c42f77985947418de108e7e5b515aafdbf638ed3 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Fri, 29 Nov 2024 21:28:31 +0530 Subject: [PATCH] fix: Add option for disabling ownership extraction (#11970) Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .../app/ingest/source/builder/sources.json | 2 +- .../docs/sources/dremio/dremio_recipe.yml | 2 ++ .../ingestion/source/dremio/dremio_aspects.py | 34 +++++++++++-------- .../ingestion/source/dremio/dremio_config.py | 5 +++ .../ingestion/source/dremio/dremio_source.py | 2 ++ 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 70d9baabdb4bc6..44b8a37f14655d 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -309,7 +309,7 @@ "displayName": "Dremio", "description": "Import Spaces, Sources, Tables and statistics from Dremio.", "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", - "recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: \n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n stateful_ingestion:\n enabled: true" + "recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: \n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n ingest_owner: true\n\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:cassandra", diff --git a/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml b/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml index 9dcd4f8b337d16..d18d19da2de84b 100644 --- a/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml +++ b/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml @@ -20,6 +20,8 @@ source: include_query_lineage: True + ingest_owner: true + #Optional source_mappings: - platform: s3 diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py index b29fc91a25e74c..d9d85edbf4f7a0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py @@ -142,6 +142,7 @@ def __init__( platform: str, ui_url: str, env: str, + ingest_owner: bool, domain: Optional[str] = None, platform_instance: Optional[str] = None, ): @@ -150,6 +151,7 @@ def __init__( self.env = env self.domain = domain self.ui_url = ui_url + self.ingest_owner = ingest_owner def get_container_key( self, name: Optional[str], path: Optional[List[str]] @@ -426,21 +428,23 @@ def _create_external_url(self, dataset: DremioDataset) -> str: return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"' def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]: - if not dataset.owner: - return None - owner = ( - make_user_urn(dataset.owner) - if dataset.owner_type == "USER" - else make_group_urn(dataset.owner) - ) - return OwnershipClass( - owners=[ - OwnerClass( - owner=owner, - type=OwnershipTypeClass.TECHNICAL_OWNER, - ) - ] - ) + if self.ingest_owner and dataset.owner: + owner_urn = ( + make_user_urn(dataset.owner) + if dataset.owner_type == "USER" + else make_group_urn(dataset.owner) + ) + ownership: OwnershipClass = OwnershipClass( + owners=[ + OwnerClass( + owner=owner_urn, + type=OwnershipTypeClass.TECHNICAL_OWNER, + ) + ] + ) + return ownership + + return None def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass: return GlossaryTermsClass( diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py index d966d575c03320..b3f2107a1dfaa7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py @@ -174,3 +174,8 @@ def is_profiling_enabled(self) -> bool: default=False, description="Whether to include query-based lineage information.", ) + + ingest_owner: bool = Field( + default=True, + description="Ingest Owner from source. This will override Owner info entered from UI", + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py index 5b96845ec04961..5535a406177016 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py @@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase): - Ownership and Glossary Terms: - Metadata related to ownership of datasets, extracted from Dremio’s ownership model. - Glossary terms and business metadata associated with datasets, providing additional context to the data. + - Note: Ownership information will only be available for the Cloud and Enterprise editions, it will not be available for the Community edition. - Optional SQL Profiling (if enabled): - Table, row, and column statistics can be profiled and ingested via optional SQL queries. @@ -123,6 +124,7 @@ def __init__(self, config: DremioSourceConfig, ctx: PipelineContext): self.dremio_aspects = DremioAspects( platform=self.get_platform(), domain=self.config.domain, + ingest_owner=self.config.ingest_owner, platform_instance=self.config.platform_instance, env=self.config.env, ui_url=dremio_api.ui_url,