diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index bb5d0636f67123..99790de529ac3a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -129,7 +129,9 @@ def tables_for_database(db_name: Optional[str]) -> str: row_count AS "ROW_COUNT", bytes AS "BYTES", clustering_key AS "CLUSTERING_KEY", - auto_clustering_on AS "AUTO_CLUSTERING_ON" + auto_clustering_on AS "AUTO_CLUSTERING_ON", + is_dynamic AS "IS_DYNAMIC", + is_iceberg AS "IS_ICEBERG" FROM {db_clause}information_schema.tables t WHERE table_schema != 'INFORMATION_SCHEMA' and table_type in ( 'BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE') @@ -149,7 +151,9 @@ def tables_for_schema(schema_name: str, db_name: Optional[str]) -> str: row_count AS "ROW_COUNT", bytes AS "BYTES", clustering_key AS "CLUSTERING_KEY", - auto_clustering_on AS "AUTO_CLUSTERING_ON" + auto_clustering_on AS "AUTO_CLUSTERING_ON", + is_dynamic AS "IS_DYNAMIC", + is_iceberg AS "IS_ICEBERG" FROM {db_clause}information_schema.tables t where table_schema='{schema_name}' and table_type in ('BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE') diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index b5f56f99431f91..030b2d43be81f9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -113,6 +113,7 @@ class SnowflakeV2Report( external_lineage_queries_secs: float = -1 num_tables_with_known_upstreams: int = 0 num_upstream_lineage_edge_parsing_failed: int = 0 + num_secure_views_missing_definition: int = 0 data_dictionary_cache: Optional["SnowflakeDataDictionary"] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 600292c2c99429..5a69b4bb779d72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -90,6 +90,12 @@ class SnowflakeTable(BaseTable): foreign_keys: List[SnowflakeFK] = field(default_factory=list) tags: Optional[List[SnowflakeTag]] = None column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict) + is_dynamic: bool = False + is_iceberg: bool = False + + @property + def is_hybrid(self) -> bool: + return self.type is not None and self.type == "HYBRID TABLE" @dataclass @@ -98,6 +104,7 @@ class SnowflakeView(BaseView): columns: List[SnowflakeColumn] = field(default_factory=list) tags: Optional[List[SnowflakeTag]] = None column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict) + is_secure: bool = False @dataclass @@ -289,6 +296,8 @@ def get_tables_for_database( rows_count=table["ROW_COUNT"], comment=table["COMMENT"], clustering_key=table["CLUSTERING_KEY"], + is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES", + is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES", ) ) return tables @@ -313,6 +322,8 @@ def get_tables_for_schema( rows_count=table["ROW_COUNT"], comment=table["COMMENT"], clustering_key=table["CLUSTERING_KEY"], + is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES", + is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES", ) ) return tables @@ -356,6 +367,7 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]] materialized=( view.get("is_materialized", "false").lower() == "true" ), + is_secure=(view.get("is_secure", "false").lower() == "true"), ) ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 2bd8e8017f5492..4ceeb8560c1758 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -431,6 +431,8 @@ def _process_schema( default_db=db_name, default_schema=schema_name, ) + elif view.is_secure: + self.report.num_secure_views_missing_definition += 1 if self.config.include_technical_schema: for view in views: @@ -749,8 +751,21 @@ def get_dataset_properties( ) -> DatasetProperties: custom_properties = {} - if isinstance(table, SnowflakeTable) and table.clustering_key: - custom_properties["CLUSTERING_KEY"] = table.clustering_key + if isinstance(table, SnowflakeTable): + if table.clustering_key: + custom_properties["CLUSTERING_KEY"] = table.clustering_key + + if table.is_hybrid: + custom_properties["IS_HYBRID"] = "true" + + if table.is_dynamic: + custom_properties["IS_DYNAMIC"] = "true" + + if table.is_iceberg: + custom_properties["IS_ICEBERG"] = "true" + + if isinstance(table, SnowflakeView) and table.is_secure: + custom_properties["IS_SECURE"] = "true" return DatasetProperties( name=table.name,