Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat(ingest/snowflake): ingest secure, dynamic, hybrid table metadata #12094

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ def tables_for_database(db_name: Optional[str]) -> str:
row_count AS "ROW_COUNT",
bytes AS "BYTES",
clustering_key AS "CLUSTERING_KEY",
auto_clustering_on AS "AUTO_CLUSTERING_ON"
auto_clustering_on AS "AUTO_CLUSTERING_ON",
is_dynamic AS "IS_DYNAMIC",
is_iceberg AS "IS_ICEBERG"
FROM {db_clause}information_schema.tables t
WHERE table_schema != 'INFORMATION_SCHEMA'
and table_type in ( 'BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
Expand All @@ -149,7 +151,9 @@ def tables_for_schema(schema_name: str, db_name: Optional[str]) -> str:
row_count AS "ROW_COUNT",
bytes AS "BYTES",
clustering_key AS "CLUSTERING_KEY",
auto_clustering_on AS "AUTO_CLUSTERING_ON"
auto_clustering_on AS "AUTO_CLUSTERING_ON",
is_dynamic AS "IS_DYNAMIC",
is_iceberg AS "IS_ICEBERG"
FROM {db_clause}information_schema.tables t
where table_schema='{schema_name}'
and table_type in ('BASE TABLE', 'EXTERNAL TABLE', 'HYBRID TABLE')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class SnowflakeV2Report(
external_lineage_queries_secs: float = -1
num_tables_with_known_upstreams: int = 0
num_upstream_lineage_edge_parsing_failed: int = 0
num_secure_views_missing_definition: int = 0

data_dictionary_cache: Optional["SnowflakeDataDictionary"] = None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ class SnowflakeTable(BaseTable):
foreign_keys: List[SnowflakeFK] = field(default_factory=list)
tags: Optional[List[SnowflakeTag]] = None
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
is_dynamic: bool = False
is_iceberg: bool = False

@property
def is_hybrid(self) -> bool:
    """Report whether this table's ``type`` is ``"HYBRID TABLE"``.

    Returns ``False`` when ``type`` is ``None`` or any other value.
    """
    table_type = self.type
    # Guard clause: a missing type can never be a hybrid table.
    if table_type is None:
        return False
    return table_type == "HYBRID TABLE"


@dataclass
Expand All @@ -98,6 +104,7 @@ class SnowflakeView(BaseView):
columns: List[SnowflakeColumn] = field(default_factory=list)
tags: Optional[List[SnowflakeTag]] = None
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
is_secure: bool = False


@dataclass
Expand Down Expand Up @@ -289,6 +296,8 @@ def get_tables_for_database(
rows_count=table["ROW_COUNT"],
comment=table["COMMENT"],
clustering_key=table["CLUSTERING_KEY"],
is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
)
)
return tables
Expand All @@ -313,6 +322,8 @@ def get_tables_for_schema(
rows_count=table["ROW_COUNT"],
comment=table["COMMENT"],
clustering_key=table["CLUSTERING_KEY"],
is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
)
)
return tables
Expand Down Expand Up @@ -356,6 +367,7 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]
materialized=(
view.get("is_materialized", "false").lower() == "true"
),
is_secure=(view.get("is_secure", "false").lower() == "true"),
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ def _process_schema(
default_db=db_name,
default_schema=schema_name,
)
elif view.is_secure:
self.report.num_secure_views_missing_definition += 1

if self.config.include_technical_schema:
for view in views:
Expand Down Expand Up @@ -749,8 +751,21 @@ def get_dataset_properties(
) -> DatasetProperties:
custom_properties = {}

if isinstance(table, SnowflakeTable) and table.clustering_key:
custom_properties["CLUSTERING_KEY"] = table.clustering_key
if isinstance(table, SnowflakeTable):
if table.clustering_key:
custom_properties["CLUSTERING_KEY"] = table.clustering_key

if table.is_hybrid:
custom_properties["IS_HYBRID"] = "true"

if table.is_dynamic:
custom_properties["IS_DYNAMIC"] = "true"

if table.is_iceberg:
custom_properties["IS_ICEBERG"] = "true"

if isinstance(table, SnowflakeView) and table.is_secure:
custom_properties["IS_SECURE"] = "true"

return DatasetProperties(
name=table.name,
Expand Down
Loading