From bf7457335870ef9cceb6f52330e13260d6951273 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 7 Aug 2024 12:05:43 -0700 Subject: [PATCH] fix(snowflake): avoid reporting warnings/info for sys tables --- .../src/datahub/ingestion/api/source.py | 2 +- .../source/snowflake/snowflake_schema_gen.py | 2 +- .../source/snowflake/snowflake_utils.py | 20 +++++++++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index a4de8b382430c..3dea3d36f41f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -47,7 +47,7 @@ logger = logging.getLogger(__name__) -_MAX_CONTEXT_STRING_LENGTH = 300 +_MAX_CONTEXT_STRING_LENGTH = 1000 class SourceCapability(Enum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 1d4a5b377da14..a64589bcfed02 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -440,7 +440,7 @@ def _process_schema( yield from self._process_tag(tag) if not snowflake_schema.views and not snowflake_schema.tables: - self.structured_reporter.warning( + self.structured_reporter.info( title="No tables/views found in schema", message="If tables exist, please grant REFERENCES or SELECT permissions on them.", context=f"{db_name}.{schema_name}", diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index a1878963d3798..0177d59ef6b21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -127,6 +127,8 @@ def is_dataset_pattern_allowed( SnowflakeObjectDomain.MATERIALIZED_VIEW, ): return False + if _is_sys_table(dataset_name): + return False if len(dataset_params) != 3: self.structured_reporter.info( @@ -176,6 +178,11 @@ def _combine_identifier_parts( return f"{db_name}.{schema_name}.{table_name}" +def _is_sys_table(table_name: str) -> bool: + # Often will look like `SYS$_UNPIVOT_VIEW1737` or `sys$_pivot_view19`. + return table_name.lower().startswith("sys$") + + # Qualified Object names from snowflake audit logs have quotes for for snowflake quoted identifiers, # For example "test-database"."test-schema".test_table # whereas we generate urns without quotes even for quoted identifiers for backward compatibility @@ -186,12 +193,13 @@ def _cleanup_qualified_name( ) -> str: name_parts = qualified_name.split(".") if len(name_parts) != 3: - structured_reporter.info( - title="Unexpected dataset pattern", - message="We failed to parse a Snowflake qualified name into its constituent parts. " - "DB/schema/table filtering may not work as expected on these entities.", - context=f"{qualified_name} has {len(name_parts)} parts", - ) + if not _is_sys_table(qualified_name): + structured_reporter.info( + title="Unexpected dataset pattern", + message="We failed to parse a Snowflake qualified name into its constituent parts. " + "DB/schema/table filtering may not work as expected on these entities.", + context=f"{qualified_name} has {len(name_parts)} parts", + ) return qualified_name.replace('"', "") return _combine_identifier_parts( db_name=name_parts[0].strip('"'),