From 6f32e5348d9aeba9c5d51a892023b2e14e072119 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 15 Oct 2024 15:32:32 -0700 Subject: [PATCH] Feat: support non-strict qualify_columns (#4243) * Feat: support qualify_columns non-strictly * add new flag --- sqlglot/optimizer/qualify.py | 3 +++ sqlglot/optimizer/qualify_columns.py | 13 ++++++++++--- tests/fixtures/optimizer/qualify_columns.sql | 6 ++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sqlglot/optimizer/qualify.py b/sqlglot/optimizer/qualify.py index 0c5014fdb7..03ab6ba8f3 100644 --- a/sqlglot/optimizer/qualify.py +++ b/sqlglot/optimizer/qualify.py @@ -27,6 +27,7 @@ def qualify( infer_schema: t.Optional[bool] = None, isolate_tables: bool = False, qualify_columns: bool = True, + allow_partial_qualification: bool = False, validate_qualify_columns: bool = True, quote_identifiers: bool = True, identify: bool = True, @@ -56,6 +57,7 @@ def qualify( infer_schema: Whether to infer the schema if missing. isolate_tables: Whether to isolate table selects. qualify_columns: Whether to qualify columns. + allow_partial_qualification: Whether to allow partial qualification. validate_qualify_columns: Whether to validate columns. quote_identifiers: Whether to run the quote_identifiers step. This step is necessary to ensure correctness for case sensitive queries. @@ -90,6 +92,7 @@ def qualify( expand_alias_refs=expand_alias_refs, expand_stars=expand_stars, infer_schema=infer_schema, + allow_partial_qualification=allow_partial_qualification, ) if quote_identifiers: diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index 5abe70e137..4e0e7d8217 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -22,6 +22,7 @@ def qualify_columns( expand_alias_refs: bool = True, expand_stars: bool = True, infer_schema: t.Optional[bool] = None, + allow_partial_qualification: bool = False, ) -> exp.Expression: """ Rewrite sqlglot AST to have fully qualified columns. @@ -41,6 +42,7 @@ def qualify_columns( for most of the optimizer's rules to work; do not set to False unless you know what you're doing! infer_schema: Whether to infer the schema if missing. + allow_partial_qualification: Whether to allow partial qualification. Returns: The qualified expression. @@ -68,7 +70,7 @@ def qualify_columns( ) _convert_columns_to_dots(scope, resolver) - _qualify_columns(scope, resolver) + _qualify_columns(scope, resolver, allow_partial_qualification=allow_partial_qualification) if not schema.empty and expand_alias_refs: _expand_alias_refs(scope, resolver) @@ -441,7 +443,7 @@ def _convert_columns_to_dots(scope: Scope, resolver: Resolver) -> None: scope.clear_cache() -def _qualify_columns(scope: Scope, resolver: Resolver) -> None: +def _qualify_columns(scope: Scope, resolver: Resolver, allow_partial_qualification: bool) -> None: """Disambiguate columns, ensuring each column specifies a source""" for column in scope.columns: column_table = column.table @@ -449,7 +451,12 @@ def _qualify_columns(scope: Scope, resolver: Resolver) -> None: if column_table and column_table in scope.sources: source_columns = resolver.get_source_columns(column_table) - if source_columns and column_name not in source_columns and "*" not in source_columns: + if ( + not allow_partial_qualification + and source_columns + and column_name not in source_columns + and "*" not in source_columns + ): raise OptimizeError(f"Unknown column: {column_name}") if not column_table: diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql index 68c0caacfc..2c99a81da5 100644 --- a/tests/fixtures/optimizer/qualify_columns.sql +++ b/tests/fixtures/optimizer/qualify_columns.sql @@ -190,6 +190,12 @@ SELECT x._col_0 AS _col_0, x._col_1 AS _col_1 FROM (VALUES (1, 2)) AS x(_col_0, SELECT SOME_UDF(data).* FROM t; SELECT SOME_UDF(t.data).* FROM t AS t; +# execute: false +# allow_partial_qualification: true +# validate_qualify_columns: false +SELECT a + 1 AS i, missing_column FROM x; +SELECT x.a + 1 AS i, missing_column AS missing_column FROM x AS x; + -------------------------------------- -- Derived tables --------------------------------------