diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index 0fd4fc6471..5abe70e137 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -240,13 +240,21 @@ def _expand_alias_refs(scope: Scope, resolver: Resolver, expand_only_groupby: bo def replace_columns( node: t.Optional[exp.Expression], resolve_table: bool = False, literal_index: bool = False ) -> None: - if not node or (expand_only_groupby and not isinstance(node, exp.Group)): + is_group_by = isinstance(node, exp.Group) + if not node or (expand_only_groupby and not is_group_by): return for column in walk_in_scope(node, prune=lambda node: node.is_star): if not isinstance(column, exp.Column): continue + # BigQuery's GROUP BY allows alias expansion only for standalone names, e.g.: + # SELECT FUNC(col) AS col FROM t GROUP BY col --> Can be expanded + # SELECT FUNC(col) AS col FROM t GROUP BY FUNC(col) --> Shouldn't be expanded, it would result in FUNC(FUNC(col)) + # This is not required for the HAVING clause as it can evaluate expressions using both the alias & the table columns + if expand_only_groupby and is_group_by and column.parent is not node: + continue + table = resolver.get_table(column.name) if resolve_table and not column.table else None alias_expr, i = alias_to_expression.get(column.name, (None, 1)) double_agg = ( @@ -273,9 +281,8 @@ def replace_columns( if simplified is not column: column.replace(simplified) - for i, projection in enumerate(scope.expression.selects): + for i, projection in enumerate(expression.selects): replace_columns(projection) - if isinstance(projection, exp.Alias): alias_to_expression[projection.alias] = (projection.this, i + 1) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 2a51d84cac..93132859b7 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -602,6 +602,16 @@ def test_expand_alias_refs(self): "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) 
SELECT data.id AS my_id, CONCAT(data.id, data.name) AS full_name FROM data WHERE data.id = 1 GROUP BY data.id, CONCAT(data.id, data.name) HAVING data.id = 1", ) + # Edge case: BigQuery shouldn't expand aliases in complex expressions + sql = "WITH data AS (SELECT 1 AS id) SELECT FUNC(id) AS id FROM data GROUP BY FUNC(id)" + self.assertEqual( + optimizer.qualify_columns.qualify_columns( + parse_one(sql, dialect="bigquery"), + schema=MappingSchema(schema=unused_schema, dialect="bigquery"), + ).sql(), + "WITH data AS (SELECT 1 AS id) SELECT FUNC(data.id) AS id FROM data GROUP BY FUNC(data.id)", + ) + def test_optimize_joins(self): self.check_file( "optimize_joins",