Skip to content

Commit

Permalink
fix(bigquery): Early expand only aliased names in GROUP BY (#4246)
Browse files Browse the repository at this point in the history
* fix(bigquery): Expand only aliased names in GROUP BY

* PR Feedback 1

* PR Feedback 2
  • Loading branch information
VaggelisD authored Oct 15, 2024
1 parent fff00a8 commit fcc05c9
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
13 changes: 10 additions & 3 deletions sqlglot/optimizer/qualify_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,21 @@ def _expand_alias_refs(scope: Scope, resolver: Resolver, expand_only_groupby: bo
def replace_columns(
node: t.Optional[exp.Expression], resolve_table: bool = False, literal_index: bool = False
) -> None:
if not node or (expand_only_groupby and not isinstance(node, exp.Group)):
is_group_by = isinstance(node, exp.Group)
if not node or (expand_only_groupby and not is_group_by):
return

for column in walk_in_scope(node, prune=lambda node: node.is_star):
if not isinstance(column, exp.Column):
continue

# BigQuery's GROUP BY allows alias expansion only for standalone names, e.g.:
# SELECT FUNC(col) AS col FROM t GROUP BY col --> Can be expanded
# SELECT FUNC(col) AS col FROM t GROUP BY FUNC(col) --> Shouldn't be expanded, as it would result in FUNC(FUNC(col))
# This is not required for the HAVING clause, as it can evaluate expressions using both the alias & the table columns
if expand_only_groupby and is_group_by and column.parent is not node:
continue

table = resolver.get_table(column.name) if resolve_table and not column.table else None
alias_expr, i = alias_to_expression.get(column.name, (None, 1))
double_agg = (
Expand All @@ -273,9 +281,8 @@ def replace_columns(
if simplified is not column:
column.replace(simplified)

for i, projection in enumerate(scope.expression.selects):
for i, projection in enumerate(expression.selects):
replace_columns(projection)

if isinstance(projection, exp.Alias):
alias_to_expression[projection.alias] = (projection.this, i + 1)

Expand Down
10 changes: 10 additions & 0 deletions tests/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,16 @@ def test_expand_alias_refs(self):
"WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT data.id AS my_id, CONCAT(data.id, data.name) AS full_name FROM data WHERE data.id = 1 GROUP BY data.id, CONCAT(data.id, data.name) HAVING data.id = 1",
)

# Edge case: BigQuery shouldn't expand aliases in complex expressions
sql = "WITH data AS (SELECT 1 AS id) SELECT FUNC(id) AS id FROM data GROUP BY FUNC(id)"
self.assertEqual(
optimizer.qualify_columns.qualify_columns(
parse_one(sql, dialect="bigquery"),
schema=MappingSchema(schema=unused_schema, dialect="bigquery"),
).sql(),
"WITH data AS (SELECT 1 AS id) SELECT FUNC(data.id) AS id FROM data GROUP BY FUNC(data.id)",
)

def test_optimize_joins(self):
self.check_file(
"optimize_joins",
Expand Down

0 comments on commit fcc05c9

Please sign in to comment.