-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
make common expression alias human-readable #10333
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
dfb5a86
use only current expr symbol + remove visit stack
MohamedAbdeen21 c14a201
Fix tpch tests/benchmarks
MohamedAbdeen21 167f1a8
update docs
MohamedAbdeen21 9c92a76
removing some clones
MohamedAbdeen21 e880620
fix clippy
MohamedAbdeen21 6a41392
update docs
MohamedAbdeen21 72f1395
rebase
MohamedAbdeen21 43cda5e
restore docs
MohamedAbdeen21 f54a35a
fix clippy
MohamedAbdeen21 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,8 +113,8 @@ type CommonExprs = IndexMap<Identifier, Expr>; | |
/// refer to that new column. | ||
/// | ||
/// ```text | ||
/// ProjectionExec(exprs=[extract (day from new_col), extract (year from new_col)]) <-- reuse here | ||
/// ProjectionExec(exprs=[to_date(c1) as new_col]) <-- compute to_date once | ||
/// ProjectionExec(exprs=[extract (day from #{new_col}), extract (year from #{new_col})]) <-- reuse here | ||
/// ProjectionExec(exprs=[to_date(c1) as #{new_col}]) <-- compute to_date once | ||
/// ``` | ||
pub struct CommonSubexprEliminate {} | ||
|
||
|
@@ -347,7 +347,7 @@ impl CommonSubexprEliminate { | |
agg_exprs.push(expr.alias(&name)); | ||
proj_exprs.push(Expr::Column(Column::from_name(name))); | ||
} else { | ||
let id = expr_identifier(&expr_rewritten, "".to_string()); | ||
let id = expr_identifier(&expr_rewritten); | ||
let (qualifier, field) = | ||
expr_rewritten.to_field(&new_input_schema)?; | ||
let out_name = qualified_name(qualifier.as_ref(), field.name()); | ||
|
@@ -438,12 +438,12 @@ impl OptimizerRule for CommonSubexprEliminate { | |
} | ||
}; | ||
|
||
let original_schema = plan.schema().clone(); | ||
let original_schema = plan.schema(); | ||
match optimized_plan { | ||
Some(optimized_plan) if optimized_plan.schema() != &original_schema => { | ||
Some(optimized_plan) if optimized_plan.schema() != original_schema => { | ||
// add an additional projection if the output schema changed. | ||
Ok(Some(build_recover_project_plan( | ||
&original_schema, | ||
original_schema, | ||
optimized_plan, | ||
)?)) | ||
} | ||
|
@@ -656,24 +656,16 @@ enum VisitRecord { | |
EnterMark(usize), | ||
/// the node's children were skipped => jump to f_up on same node | ||
JumpMark, | ||
/// Accumulated identifier of sub expression. | ||
ExprItem(Identifier), | ||
} | ||
|
||
impl ExprIdentifierVisitor<'_> { | ||
/// Find the first `EnterMark` in the stack, and accumulates every `ExprItem` | ||
/// before it. | ||
fn pop_enter_mark(&mut self) -> Option<(usize, Identifier)> { | ||
let mut desc = String::new(); | ||
|
||
while let Some(item) = self.visit_stack.pop() { | ||
fn pop_enter_mark(&mut self) -> Option<usize> { | ||
if let Some(item) = self.visit_stack.pop() { | ||
match item { | ||
VisitRecord::EnterMark(idx) => { | ||
return Some((idx, desc)); | ||
} | ||
VisitRecord::ExprItem(id) => { | ||
desc.push('|'); | ||
desc.push_str(&id); | ||
return Some(idx); | ||
} | ||
VisitRecord::JumpMark => return None, | ||
} | ||
|
@@ -705,11 +697,11 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> { | |
} | ||
|
||
fn f_up(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> { | ||
let Some((down_index, sub_expr_id)) = self.pop_enter_mark() else { | ||
let Some(down_index) = self.pop_enter_mark() else { | ||
return Ok(TreeNodeRecursion::Continue); | ||
}; | ||
|
||
let expr_id = expr_identifier(expr, sub_expr_id); | ||
let expr_id = expr_identifier(expr); | ||
|
||
self.id_array[down_index].0 = self.up_index; | ||
if !self.expr_mask.ignores(expr) { | ||
|
@@ -724,15 +716,14 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> { | |
.or_insert((0, data_type)); | ||
*count += 1; | ||
} | ||
self.visit_stack.push(VisitRecord::ExprItem(expr_id)); | ||
self.up_index += 1; | ||
|
||
Ok(TreeNodeRecursion::Continue) | ||
} | ||
} | ||
|
||
fn expr_identifier(expr: &Expr, sub_expr_identifier: Identifier) -> Identifier { | ||
format!("{{{expr}{sub_expr_identifier}}}") | ||
fn expr_identifier(expr: &Expr) -> Identifier { | ||
format!("#{{{expr}}}") | ||
} | ||
|
||
/// Go through an expression tree and generate identifier for every node in this tree. | ||
|
@@ -885,15 +876,15 @@ mod test { | |
)?; | ||
|
||
let expected = vec![ | ||
(8, "{(SUM(a + Int32(1)) - AVG(c)) * Int32(2)|{Int32(2)}|{SUM(a + Int32(1)) - AVG(c)|{AVG(c)|{c}}|{SUM(a + Int32(1))|{a + Int32(1)|{Int32(1)}|{a}}}}}"), | ||
(6, "{SUM(a + Int32(1)) - AVG(c)|{AVG(c)|{c}}|{SUM(a + Int32(1))|{a + Int32(1)|{Int32(1)}|{a}}}}"), | ||
(8, "#{(SUM(a + Int32(1)) - AVG(c)) * Int32(2)}"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
(6, "#{SUM(a + Int32(1)) - AVG(c)}"), | ||
(3, ""), | ||
(2, "{a + Int32(1)|{Int32(1)}|{a}}"), | ||
(2, "#{a + Int32(1)}"), | ||
(0, ""), | ||
(1, ""), | ||
(5, ""), | ||
(4, ""), | ||
(7, "") | ||
(7, ""), | ||
] | ||
.into_iter() | ||
.map(|(number, id)| (number, id.into())) | ||
|
@@ -911,15 +902,15 @@ mod test { | |
)?; | ||
|
||
let expected = vec![ | ||
(8, "{(SUM(a + Int32(1)) - AVG(c)) * Int32(2)|{Int32(2)}|{SUM(a + Int32(1)) - AVG(c)|{AVG(c)|{c}}|{SUM(a + Int32(1))|{a + Int32(1)|{Int32(1)}|{a}}}}}"), | ||
(6, "{SUM(a + Int32(1)) - AVG(c)|{AVG(c)|{c}}|{SUM(a + Int32(1))|{a + Int32(1)|{Int32(1)}|{a}}}}"), | ||
(3, "{SUM(a + Int32(1))|{a + Int32(1)|{Int32(1)}|{a}}}"), | ||
(2, "{a + Int32(1)|{Int32(1)}|{a}}"), | ||
(8, "#{(SUM(a + Int32(1)) - AVG(c)) * Int32(2)}"), | ||
(6, "#{SUM(a + Int32(1)) - AVG(c)}"), | ||
(3, "#{SUM(a + Int32(1))}"), | ||
(2, "#{a + Int32(1)}"), | ||
(0, ""), | ||
(1, ""), | ||
(5, "{AVG(c)|{c}}"), | ||
(5, "#{AVG(c)}"), | ||
(4, ""), | ||
(7, "") | ||
(7, ""), | ||
] | ||
.into_iter() | ||
.map(|(number, id)| (number, id.into())) | ||
|
@@ -951,8 +942,8 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Aggregate: groupBy=[[]], aggr=[[SUM({test.a * (Int32(1) - test.b)|{Int32(1) - test.b|{test.b}|{Int32(1)}}|{test.a}} AS test.a * Int32(1) - test.b), SUM({test.a * (Int32(1) - test.b)|{Int32(1) - test.b|{test.b}|{Int32(1)}}|{test.a}} AS test.a * Int32(1) - test.b * (Int32(1) + test.c))]]\ | ||
\n Projection: test.a * (Int32(1) - test.b) AS {test.a * (Int32(1) - test.b)|{Int32(1) - test.b|{test.b}|{Int32(1)}}|{test.a}}, test.a, test.b, test.c\ | ||
let expected = "Aggregate: groupBy=[[]], aggr=[[SUM(#{test.a * (Int32(1) - test.b)} AS test.a * Int32(1) - test.b), SUM(#{test.a * (Int32(1) - test.b)} AS test.a * Int32(1) - test.b * (Int32(1) + test.c))]]\ | ||
\n Projection: test.a * (Int32(1) - test.b) AS #{test.a * (Int32(1) - test.b)}, test.a, test.b, test.c\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1004,8 +995,8 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Projection: {AVG(test.a)|{test.a}} AS AVG(test.a) AS col1, {AVG(test.a)|{test.a}} AS AVG(test.a) AS col2, col3, {AVG(test.c)} AS AVG(test.c), {my_agg(test.a)|{test.a}} AS my_agg(test.a) AS col4, {my_agg(test.a)|{test.a}} AS my_agg(test.a) AS col5, col6, {my_agg(test.c)} AS my_agg(test.c)\ | ||
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS {AVG(test.a)|{test.a}}, my_agg(test.a) AS {my_agg(test.a)|{test.a}}, AVG(test.b) AS col3, AVG(test.c) AS {AVG(test.c)}, my_agg(test.b) AS col6, my_agg(test.c) AS {my_agg(test.c)}]]\ | ||
let expected = "Projection: #{AVG(test.a)} AS AVG(test.a) AS col1, #{AVG(test.a)} AS AVG(test.a) AS col2, col3, #{AVG(test.c)} AS AVG(test.c), #{my_agg(test.a)} AS my_agg(test.a) AS col4, #{my_agg(test.a)} AS my_agg(test.a) AS col5, col6, #{my_agg(test.c)} AS my_agg(test.c)\ | ||
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS #{AVG(test.a)}, my_agg(test.a) AS #{my_agg(test.a)}, AVG(test.b) AS col3, AVG(test.c) AS #{AVG(test.c)}, my_agg(test.b) AS col6, my_agg(test.c) AS #{my_agg(test.c)}]]\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1023,8 +1014,8 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Projection: Int32(1) + {AVG(test.a)|{test.a}} AS AVG(test.a), Int32(1) - {AVG(test.a)|{test.a}} AS AVG(test.a), Int32(1) + {my_agg(test.a)|{test.a}} AS my_agg(test.a), Int32(1) - {my_agg(test.a)|{test.a}} AS my_agg(test.a)\ | ||
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS {AVG(test.a)|{test.a}}, my_agg(test.a) AS {my_agg(test.a)|{test.a}}]]\ | ||
let expected = "Projection: Int32(1) + #{AVG(test.a)} AS AVG(test.a), Int32(1) - #{AVG(test.a)} AS AVG(test.a), Int32(1) + #{my_agg(test.a)} AS my_agg(test.a), Int32(1) - #{my_agg(test.a)} AS my_agg(test.a)\ | ||
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS #{AVG(test.a)}, my_agg(test.a) AS #{my_agg(test.a)}]]\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1040,7 +1031,9 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Aggregate: groupBy=[[]], aggr=[[AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS col1, my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS col2]]\n Projection: UInt32(1) + test.a AS {UInt32(1) + test.a|{test.a}|{UInt32(1)}}, test.a, test.b, test.c\n TableScan: test"; | ||
let expected = "Aggregate: groupBy=[[]], aggr=[[AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a) AS col1, my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a) AS col2]]\ | ||
\n Projection: UInt32(1) + test.a AS #{UInt32(1) + test.a}, test.a, test.b, test.c\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
||
|
@@ -1055,8 +1048,8 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Aggregate: groupBy=[[{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a]], aggr=[[AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS col1, my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS col2]]\ | ||
\n Projection: UInt32(1) + test.a AS {UInt32(1) + test.a|{test.a}|{UInt32(1)}}, test.a, test.b, test.c\ | ||
let expected = "Aggregate: groupBy=[[#{UInt32(1) + test.a} AS UInt32(1) + test.a]], aggr=[[AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a) AS col1, my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a) AS col2]]\ | ||
\n Projection: UInt32(1) + test.a AS #{UInt32(1) + test.a}, test.a, test.b, test.c\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1076,9 +1069,9 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Projection: UInt32(1) + test.a, UInt32(1) + {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}} AS AVG(UInt32(1) + test.a) AS col1, UInt32(1) - {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}} AS AVG(UInt32(1) + test.a) AS col2, {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}} AS AVG(UInt32(1) + test.a), UInt32(1) + {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}} AS my_agg(UInt32(1) + test.a) AS col3, UInt32(1) - {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}} AS my_agg(UInt32(1) + test.a) AS col4, {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}} AS my_agg(UInt32(1) + test.a)\ | ||
\n Aggregate: groupBy=[[{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a]], aggr=[[AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}}, my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}}]]\ | ||
\n Projection: UInt32(1) + test.a AS {UInt32(1) + test.a|{test.a}|{UInt32(1)}}, test.a, test.b, test.c\ | ||
let expected = "Projection: UInt32(1) + test.a, UInt32(1) + #{AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a)} AS AVG(UInt32(1) + test.a) AS col1, UInt32(1) - #{AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a)} AS AVG(UInt32(1) + test.a) AS col2, #{AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a)} AS AVG(UInt32(1) + test.a), UInt32(1) + #{my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a)} AS my_agg(UInt32(1) + test.a) AS col3, UInt32(1) - #{my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a)} AS my_agg(UInt32(1) + test.a) AS col4, #{my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a)} AS my_agg(UInt32(1) + test.a)\ | ||
\n Aggregate: groupBy=[[#{UInt32(1) + test.a} AS UInt32(1) + test.a]], aggr=[[AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a) AS #{AVG(#{UInt32(1) + test.a} AS UInt32(1) + test.a)}, my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a) AS #{my_agg(#{UInt32(1) + test.a} AS UInt32(1) + test.a)}]]\ | ||
\n Projection: UInt32(1) + test.a AS #{UInt32(1) + test.a}, test.a, test.b, test.c\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1103,9 +1096,9 @@ mod test { | |
)? | ||
.build()?; | ||
|
||
let expected = "Projection: table.test.col.a, UInt32(1) + {AVG({UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a)|{{UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a|{{UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}}}}} AS AVG(UInt32(1) + table.test.col.a), {AVG({UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a)|{{UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a|{{UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}}}}} AS AVG(UInt32(1) + table.test.col.a)\ | ||
\n Aggregate: groupBy=[[table.test.col.a]], aggr=[[AVG({UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a) AS {AVG({UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a)|{{UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}} AS UInt32(1) + table.test.col.a|{{UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}}}}}]]\ | ||
\n Projection: UInt32(1) + table.test.col.a AS {UInt32(1) + table.test.col.a|{table.test.col.a}|{UInt32(1)}}, table.test.col.a\ | ||
let expected = "Projection: table.test.col.a, UInt32(1) + #{AVG(#{UInt32(1) + table.test.col.a} AS UInt32(1) + table.test.col.a)} AS AVG(UInt32(1) + table.test.col.a), #{AVG(#{UInt32(1) + table.test.col.a} AS UInt32(1) + table.test.col.a)} AS AVG(UInt32(1) + table.test.col.a)\ | ||
\n Aggregate: groupBy=[[table.test.col.a]], aggr=[[AVG(#{UInt32(1) + table.test.col.a} AS UInt32(1) + table.test.col.a) AS #{AVG(#{UInt32(1) + table.test.col.a} AS UInt32(1) + table.test.col.a)}]]\ | ||
\n Projection: UInt32(1) + table.test.col.a AS #{UInt32(1) + table.test.col.a}, table.test.col.a\ | ||
\n TableScan: table.test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1124,8 +1117,8 @@ mod test { | |
])? | ||
.build()?; | ||
|
||
let expected = "Projection: {Int32(1) + test.a|{test.a}|{Int32(1)}} AS Int32(1) + test.a AS first, {Int32(1) + test.a|{test.a}|{Int32(1)}} AS Int32(1) + test.a AS second\ | ||
\n Projection: Int32(1) + test.a AS {Int32(1) + test.a|{test.a}|{Int32(1)}}, test.a, test.b, test.c\ | ||
let expected = "Projection: #{Int32(1) + test.a} AS Int32(1) + test.a AS first, #{Int32(1) + test.a} AS Int32(1) + test.a AS second\ | ||
\n Projection: Int32(1) + test.a AS #{Int32(1) + test.a}, test.a, test.b, test.c\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
@@ -1300,8 +1293,8 @@ mod test { | |
.build()?; | ||
|
||
let expected = "Projection: test.a, test.b, test.c\ | ||
\n Filter: {Int32(1) + test.a|{test.a}|{Int32(1)}} - Int32(10) > {Int32(1) + test.a|{test.a}|{Int32(1)}}\ | ||
\n Projection: Int32(1) + test.a AS {Int32(1) + test.a|{test.a}|{Int32(1)}}, test.a, test.b, test.c\ | ||
\n Filter: #{Int32(1) + test.a} - Int32(10) > #{Int32(1) + test.a}\ | ||
\n Projection: Int32(1) + test.a AS #{Int32(1) + test.a}, test.a, test.b, test.c\ | ||
\n TableScan: test"; | ||
|
||
assert_optimized_plan_eq(expected, &plan); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We shoudn't change this part.
The logic that builds up an identifier using
visit_stack
/ 3 kinds ofVisitRecord
is neccessary and actually a very clever and way to build up an identifier from the current node and sub-identifiers. (An identifier to be aString
was not that a clever decision and will be fixed in #10426, but that's a different issue).This PR shouldn't change what an identifier is / how it is built up otherwise we end up with identifier colliding bugs again. The
IdArray
,ExprStats
andCommonExprs
datastructures require an identifier to represent a full expression subtreee. This means that:would cause bugs as shown in 1. of #10396.
I.e. if we encountered both
col("a") + col("b")
andcol("a + b")
in the expression list to be CSEd and we used"{expr}"
(the non-unique stringified representation) as identifiers then the equal identifier ("a + b"
) of those 2 different expressions would collide and we counted 2 for the occurance of one of the 2 expressions (and the other expression's count would be lost) resulting wrong CSE.Please note that currently the identifier of
col("a") + col("b")
is"{a + b|b|a}"
so it doesn't collide withcol("a + b")
's identifier:"{a + b}"
.Again, this is hard to test now because of the resolution bug: #10413.
I.e. if we write a test where we have
then currently it gets resolved as
and this prevents us to create a test case for CSE identifier collision.
(Please note that I'm simplifying the identifier collision exmple as simple columns (
col("a + b")
) are not subject to CSE.)What this PR can do is to change the aliases (use something else than identifiers) to make the plans more readable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@peter-toth -- thank you -- I see now the danger of the lack of test coverage in this area.
I am still a little unclear on if the bug you described above is something that can actually be hit in practice (due to lack of a test case), or if it will be masked by #10413
What shall we do now? I can see three possibilities:
CommonSubexprEliminate
faster by stop copying so many strings #10426Please let me know your thoughts. I can potentially help with 1 or 2.
cc @MohamedAbdeen21
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good question. I think once #10413 gets resolved we can hit the identifier collision issue due to this PR. But actually, let's try to add an identifier collision test case after #10413.
I would suggest 1., revert the commit and a new version of the PR where only the aliases are made simpler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Revert PR is merged: #10436
And I am pretty stoked that @jonahgao is looking at #10413 (comment)