From 9340380f452a2d0704cb1329eefdf14ef30f92ff Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jun 2024 10:24:46 -0400 Subject: [PATCH 1/4] Consolidate Expr::unalias_nested --- datafusion/expr/src/expr.rs | 30 +++++++++++----- datafusion/expr/src/logical_plan/plan.rs | 35 +------------------ .../optimizer/src/common_subexpr_eliminate.rs | 9 +++-- .../optimizer/src/optimize_projections/mod.rs | 2 +- 4 files changed, 29 insertions(+), 47 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 9ba866a4c919..b546a416be2b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -33,7 +33,9 @@ use crate::{ use crate::{window_frame, Volatility}; use arrow::datatypes::{DataType, FieldRef}; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::tree_node::{ + Transformed, TransformedResult, TreeNode, TreeNodeRecursion, +}; use datafusion_common::{ internal_err, plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; @@ -1193,9 +1195,8 @@ impl Expr { /// Recursively potentially multiple aliases from an expression. /// - /// If the expression is not an alias, the expression is returned unchanged. - /// This method removes directly nested aliases, but not other nested - /// aliases. + /// This method removes nested aliases and returns `Transformed` + /// to signal if the expression was changed. /// /// # Example /// ``` @@ -1212,11 +1213,22 @@ impl Expr { /// let expr = col("foo").alias("bar").alias("baz"); /// assert_eq!(expr.unalias_nested(), col("foo")); /// ``` - pub fn unalias_nested(self) -> Expr { - match self { - Expr::Alias(alias) => alias.expr.unalias_nested(), - _ => self, - } + pub fn unalias_nested(self) -> Transformed { + self.transform_down(|expr| { + match expr { + Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) => { + // subqueries could contain aliases so we don't recurse into those + Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)) + } + Expr::Alias(Alias { expr, .. }) => { + // directly recurse into the alias expression + // as we just modified the tree structure + Ok(Transformed::yes(expr.unalias_nested().data)) + } + _ => Ok(Transformed::no(expr)), + } + }) + .expect("unalias_nested doesn't return err") } /// Return `self IN ` if `negated` is false, otherwise diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 85958223ac97..0b3749b5d81d 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -869,8 +869,7 @@ impl LogicalPlan { } LogicalPlan::Filter { .. } => { assert_eq!(1, expr.len()); - let predicate = expr.pop().unwrap(); - let predicate = Filter::remove_aliases(predicate)?.data; + let predicate = expr.pop().unwrap().unalias_nested().data; Filter::try_new(predicate, Arc::new(inputs.swap_remove(0))) .map(LogicalPlan::Filter) @@ -2200,38 +2199,6 @@ impl Filter { } false } - - /// Remove aliases from a predicate for use in a `Filter` - /// - /// filter predicates should not contain aliased expressions so we remove - /// any aliases. - /// - /// before this logic was added we would have aliases within filters such as - /// for benchmark q6: - /// - /// ```sql - /// lineitem.l_shipdate >= Date32(\"8766\") - /// AND lineitem.l_shipdate < Date32(\"9131\") - /// AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount >= - /// Decimal128(Some(49999999999999),30,15) - /// AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount <= - /// Decimal128(Some(69999999999999),30,15) - /// AND lineitem.l_quantity < Decimal128(Some(2400),15,2) - /// ``` - pub fn remove_aliases(predicate: Expr) -> Result> { - predicate.transform_down(|expr| { - match expr { - Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) => { - // subqueries could contain aliases so we don't recurse into those - Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)) - } - Expr::Alias(Alias { expr, .. }) => { - Ok(Transformed::new(*expr, true, TreeNodeRecursion::Jump)) - } - _ => Ok(Transformed::no(expr)), - } - }) - } } /// Window its input based on a set of window spec and window function (e.g. SUM or RANK) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 7f4093ba110e..649835b638d1 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -259,9 +259,12 @@ impl CommonSubexprEliminate { self.try_unary_plan(expr, input, config)? .transform_data(|(mut new_expr, new_input)| { assert_eq!(new_expr.len(), 1); // passed in vec![predicate] - let new_predicate = new_expr.pop().unwrap(); - Ok(Filter::remove_aliases(new_predicate)? - .update_data(|new_predicate| (new_predicate, new_input))) + let new_predicate = new_expr + .pop() + .unwrap() + .unalias_nested() + .update_data(|new_predicate| (new_predicate, new_input)); + Ok(new_predicate) })? .map_data(|(new_predicate, new_input)| { Filter::try_new(new_predicate, Arc::new(new_input)) diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 11540d3e162e..c6fa68f4a4ae 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -608,7 +608,7 @@ fn rewrite_expr(expr: Expr, input: &Projection) -> Result> { // * the current column is an expression "f" // // return the expression `d + e` (not `d + e` as f) - let input_expr = input.expr[idx].clone().unalias_nested(); + let input_expr = input.expr[idx].clone().unalias_nested().data; Ok(Transformed::yes(input_expr)) } // Unsupported type for consecutive projection merge analysis. From 745b3cbfa628b016d23d9ebf357039cea4743f7e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jun 2024 10:43:38 -0400 Subject: [PATCH 2/4] fix doc test --- datafusion/expr/src/expr.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index b546a416be2b..47ae2563ae8f 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1193,9 +1193,9 @@ impl Expr { } } - /// Recursively potentially multiple aliases from an expression. + /// Recursively removed potentially multiple aliases from an expression. /// - /// This method removes nested aliases and returns `Transformed` + /// This method removes nested aliases and returns [`Transformed`] /// to signal if the expression was changed. /// /// # Example @@ -1203,15 +1203,15 @@ impl Expr { /// # use datafusion_expr::col; /// // `foo as "bar"` is unaliased to `foo` /// let expr = col("foo").alias("bar"); - /// assert_eq!(expr.unalias_nested(), col("foo")); + /// assert_eq!(expr.unalias_nested().data, col("foo")); /// - /// // `foo as "bar" + baz` is not unaliased + /// // `foo as "bar" + baz` is unaliased /// let expr = col("foo").alias("bar") + col("baz"); - /// assert_eq!(expr.clone().unalias_nested(), expr); + /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz")); /// /// // `foo as "bar" as "baz" is unalaised to foo /// let expr = col("foo").alias("bar").alias("baz"); - /// assert_eq!(expr.unalias_nested(), col("foo")); + /// assert_eq!(expr.unalias_nested().data, col("foo")); /// ``` pub fn unalias_nested(self) -> Transformed { self.transform_down(|expr| { From b07100d68f0ae1e9f9cbbaad9715d128424e3c9b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jun 2024 14:00:33 -0400 Subject: [PATCH 3/4] Clarify unreachable code --- datafusion/expr/src/expr.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 47ae2563ae8f..198385f8efa3 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1228,7 +1228,8 @@ impl Expr { _ => Ok(Transformed::no(expr)), } }) - .expect("unalias_nested doesn't return err") + // unreachable code: internal closure doesn't return err + .unwrap() } /// Return `self IN ` if `negated` is false, otherwise From 0ab582633b9988fc461ecbddb2a38d71f1b8d0f5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jun 2024 17:15:16 -0400 Subject: [PATCH 4/4] Use transform_down_up to handle recursion --- datafusion/expr/src/expr.rs | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 198385f8efa3..f25954f830f7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1214,20 +1214,30 @@ impl Expr { /// assert_eq!(expr.unalias_nested().data, col("foo")); /// ``` pub fn unalias_nested(self) -> Transformed { - self.transform_down(|expr| { - match expr { - Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) => { - // subqueries could contain aliases so we don't recurse into those - Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)) - } - Expr::Alias(Alias { expr, .. }) => { - // directly recurse into the alias expression - // as we just modified the tree structure - Ok(Transformed::yes(expr.unalias_nested().data)) + self.transform_down_up( + |expr| { + // f_down: skip subqueries. Check in f_down to avoid recursing into them + let recursion = if matches!( + expr, + Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) + ) { + // subqueries could contain aliases so don't recurse into those + TreeNodeRecursion::Jump + } else { + TreeNodeRecursion::Continue + }; + Ok(Transformed::new(expr, false, recursion)) + }, + |expr| { + // f_up: unalias on up so we can remove nested aliases like + // `(x as foo) as bar` + if let Expr::Alias(Alias { expr, .. }) = expr { + Ok(Transformed::yes(*expr)) + } else { + Ok(Transformed::no(expr)) } - _ => Ok(Transformed::no(expr)), - } - }) + }, + ) // unreachable code: internal closure doesn't return err .unwrap() }