Skip to content

Commit

Permalink
Change name of MAX/MIN udaf to lowercase max/min (#11795)
Browse files Browse the repository at this point in the history
* Only row 8 diff

* MAX -> max, MIN -> min

* Updating tests

* Removed aliases
  • Loading branch information
edmondop authored Aug 5, 2024
1 parent 6aad19f commit 45d85b1
Show file tree
Hide file tree
Showing 30 changed files with 446 additions and 790 deletions.
2 changes: 1 addition & 1 deletion datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2060,7 +2060,7 @@ mod tests {

assert_batches_sorted_eq!(
["+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |",
"| c1 | min(aggregate_test_100.c12) | max(aggregate_test_100.c12) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |",
"+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
"| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
"| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ where
/// assert_batches_eq!(
/// &[
/// "+---+----------------+",
/// "| a | MIN(?table?.b) |",
/// "| a | min(?table?.b) |",
/// "+---+----------------+",
/// "| 1 | 2 |",
/// "+---+----------------+",
Expand All @@ -182,14 +182,14 @@ where
/// let mut ctx = SessionContext::new();
/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
/// let results = ctx
/// .sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")
/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100")
/// .await?
/// .collect()
/// .await?;
/// assert_batches_eq!(
/// &[
/// "+---+----------------+",
/// "| a | MIN(example.b) |",
/// "| a | min(example.b) |",
/// "+---+----------------+",
/// "| 1 | 2 |",
/// "+---+----------------+",
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
//!
//! let expected = vec![
//! "+---+----------------+",
//! "| a | MIN(?table?.b) |",
//! "| a | min(?table?.b) |",
//! "+---+----------------+",
//! "| 1 | 2 |",
//! "+---+----------------+"
Expand Down Expand Up @@ -114,7 +114,7 @@
//!
//! let expected = vec![
//! "+---+----------------+",
//! "| a | MIN(example.b) |",
//! "| a | min(example.b) |",
//! "+---+----------------+",
//! "| 1 | 2 |",
//! "+---+----------------+"
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/custom_sources_cases/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ async fn optimizers_catch_all_statistics() {
let expected = RecordBatch::try_new(
Arc::new(Schema::new(vec![
Field::new("count(*)", DataType::Int64, false),
Field::new("MIN(test.c1)", DataType::Int32, false),
Field::new("MAX(test.c1)", DataType::Int32, false),
Field::new("min(test.c1)", DataType::Int32, false),
Field::new("max(test.c1)", DataType::Int32, false),
])),
vec![
Arc::new(Int64Array::from(vec![4])),
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/tests/expr_api/parse_sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ async fn round_trip_parse_sql_expr() -> Result<()> {
"((a = 10) AND b NOT IN (20, 30))",
"sum(a)",
"(sum(a) + 1)",
"(MIN(a) + MAX(b))",
"(MIN(a) + (MAX(b) * sum(c)))",
"(MIN(a) + ((MAX(b) * sum(c)) / 10))",
"(min(a) + max(b))",
"(min(a) + (max(b) * sum(c)))",
"(min(a) + ((max(b) * sum(c)) / 10))",
];

for test in tests {
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/tests/sql/explain_analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -615,11 +615,11 @@ async fn test_physical_plan_display_indent() {
"GlobalLimitExec: skip=0, fetch=10",
" SortPreservingMergeExec: [the_min@2 DESC], fetch=10",
" SortExec: TopK(fetch=10), expr=[the_min@2 DESC], preserve_partitioning=[true]",
" ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)@2 as the_min]",
" AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]",
" ProjectionExec: expr=[c1@0 as c1, max(aggregate_test_100.c12)@1 as max(aggregate_test_100.c12), min(aggregate_test_100.c12)@2 as the_min]",
" AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]",
" CoalesceBatchesExec: target_batch_size=4096",
" RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000",
" AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]",
" AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[max(aggregate_test_100.c12), min(aggregate_test_100.c12)]",
" CoalesceBatchesExec: target_batch_size=4096",
" FilterExec: c12@1 < 10",
" RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1",
Expand Down
12 changes: 6 additions & 6 deletions datafusion/expr/src/expr_rewriter/order_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_common::{Column, Result};

/// Rewrite sort on aggregate expressions to sort on the column of aggregate output
/// For example, `max(x)` is written to `col("MAX(x)")`
/// For example, `max(x)` is written to `col("max(x)")`
pub fn rewrite_sort_cols_by_aggs(
exprs: impl IntoIterator<Item = impl Into<Expr>>,
plan: &LogicalPlan,
Expand Down Expand Up @@ -108,7 +108,7 @@ fn rewrite_in_terms_of_projection(
};

// expr is an actual expr like min(t.c2), but we are looking
// for a column with the same "MIN(C2)", so translate there
// for a column with the same "min(C2)", so translate there
let name = normalized_expr.display_name()?;

let search_col = Expr::Column(Column {
Expand Down Expand Up @@ -237,15 +237,15 @@ mod test {
expected: sort(col("c1")),
},
TestCase {
desc: r#"min(c2) --> "MIN(c2)" -- (column *named* "min(t.c2)"!)"#,
desc: r#"min(c2) --> "min(c2)" -- (column *named* "min(t.c2)"!)"#,
input: sort(min(col("c2"))),
expected: sort(col("MIN(t.c2)")),
expected: sort(col("min(t.c2)")),
},
TestCase {
desc: r#"c1 + min(c2) --> "c1 + MIN(c2)" -- (column *named* "min(t.c2)"!)"#,
desc: r#"c1 + min(c2) --> "c1 + min(c2)" -- (column *named* "min(t.c2)"!)"#,
input: sort(col("c1") + min(col("c2"))),
// should be "c1" not t.c1
expected: sort(col("c1") + col("MIN(t.c2)")),
expected: sort(col("c1") + col("min(t.c2)")),
},
TestCase {
desc: r#"avg(c3) --> "avg(t.c3)" as average (column *named* "avg(t.c3)", aliased)"#,
Expand Down
12 changes: 4 additions & 8 deletions datafusion/expr/src/test/function_stub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ pub fn min(expr: Expr) -> Expr {
/// Testing stub implementation of Min aggregate
pub struct Min {
signature: Signature,
aliases: Vec<String>,
}

impl std::fmt::Debug for Min {
Expand All @@ -326,7 +325,6 @@ impl Default for Min {
impl Min {
pub fn new() -> Self {
Self {
aliases: vec!["min".to_string()],
signature: Signature::variadic_any(Volatility::Immutable),
}
}
Expand All @@ -338,7 +336,7 @@ impl AggregateUDFImpl for Min {
}

fn name(&self) -> &str {
"MIN"
"min"
}

fn signature(&self) -> &Signature {
Expand All @@ -358,7 +356,7 @@ impl AggregateUDFImpl for Min {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn create_groups_accumulator(
Expand Down Expand Up @@ -392,7 +390,6 @@ pub fn max(expr: Expr) -> Expr {
/// Testing stub implementation of MAX aggregate
pub struct Max {
signature: Signature,
aliases: Vec<String>,
}

impl std::fmt::Debug for Max {
Expand All @@ -413,7 +410,6 @@ impl Default for Max {
impl Max {
pub fn new() -> Self {
Self {
aliases: vec!["max".to_string()],
signature: Signature::variadic_any(Volatility::Immutable),
}
}
Expand All @@ -425,7 +421,7 @@ impl AggregateUDFImpl for Max {
}

fn name(&self) -> &str {
"MAX"
"max"
}

fn signature(&self) -> &Signature {
Expand All @@ -445,7 +441,7 @@ impl AggregateUDFImpl for Max {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn create_groups_accumulator(
Expand Down
12 changes: 4 additions & 8 deletions datafusion/functions-aggregate/src/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,12 @@ fn get_min_max_result_type(input_types: &[DataType]) -> Result<Vec<DataType>> {
// MAX aggregate UDF
#[derive(Debug)]
pub struct Max {
aliases: Vec<String>,
signature: Signature,
}

impl Max {
pub fn new() -> Self {
Self {
aliases: vec!["max".to_owned()],
signature: Signature::user_defined(Volatility::Immutable),
}
}
Expand Down Expand Up @@ -146,7 +144,7 @@ impl AggregateUDFImpl for Max {
}

fn name(&self) -> &str {
"MAX"
"max"
}

fn signature(&self) -> &Signature {
Expand All @@ -162,7 +160,7 @@ impl AggregateUDFImpl for Max {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
Expand Down Expand Up @@ -891,14 +889,12 @@ impl Accumulator for SlidingMaxAccumulator {
#[derive(Debug)]
pub struct Min {
signature: Signature,
aliases: Vec<String>,
}

impl Min {
pub fn new() -> Self {
Self {
signature: Signature::user_defined(Volatility::Immutable),
aliases: vec!["min".to_owned()],
}
}
}
Expand All @@ -915,7 +911,7 @@ impl AggregateUDFImpl for Min {
}

fn name(&self) -> &str {
"MIN"
"min"
}

fn signature(&self) -> &Signature {
Expand All @@ -931,7 +927,7 @@ impl AggregateUDFImpl for Min {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ mod tests {
.build()?;

let expected = "Projection: count(Int64(1)) AS count(*) [count(*):Int64]\
\n Aggregate: groupBy=[[]], aggr=[[MAX(count(Int64(1))) AS MAX(count(*))]] [MAX(count(*)):Int64;N]\
\n Aggregate: groupBy=[[]], aggr=[[max(count(Int64(1))) AS max(count(*))]] [max(count(*)):Int64;N]\
\n TableScan: test [a:UInt32, b:UInt32, c:UInt32]";
assert_plan_eq(plan, expected)
}
Expand Down
32 changes: 16 additions & 16 deletions datafusion/optimizer/src/optimize_projections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ mod tests {
.aggregate(Vec::<Expr>::new(), vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]]\
let expected = "Aggregate: groupBy=[[]], aggr=[[max(test.b)]]\
\n TableScan: test projection=[b]";

assert_optimized_plan_equal(plan, expected)
Expand All @@ -1375,7 +1375,7 @@ mod tests {
.aggregate(vec![col("c")], vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[test.c]], aggr=[[MAX(test.b)]]\
let expected = "Aggregate: groupBy=[[test.c]], aggr=[[max(test.b)]]\
\n TableScan: test projection=[b, c]";

assert_optimized_plan_equal(plan, expected)
Expand All @@ -1390,7 +1390,7 @@ mod tests {
.aggregate(vec![col("c")], vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[a.c]], aggr=[[MAX(a.b)]]\
let expected = "Aggregate: groupBy=[[a.c]], aggr=[[max(a.b)]]\
\n SubqueryAlias: a\
\n TableScan: test projection=[b, c]";

Expand All @@ -1406,7 +1406,7 @@ mod tests {
.aggregate(Vec::<Expr>::new(), vec![max(col("b"))])?
.build()?;

let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(test.b)]]\
let expected = "Aggregate: groupBy=[[]], aggr=[[max(test.b)]]\
\n Projection: test.b\
\n Filter: test.c > Int32(1)\
\n TableScan: test projection=[b, c]";
Expand All @@ -1422,7 +1422,7 @@ mod tests {
// "tag.one", not a column named "one" in a table named "tag"):
//
// Projection: tag.one
// Aggregate: groupBy=[], aggr=[MAX("tag.one") AS "tag.one"]
// Aggregate: groupBy=[], aggr=[max("tag.one") AS "tag.one"]
// TableScan
let plan = table_scan(Some("m4"), &schema, None)?
.aggregate(
Expand All @@ -1433,7 +1433,7 @@ mod tests {
.build()?;

let expected = "\
Aggregate: groupBy=[[]], aggr=[[MAX(m4.tag.one) AS tag.one]]\
Aggregate: groupBy=[[]], aggr=[[max(m4.tag.one) AS tag.one]]\
\n TableScan: m4 projection=[tag.one]";

assert_optimized_plan_equal(plan, expected)
Expand Down Expand Up @@ -1768,11 +1768,11 @@ mod tests {
.aggregate(vec![col("c")], vec![max(col("a"))])?
.build()?;

assert_fields_eq(&plan, vec!["c", "MAX(test.a)"]);
assert_fields_eq(&plan, vec!["c", "max(test.a)"]);

let plan = optimize(plan).expect("failed to optimize plan");
let expected = "\
Aggregate: groupBy=[[test.c]], aggr=[[MAX(test.a)]]\
Aggregate: groupBy=[[test.c]], aggr=[[max(test.a)]]\
\n Filter: test.c > Int32(1)\
\n Projection: test.c, test.a\
\n TableScan: test projection=[a, c]";
Expand Down Expand Up @@ -1862,14 +1862,14 @@ mod tests {
let plan = LogicalPlanBuilder::from(table_scan)
.aggregate(vec![col("a"), col("c")], vec![max(col("b")), min(col("b"))])?
.filter(col("c").gt(lit(1)))?
.project(vec![col("c"), col("a"), col("MAX(test.b)")])?
.project(vec![col("c"), col("a"), col("max(test.b)")])?
.build()?;

assert_fields_eq(&plan, vec!["c", "a", "MAX(test.b)"]);
assert_fields_eq(&plan, vec!["c", "a", "max(test.b)"]);

let expected = "Projection: test.c, test.a, MAX(test.b)\
let expected = "Projection: test.c, test.a, max(test.b)\
\n Filter: test.c > Int32(1)\
\n Aggregate: groupBy=[[test.a, test.c]], aggr=[[MAX(test.b)]]\
\n Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b)]]\
\n TableScan: test projection=[a, b, c]";

assert_optimized_plan_equal(plan, expected)
Expand Down Expand Up @@ -1937,10 +1937,10 @@ mod tests {
.project(vec![col1, col2])?
.build()?;

let expected = "Projection: MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MAX(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[MAX(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
\n Projection: test.b, MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[MAX(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
let expected = "Projection: max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, max(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[max(test.b) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
\n Projection: test.b, max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\
\n WindowAggr: windowExpr=[[max(test.a) PARTITION BY [test.b] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
\n TableScan: test projection=[a, b]";

assert_optimized_plan_equal(plan, expected)
Expand Down
Loading

0 comments on commit 45d85b1

Please sign in to comment.