From 7a2294ba1fee3ea8d9759350d317f2590eb205ce Mon Sep 17 00:00:00 2001 From: Matthew Gapp <61894094+matthewgapp@users.noreply.github.com> Date: Sun, 24 Sep 2023 15:32:57 -0700 Subject: [PATCH 1/3] add nullablity information to window function that is used to determine experession and thus column nullablity when constructing window plan --- datafusion/expr/src/expr.rs | 8 +++++ datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/window_function.rs | 49 +++++++++++++++++++------- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 94ef69eb7933..4f4335e269ac 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -508,6 +508,14 @@ impl WindowFunction { window_frame, } } + + pub fn nullable(&self) -> bool { + use window_function::WindowFunction as F; + match &self.fun { + F::BuiltInWindowFunction(f) => f.nullable(), + F::AggregateFunction(_) | F::WindowUDF(_) | F::AggregateUDF(_) => true, + } + } } // Exists expression. diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 9651b377c5bd..f33b783136ff 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -229,11 +229,11 @@ impl ExprSchemable for Expr { } } Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema), + Expr::WindowFunction(window_function) => Ok(window_function.nullable()), Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::ScalarFunction(..) | Expr::ScalarUDF(..) - | Expr::WindowFunction { .. } | Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. } | Expr::Placeholder(_) => Ok(true), diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index 1f36ebdd6b54..4446ee0af61a 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -114,19 +114,35 @@ pub enum BuiltInWindowFunction { impl BuiltInWindowFunction { fn name(&self) -> &str { - use BuiltInWindowFunction::*; + use BuiltInWindowFunction as F; match self { - RowNumber => "ROW_NUMBER", - Rank => "RANK", - DenseRank => "DENSE_RANK", - PercentRank => "PERCENT_RANK", - CumeDist => "CUME_DIST", - Ntile => "NTILE", - Lag => "LAG", - Lead => "LEAD", - FirstValue => "FIRST_VALUE", - LastValue => "LAST_VALUE", - NthValue => "NTH_VALUE", + F::RowNumber => "ROW_NUMBER", + F::Rank => "RANK", + F::DenseRank => "DENSE_RANK", + F::PercentRank => "PERCENT_RANK", + F::CumeDist => "CUME_DIST", + F::Ntile => "NTILE", + F::Lag => "LAG", + F::Lead => "LEAD", + F::FirstValue => "FIRST_VALUE", + F::LastValue => "LAST_VALUE", + F::NthValue => "NTH_VALUE", + } + } + + // these values need to stay in sync with the `field` value defined on the physical expressions + pub fn nullable(&self) -> bool { + use BuiltInWindowFunction as F; + match self { + F::RowNumber | F::Ntile | F::Rank | F::CumeDist => false, + // the rest are assumed to be nullable + F::DenseRank + | F::PercentRank + | F::Lag + | F::Lead + | F::FirstValue + | F::LastValue + | F::NthValue => true, } } } @@ -179,6 +195,15 @@ impl WindowFunction { } } } + + // pub fn field(&self) { + // match self { + // WindowFunction::AggregateFunction(fun) => fun.field(), + // WindowFunction::BuiltInWindowFunction(fun) => fun.field(), + // WindowFunction::AggregateUDF(fun) => fun.field(), + // WindowFunction::WindowUDF(fun) => fun.field(), + // } + // } } /// Returns the datatype of the built-in window function From 363d5f851b28d21172340d432f24455306c22e83 Mon Sep 17 00:00:00 2001 From: Matthew Gapp <61894094+matthewgapp@users.noreply.github.com> Date: Sun, 24 Sep 2023 15:37:37 -0700 Subject: [PATCH 2/3] remove commented code --- datafusion/expr/src/window_function.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index 4446ee0af61a..a411376947a6 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -195,15 +195,6 @@ impl WindowFunction { } } } - - // pub fn field(&self) { - // match self { - // WindowFunction::AggregateFunction(fun) => fun.field(), - // WindowFunction::BuiltInWindowFunction(fun) => fun.field(), - // WindowFunction::AggregateUDF(fun) => fun.field(), - // WindowFunction::WindowUDF(fun) => fun.field(), - // } - // } } /// Returns the datatype of the built-in window function From 3325ab09c74391583ef3b435c9eef99c9068c11f Mon Sep 17 00:00:00 2001 From: Matthew Gapp <61894094+matthewgapp@users.noreply.github.com> Date: Sun, 24 Sep 2023 19:08:51 -0700 Subject: [PATCH 3/3] sqllogictest --- datafusion/sqllogictest/test_files/window.slt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 3d9f7511be26..7571b88b5ad1 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3315,3 +3315,23 @@ SELECT window1 AS (ORDER BY C3) ORDER BY C3 LIMIT 5 + +# Create table with window functions that have nullable: false columns should result in correct schema during cross join + +statement ok +CREATE TABLE row_num_table as SELECT *, ROW_NUMBER() OVER (ORDER BY c2) AS row_num FROM aggregate_test_100 LIMIT 10 + +statement ok +CREATE TABLE rank_table as SELECT *, RANK() OVER (ORDER BY c2) AS rank_num FROM aggregate_test_100 LIMIT 10 + +statement ok +CREATE TABLE cum_dist_table as SELECT *, CUME_DIST() OVER (ORDER BY c2) AS cum_dist_num FROM aggregate_test_100 LIMIT 10 + +statement ok +SELECT a.*, b.* FROM row_num_table a, row_num_table b + +statement ok +SELECT a.*, b.* FROM rank_table a, rank_table b + +statement ok +SELECT a.*, b.* FROM cum_dist_table a, cum_dist_table b \ No newline at end of file