diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 94ef69eb7933..4f4335e269ac 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -508,6 +508,14 @@ impl WindowFunction { window_frame, } } + + pub fn nullable(&self) -> bool { + use window_function::WindowFunction as F; + match &self.fun { + F::BuiltInWindowFunction(f) => f.nullable(), + F::AggregateFunction(_) | F::WindowUDF(_) | F::AggregateUDF(_) => true, + } + } } // Exists expression. diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 9651b377c5bd..f33b783136ff 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -229,11 +229,11 @@ impl ExprSchemable for Expr { } } Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema), + Expr::WindowFunction(window_function) => Ok(window_function.nullable()), Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::ScalarFunction(..) | Expr::ScalarUDF(..) - | Expr::WindowFunction { .. } | Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. 
} | Expr::Placeholder(_) => Ok(true), diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index 1f36ebdd6b54..a411376947a6 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -114,19 +114,35 @@ pub enum BuiltInWindowFunction { impl BuiltInWindowFunction { fn name(&self) -> &str { - use BuiltInWindowFunction::*; + use BuiltInWindowFunction as F; match self { - RowNumber => "ROW_NUMBER", - Rank => "RANK", - DenseRank => "DENSE_RANK", - PercentRank => "PERCENT_RANK", - CumeDist => "CUME_DIST", - Ntile => "NTILE", - Lag => "LAG", - Lead => "LEAD", - FirstValue => "FIRST_VALUE", - LastValue => "LAST_VALUE", - NthValue => "NTH_VALUE", + F::RowNumber => "ROW_NUMBER", + F::Rank => "RANK", + F::DenseRank => "DENSE_RANK", + F::PercentRank => "PERCENT_RANK", + F::CumeDist => "CUME_DIST", + F::Ntile => "NTILE", + F::Lag => "LAG", + F::Lead => "LEAD", + F::FirstValue => "FIRST_VALUE", + F::LastValue => "LAST_VALUE", + F::NthValue => "NTH_VALUE", + } + } + + // These values must stay in sync with the `field` definitions of the corresponding physical window expressions + pub fn nullable(&self) -> bool { + use BuiltInWindowFunction as F; + match self { + F::RowNumber | F::Ntile | F::Rank | F::CumeDist => false, + // The remaining functions are assumed to be nullable (TODO confirm DenseRank and PercentRank against their physical `field` definitions) + F::DenseRank + | F::PercentRank + | F::Lag + | F::Lead + | F::FirstValue + | F::LastValue + | F::NthValue => true, } } } diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 3d9f7511be26..7571b88b5ad1 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3315,3 +3315,23 @@ SELECT window1 AS (ORDER BY C3) ORDER BY C3 LIMIT 5 + +# Creating tables from window functions whose output columns are non-nullable (nullable: false) should result in a correct schema during a cross join + +statement ok +CREATE TABLE row_num_table as SELECT *, ROW_NUMBER() OVER (ORDER BY c2) AS 
row_num FROM aggregate_test_100 LIMIT 10 + +statement ok +CREATE TABLE rank_table as SELECT *, RANK() OVER (ORDER BY c2) AS rank_num FROM aggregate_test_100 LIMIT 10 + +statement ok +CREATE TABLE cum_dist_table as SELECT *, CUME_DIST() OVER (ORDER BY c2) AS cum_dist_num FROM aggregate_test_100 LIMIT 10 + +statement ok +SELECT a.*, b.* FROM row_num_table a, row_num_table b + +statement ok +SELECT a.*, b.* FROM rank_table a, rank_table b + +statement ok +SELECT a.*, b.* FROM cum_dist_table a, cum_dist_table b \ No newline at end of file