From ae0785ad4633ff2fd351c838edcfe82ae3999cfa Mon Sep 17 00:00:00 2001 From: Sathis Kumar Date: Sat, 22 May 2021 23:20:18 +0530 Subject: [PATCH 1/7] optimize to_timestamp --- datafusion/src/optimizer/constant_folding.rs | 85 ++++++++++++-------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index af89aa13908c..904f3db936d6 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -29,6 +29,10 @@ use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::utils; use crate::physical_plan::functions::BuiltinScalarFunction; use crate::scalar::ScalarValue; +use crate::physical_plan::datetime_expressions::to_timestamp; +use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; +use std::convert::TryInto; +use crate::scalar::ScalarValue::Utf8; /// Optimizer that simplifies comparison expressions involving boolean literals. /// @@ -142,23 +146,23 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { _ => Expr::Literal(ScalarValue::Boolean(None)), }, (Expr::Literal(ScalarValue::Boolean(b)), _) - if self.is_boolean_type(&right) => - { - match b { - Some(true) => *right, - Some(false) => Expr::Not(right), - None => Expr::Literal(ScalarValue::Boolean(None)), + if self.is_boolean_type(&right) => + { + match b { + Some(true) => *right, + Some(false) => Expr::Not(right), + None => Expr::Literal(ScalarValue::Boolean(None)), + } } - } (_, Expr::Literal(ScalarValue::Boolean(b))) - if self.is_boolean_type(&left) => - { - match b { - Some(true) => *left, - Some(false) => Expr::Not(left), - None => Expr::Literal(ScalarValue::Boolean(None)), + if self.is_boolean_type(&left) => + { + match b { + Some(true) => *left, + Some(false) => Expr::Not(left), + None => Expr::Literal(ScalarValue::Boolean(None)), + } } - } _ => Expr::BinaryExpr { left, op: Operator::Eq, @@ -176,23 +180,23 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { _ => Expr::Literal(ScalarValue::Boolean(None)), }, (Expr::Literal(ScalarValue::Boolean(b)), _) - if self.is_boolean_type(&right) => - { - match b { - Some(true) => Expr::Not(right), - Some(false) => *right, - None => Expr::Literal(ScalarValue::Boolean(None)), + if self.is_boolean_type(&right) => + { + match b { + Some(true) => Expr::Not(right), + Some(false) => *right, + None => Expr::Literal(ScalarValue::Boolean(None)), + } } - } (_, Expr::Literal(ScalarValue::Boolean(b))) - if self.is_boolean_type(&left) => - { - match b { - Some(true) => Expr::Not(left), - Some(false) => *left, - None => Expr::Literal(ScalarValue::Boolean(None)), + if self.is_boolean_type(&left) => + { + match b { + Some(true) => Expr::Not(left), + Some(false) => *left, + None => Expr::Literal(ScalarValue::Boolean(None)), + } } - } _ => Expr::BinaryExpr { left, op: Operator::NotEq, @@ -217,6 +221,23 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { .query_execution_start_time .timestamp_nanos(), ))), + Expr::ScalarFunction { + fun: BuiltinScalarFunction::ToTimestamp, + args, + } => { + if args.len() > 0 { + match &args[0] { + Expr::Literal(ScalarValue::Utf8(Some(val))) => { + Expr::Literal(ScalarValue::TimestampNanosecond( + string_to_timestamp_nanos(val).ok() + )) + } + _ => Expr::ScalarFunction { fun: BuiltinScalarFunction::ToTimestamp, args } + } + } else { + Expr::ScalarFunction { fun: BuiltinScalarFunction::ToTimestamp, args } + } + } expr => { // no rewrite possible expr @@ -252,7 +273,7 @@ mod tests { DFField::new(None, "c1", DataType::Utf8, true), DFField::new(None, "c2", DataType::Boolean, true), ]) - .unwrap(), + .unwrap(), ) } @@ -319,7 +340,7 @@ mod tests { assert_eq!(col("c2").get_type(&schema)?, DataType::Boolean); // true = ture -> true - assert_eq!((lit(true).eq(lit(true))).rewrite(&mut rewriter)?, lit(true),); + assert_eq!((lit(true).eq(lit(true))).rewrite(&mut rewriter)?, lit(true), ); // true = false -> false assert_eq!( @@ -328,7 +349,7 @@ mod tests { ); // c2 = true -> c2 - assert_eq!((col("c2").eq(lit(true))).rewrite(&mut rewriter)?, col("c2"),); + assert_eq!((col("c2").eq(lit(true))).rewrite(&mut rewriter)?, col("c2"), ); // c2 = false => !c2 assert_eq!( @@ -468,7 +489,7 @@ mod tests { )], else_expr: Some(Box::new(col("c2").eq(lit(true)))), })) - .rewrite(&mut rewriter)?, + .rewrite(&mut rewriter)?, Expr::Case { expr: None, when_then_expr: vec![( From ff2a20b7bbf55aa71415b4597c0820376c03e1e9 Mon Sep 17 00:00:00 2001 From: Sathis Kumar Date: Sat, 22 May 2021 23:28:18 +0530 Subject: [PATCH 2/7] cargo fmt --- datafusion/src/optimizer/constant_folding.rs | 80 +++++++++++--------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 904f3db936d6..81b2ea63a854 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -27,12 +27,12 @@ use crate::execution::context::ExecutionProps; use crate::logical_plan::{DFSchemaRef, Expr, ExprRewriter, LogicalPlan, Operator}; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::utils; +use crate::physical_plan::datetime_expressions::to_timestamp; use crate::physical_plan::functions::BuiltinScalarFunction; use crate::scalar::ScalarValue; -use crate::physical_plan::datetime_expressions::to_timestamp; +use crate::scalar::ScalarValue::Utf8; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use std::convert::TryInto; -use crate::scalar::ScalarValue::Utf8; /// Optimizer that simplifies comparison expressions involving boolean literals. /// @@ -146,23 +146,23 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { _ => Expr::Literal(ScalarValue::Boolean(None)), }, (Expr::Literal(ScalarValue::Boolean(b)), _) - if self.is_boolean_type(&right) => - { - match b { - Some(true) => *right, - Some(false) => Expr::Not(right), - None => Expr::Literal(ScalarValue::Boolean(None)), - } + if self.is_boolean_type(&right) => + { + match b { + Some(true) => *right, + Some(false) => Expr::Not(right), + None => Expr::Literal(ScalarValue::Boolean(None)), } + } (_, Expr::Literal(ScalarValue::Boolean(b))) - if self.is_boolean_type(&left) => - { - match b { - Some(true) => *left, - Some(false) => Expr::Not(left), - None => Expr::Literal(ScalarValue::Boolean(None)), - } + if self.is_boolean_type(&left) => + { + match b { + Some(true) => *left, + Some(false) => Expr::Not(left), + None => Expr::Literal(ScalarValue::Boolean(None)), } + } _ => Expr::BinaryExpr { left, op: Operator::Eq, @@ -180,23 +180,23 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { _ => Expr::Literal(ScalarValue::Boolean(None)), }, (Expr::Literal(ScalarValue::Boolean(b)), _) - if self.is_boolean_type(&right) => - { - match b { - Some(true) => Expr::Not(right), - Some(false) => *right, - None => Expr::Literal(ScalarValue::Boolean(None)), - } + if self.is_boolean_type(&right) => + { + match b { + Some(true) => Expr::Not(right), + Some(false) => *right, + None => Expr::Literal(ScalarValue::Boolean(None)), } + } (_, Expr::Literal(ScalarValue::Boolean(b))) - if self.is_boolean_type(&left) => - { - match b { - Some(true) => Expr::Not(left), - Some(false) => *left, - None => Expr::Literal(ScalarValue::Boolean(None)), - } + if self.is_boolean_type(&left) => + { + match b { + Some(true) => Expr::Not(left), + Some(false) => *left, + None => Expr::Literal(ScalarValue::Boolean(None)), } + } _ => Expr::BinaryExpr { left, op: Operator::NotEq, @@ -229,13 +229,19 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { match &args[0] { Expr::Literal(ScalarValue::Utf8(Some(val))) => { Expr::Literal(ScalarValue::TimestampNanosecond( - string_to_timestamp_nanos(val).ok() + string_to_timestamp_nanos(val).ok(), )) } - _ => Expr::ScalarFunction { fun: BuiltinScalarFunction::ToTimestamp, args } + _ => Expr::ScalarFunction { + fun: BuiltinScalarFunction::ToTimestamp, + args, + }, } } else { - Expr::ScalarFunction { fun: BuiltinScalarFunction::ToTimestamp, args } + Expr::ScalarFunction { + fun: BuiltinScalarFunction::ToTimestamp, + args, + } } } expr => { @@ -273,7 +279,7 @@ mod tests { DFField::new(None, "c1", DataType::Utf8, true), DFField::new(None, "c2", DataType::Boolean, true), ]) - .unwrap(), + .unwrap(), ) } @@ -340,7 +346,7 @@ mod tests { assert_eq!(col("c2").get_type(&schema)?, DataType::Boolean); // true = ture -> true - assert_eq!((lit(true).eq(lit(true))).rewrite(&mut rewriter)?, lit(true), ); + assert_eq!((lit(true).eq(lit(true))).rewrite(&mut rewriter)?, lit(true),); // true = false -> false assert_eq!( @@ -349,7 +355,7 @@ mod tests { ); // c2 = true -> c2 - assert_eq!((col("c2").eq(lit(true))).rewrite(&mut rewriter)?, col("c2"), ); + assert_eq!((col("c2").eq(lit(true))).rewrite(&mut rewriter)?, col("c2"),); // c2 = false => !c2 assert_eq!( @@ -489,7 +495,7 @@ mod tests { )], else_expr: Some(Box::new(col("c2").eq(lit(true)))), })) - .rewrite(&mut rewriter)?, + .rewrite(&mut rewriter)?, Expr::Case { expr: None, when_then_expr: vec![( From c32d2f59214d85f90538b5eac5cf2bf6158b36b2 Mon Sep 17 00:00:00 2001 From: Sathis Kumar Date: Sat, 22 May 2021 23:45:58 +0530 Subject: [PATCH 3/7] fix clippy --- datafusion/src/optimizer/constant_folding.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 81b2ea63a854..a63feafbf037 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -27,12 +27,9 @@ use crate::execution::context::ExecutionProps; use crate::logical_plan::{DFSchemaRef, Expr, ExprRewriter, LogicalPlan, Operator}; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::utils; -use crate::physical_plan::datetime_expressions::to_timestamp; use crate::physical_plan::functions::BuiltinScalarFunction; use crate::scalar::ScalarValue; -use crate::scalar::ScalarValue::Utf8; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; -use std::convert::TryInto; /// Optimizer that simplifies comparison expressions involving boolean literals. /// @@ -225,7 +222,7 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { fun: BuiltinScalarFunction::ToTimestamp, args, } => { - if args.len() > 0 { + if !args.is_empty() { match &args[0] { Expr::Literal(ScalarValue::Utf8(Some(val))) => { Expr::Literal(ScalarValue::TimestampNanosecond( From 43d01ef76652a5ff054c6311ca672c96e53cad8e Mon Sep 17 00:00:00 2001 From: Sathis Kumar Date: Mon, 24 May 2021 19:14:47 +0530 Subject: [PATCH 4/7] Added testcase & removed optimization for invalid timestamps --- datafusion/src/optimizer/constant_folding.rs | 35 ++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index a63feafbf037..74ad4977dce8 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -225,9 +225,15 @@ impl<'a> ExprRewriter for ConstantRewriter<'a> { if !args.is_empty() { match &args[0] { Expr::Literal(ScalarValue::Utf8(Some(val))) => { - Expr::Literal(ScalarValue::TimestampNanosecond( - string_to_timestamp_nanos(val).ok(), - )) + match string_to_timestamp_nanos(val) { + Ok(timestamp) => Expr::Literal( + ScalarValue::TimestampNanosecond(Some(timestamp)), + ), + _ => Expr::ScalarFunction { + fun: BuiltinScalarFunction::ToTimestamp, + args, + }, + } } _ => Expr::ScalarFunction { fun: BuiltinScalarFunction::ToTimestamp, @@ -656,6 +662,29 @@ mod tests { return format!("{:?}", optimized_plan); } + #[test] + fn to_timestamp_expr() { + let table_scan = test_table_scan().unwrap(); + let proj = vec![Expr::ScalarFunction { + args: vec![Expr::Literal(ScalarValue::Utf8(Some( + "2020-09-08T12:00:00+00:00".to_string(), + )))], + fun: BuiltinScalarFunction::ToTimestamp, + }]; + let plan = LogicalPlanBuilder::from(&table_scan) + .project(proj) + .unwrap() + .build() + .unwrap(); + + let expected = format!( + "Projection: TimestampNanosecond(1599566400000000000)\ + \n TableScan: test projection=None", + ); + let actual = get_optimized_plan_formatted(&plan, &chrono::Utc::now()); + assert_eq!(expected, actual); + } + #[test] fn single_now_expr() { let table_scan = test_table_scan().unwrap(); From 66636717ec137a4c001a28bbd8ccb701afb745a0 Mon Sep 17 00:00:00 2001 From: Sathis Kumar Date: Mon, 24 May 2021 23:01:55 +0530 Subject: [PATCH 5/7] Added negative testcase --- datafusion/src/optimizer/constant_folding.rs | 44 ++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 74ad4977dce8..868245a708ee 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -685,6 +685,50 @@ mod tests { assert_eq!(expected, actual); } + #[test] + fn to_timestamp_expr_wrong_arg() { + let table_scan = test_table_scan().unwrap(); + let proj = vec![Expr::ScalarFunction { + args: vec![Expr::Literal(ScalarValue::Utf8(Some( + "I'M NOT A TIMESTAMP".to_string(), + )))], + fun: BuiltinScalarFunction::ToTimestamp, + }]; + let plan = LogicalPlanBuilder::from(&table_scan) + .project(proj) + .unwrap() + .build() + .unwrap(); + + let expected = format!( + "Projection: totimestamp(Utf8(\"I\'M NOT A TIMESTAMP\"))\ + \n TableScan: test projection=None", + ); + let actual = get_optimized_plan_formatted(&plan, &chrono::Utc::now()); + assert_eq!(expected, actual); + } + + #[test] + fn to_timestamp_expr_no_arg() { + let table_scan = test_table_scan().unwrap(); + let proj = vec![Expr::ScalarFunction { + args: vec![], + fun: BuiltinScalarFunction::ToTimestamp, + }]; + let plan = LogicalPlanBuilder::from(&table_scan) + .project(proj) + .unwrap() + .build() + .unwrap(); + + let expected = format!( + "Projection: totimestamp()\ + \n TableScan: test projection=None", + ); + let actual = get_optimized_plan_formatted(&plan, &chrono::Utc::now()); + assert_eq!(expected, actual); + } + #[test] fn single_now_expr() { let table_scan = test_table_scan().unwrap(); From 21acdb5273cead126a0d0ee06cdefcad838f7260 Mon Sep 17 00:00:00 2001 From: Sathis Kumar Date: Tue, 25 May 2021 13:01:14 +0530 Subject: [PATCH 6/7] Fix clippy --- datafusion/src/optimizer/constant_folding.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 868245a708ee..516d60351d2f 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -700,10 +700,8 @@ mod tests { .build() .unwrap(); - let expected = format!( - "Projection: totimestamp(Utf8(\"I\'M NOT A TIMESTAMP\"))\ - \n TableScan: test projection=None", - ); + let expected = "Projection: totimestamp(Utf8(\"I\'M NOT A TIMESTAMP\"))\ + \n TableScan: test projection=None"; let actual = get_optimized_plan_formatted(&plan, &chrono::Utc::now()); assert_eq!(expected, actual); } @@ -721,10 +719,8 @@ mod tests { .build() .unwrap(); - let expected = format!( - "Projection: totimestamp()\ - \n TableScan: test projection=None", - ); + let expected = "Projection: totimestamp()\ + \n TableScan: test projection=None"; let actual = get_optimized_plan_formatted(&plan, &chrono::Utc::now()); assert_eq!(expected, actual); } From 2b9b9b710aceb8d9b7cf37e8233c596d9a41db40 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 25 May 2021 11:49:37 -0400 Subject: [PATCH 7/7] fix clippy --- datafusion/src/optimizer/constant_folding.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 516d60351d2f..97cc23264bda 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -677,10 +677,9 @@ mod tests { .build() .unwrap(); - let expected = format!( - "Projection: TimestampNanosecond(1599566400000000000)\ - \n TableScan: test projection=None", - ); + let expected = "Projection: TimestampNanosecond(1599566400000000000)\ + \n TableScan: test projection=None" + .to_string(); let actual = get_optimized_plan_formatted(&plan, &chrono::Utc::now()); assert_eq!(expected, actual); }