From cbbc3b1bea5f7c4340ed400397b2b17383c355e1 Mon Sep 17 00:00:00 2001 From: victoria de sainte agathe Date: Fri, 17 Nov 2023 10:47:22 +0100 Subject: [PATCH 1/2] implemented substring function --- CHANGELOG.md | 2 + src/data_type/function.rs | 131 ++++++++++++++++++++++++++++++++++++- src/expr/function.rs | 11 +++- src/expr/implementation.rs | 5 +- src/expr/mod.rs | 48 +++++++++++++- src/expr/sql.rs | 49 +++++++++++++- src/sql/expr.rs | 30 ++++++++- 7 files changed, 265 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d8f12ce..280700e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## Added +- `SUBSTR` function [MR184](https://github.com/Qrlew/qrlew/pull/184) ## [0.4.13] - 2023-11-14 ## Added diff --git a/src/data_type/function.rs b/src/data_type/function.rs index adfa51a7..7e2d3e89 100644 --- a/src/data_type/function.rs +++ b/src/data_type/function.rs @@ -282,6 +282,46 @@ impl Pointwise { }), ) } + + /// Build trivariate pointwise function + pub fn trivariate( + domain: (A, B, C), + co_domain: D, + value: impl Fn( + ::Wrapped, + ::Wrapped, + ::Wrapped + ) -> ::Wrapped + + Sync + + Send + + 'static, + ) -> Self + where + ::Wrapped: TryFrom, + ::Wrapped: TryFrom, + ::Wrapped: TryFrom, + <::Wrapped as TryFrom>::Error: fmt::Debug, + Error: From<<::Wrapped as TryFrom>::Error>, + <::Wrapped as TryFrom>::Error: fmt::Debug, + Error: From<<::Wrapped as TryFrom>::Error>, + <::Wrapped as TryFrom>::Error: fmt::Debug, + Error: From<<::Wrapped as TryFrom>::Error>, + ::Wrapped: Into, + { + let domain = data_type::Struct::from_data_types(&[domain.0.into(), domain.1.into(), domain.2.into()]); + Self::new( + domain.into(), + co_domain.into(), + Arc::new(move |ab| { + let abc = value::Struct::try_from(ab).unwrap(); + let a = ::Wrapped::try_from(abc[0].as_ref().clone()); + let b = ::Wrapped::try_from(abc[1].as_ref().clone()); + let c = ::Wrapped::try_from(abc[2].as_ref().clone()); + Ok(a.map(|a| b.map(|b| c.map( |c| value(a, b, c).into())))???) + }), + ) + } + /// Build variadic pointwise function pub fn variadic( domain: Vec, @@ -1296,10 +1336,37 @@ pub fn ltrim() -> impl Function { ) } +pub fn substr() -> impl Function { + Pointwise::bivariate( + (data_type::Text::default(), data_type::Integer::default()), + data_type::Text::default(), + |a, b| { + let start = b as usize; + a.as_str().get(start..).unwrap_or("").to_string() + } + ) +} + +pub fn substr_with_size() -> impl Function { + Pointwise::trivariate( + (data_type::Text::default(), data_type::Integer::default(), data_type::Integer::default()), + data_type::Text::default(), + |a, b, c| { + let start = b as usize; + let end = cmp::min((b + c) as usize, a.len()); + a.as_str().get(start..end).unwrap_or("").to_string() + } + ) +} + pub fn concat(n: usize) -> impl Function { - Pointwise::variadic(vec![DataType::Any; n], data_type::Text::default(), |v| { - v.into_iter().map(|v| v.to_string()).join("") - }) + Pointwise::variadic( + vec![DataType::Any; n], + data_type::Text::default(), + |v| { + v.into_iter().map(|v| v.to_string()).join("") + } + ) } pub fn md5() -> impl Function { @@ -3117,4 +3184,62 @@ mod tests { println!("val({}) = {}", arg, val); assert_eq!(val, Value::from("arus".to_string())); } + + #[test] + fn test_substr() { + println!("Test substr"); + let fun = substr(); + println!("type = {}", fun); + println!("domain = {}", fun.domain()); + println!("co_domain = {}", fun.co_domain()); + + let set = DataType::from(Struct::from_data_types(&[ + DataType::text(), + DataType::integer(), + ])); + let im = fun.super_image(&set).unwrap(); + println!("im({}) = {}", set, im); + assert!(im == DataType::text()); + + let set = DataType::from(Struct::from_data_types(&[ + DataType::text_values(["abcdefg".to_string(), "hijklmno".to_string()]), + DataType::integer_values([3, 6, 10]), + ])); + let im = fun.super_image(&set).unwrap(); + println!("im({}) = {}", set, im); + assert_eq!( + im, + DataType::text_values(["".to_string(), "defg".to_string(), "g".to_string(), "klmno".to_string(), "no".to_string()]) + ); + } + + #[test] + fn test_substr_with_size() { + println!("Test substr_with_size"); + let fun = substr_with_size(); + println!("type = {}", fun); + println!("domain = {}", fun.domain()); + println!("co_domain = {}", fun.co_domain()); + + let set = DataType::from(Struct::from_data_types(&[ + DataType::text(), + DataType::integer(), + DataType::integer() + ])); + let im = fun.super_image(&set).unwrap(); + println!("im({}) = {}", set, im); + assert!(im == DataType::text()); + + let set = DataType::from(Struct::from_data_types(&[ + DataType::text_values(["abcdefg".to_string(), "hijklmno".to_string()]), + DataType::integer_values([3, 6, 10]), + DataType::integer_value(2) + ])); + let im = fun.super_image(&set).unwrap(); + println!("im({}) = {}", set, im); + assert_eq!( + im, + DataType::text_values(["".to_string(), "de".to_string(), "g".to_string(), "kl".to_string(), "no".to_string()]) + ); + } } diff --git a/src/expr/function.rs b/src/expr/function.rs index 4abb5944..ae7ef8c0 100644 --- a/src/expr/function.rs +++ b/src/expr/function.rs @@ -58,6 +58,8 @@ pub enum Function { Greatest, Rtrim, Ltrim, + Substr, + SubstrWithSize, } #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] @@ -126,8 +128,10 @@ impl Function { | Function::Coalesce | Function::Rtrim | Function::Ltrim + | Function::Substr // Ternary Function | Function::Case + | Function::SubstrWithSize // Nary Function | Function::Concat(_) => Style::Function, } @@ -183,11 +187,12 @@ impl Function { | Function::Greatest | Function::Coalesce | Function::Rtrim - | Function::Ltrim => { + | Function::Ltrim + | Function::Substr => { Arity::Nary(2) } // Ternary Function - Function::Case => Arity::Nary(3), + Function::Case | Function::SubstrWithSize => Arity::Nary(3), // Nary Function Function::Concat(_) => Arity::Varying, } @@ -266,8 +271,10 @@ impl fmt::Display for Function { Function::Coalesce => "coalesce", Function::Rtrim => "rtrim", Function::Ltrim => "ltrim", + Function::Substr => "substr", // Ternary Functions Function::Case => "case", + Function::SubstrWithSize => "substr", // Nary Functions Function::Concat(_) => "concat", }) diff --git a/src/expr/implementation.rs b/src/expr/implementation.rs index 0426b7b2..15076fc3 100644 --- a/src/expr/implementation.rs +++ b/src/expr/implementation.rs @@ -73,9 +73,10 @@ function_implementations!( Least, Greatest, Rtrim, - Ltrim + Ltrim, + Substr ], - [Case, Position], + [Case, Position, SubstrWithSize], x, { match x { diff --git a/src/expr/mod.rs b/src/expr/mod.rs index 50bf628b..f32bfdc0 100644 --- a/src/expr/mod.rs +++ b/src/expr/mod.rs @@ -335,7 +335,8 @@ impl_binary_function_constructors!( Greatest, Coalesce, Rtrim, - Ltrim + Ltrim, + Substr ); /// Implement ternary function constructors @@ -363,7 +364,7 @@ macro_rules! impl_ternary_function_constructors { }; } -impl_ternary_function_constructors!(Case); +impl_ternary_function_constructors!(Case, SubstrWithSize); /// Implement nary function constructors macro_rules! impl_nary_function_constructors { @@ -2806,4 +2807,47 @@ mod tests { DataType::text_values(["".to_string(), "ab".to_string(), "abb".to_string(), "bb".to_string(), "bbb".to_string()]) ); } + + #[test] + fn test_substr() { + let expression = Expr::substr( + Expr::col("col1".to_string()), + Expr::val("2".to_string()), + ); + println!("\nexpression = {}", expression); + println!("expression data type = {}", expression.data_type()); + let set = DataType::structured([ + ("col1", DataType::optional(DataType::text_values(["abcdefg".to_string(), "hijkl".to_string()]))), + ]); + println!( + "expression super image = {}", + expression.super_image(&set).unwrap() + ); + assert_eq!( + expression.super_image(&set).unwrap(), + DataType::optional(DataType::text()) + ); + } + + #[test] + fn test_substr_with_size() { + let expression = Expr::substr_with_size( + Expr::col("col1".to_string()), + Expr::val("2".to_string()), + Expr::val("4".to_string()), + ); + println!("\nexpression = {}", expression); + println!("expression data type = {}", expression.data_type()); + let set = DataType::structured([ + ("col1", DataType::text_values(["abcdefg".to_string(), "hijkl".to_string()])), + ]); + println!( + "expression super image = {}", + expression.super_image(&set).unwrap() + ); + assert_eq!( + expression.super_image(&set).unwrap(), + DataType::optional(DataType::text()) + ); + } } diff --git a/src/expr/sql.rs b/src/expr/sql.rs index 6a7ad9da..39294cf2 100644 --- a/src/expr/sql.rs +++ b/src/expr/sql.rs @@ -182,7 +182,9 @@ impl<'a> expr::Visitor<'a, ast::Expr> for FromExprVisitor { | expr::function::Function::Greatest | expr::function::Function::Coalesce | expr::function::Function::Rtrim - | expr::function::Function::Ltrim => ast::Expr::Function(ast::Function { + | expr::function::Function::Ltrim + | expr::function::Function::Substr + | expr::function::Function::SubstrWithSize => ast::Expr::Function(ast::Function { name: ast::ObjectName(vec![ast::Ident::new(function.to_string())]), args: arguments .into_iter() @@ -483,4 +485,49 @@ mod tests { println!("ast::expr = {:?}", ast_expr); assert_eq!(ast_expr.to_string(), str_expr.to_string(),); } + + #[test] + fn test_substr() { + let str_expr = "substr(a, 5, 2)"; + let ast_expr: ast::Expr = parse_expr(str_expr).unwrap(); + let expr = Expr::try_from(&ast_expr).unwrap(); + println!("expr = {}", expr); + let gen_expr = ast::Expr::from(&expr); + println!("ast::expr = {gen_expr}"); + assert_eq!(ast_expr, gen_expr); + + let str_expr = "\nsubstr(a, 5)"; + let ast_expr: ast::Expr = parse_expr(str_expr).unwrap(); + let expr = Expr::try_from(&ast_expr).unwrap(); + println!("expr = {}", expr); + let gen_expr = ast::Expr::from(&expr); + println!("ast::expr = {gen_expr}"); + assert_eq!(ast_expr, gen_expr); + + let str_expr = "\nsubstring(a from 5 for 2)"; + let ast_expr: ast::Expr = parse_expr(str_expr).unwrap(); + let expr = Expr::try_from(&ast_expr).unwrap(); + println!("expr = {}", expr); + let gen_expr = ast::Expr::from(&expr); + println!("ast::expr = {gen_expr}"); + assert_eq!(gen_expr, parse_expr("substr(a, 5, 2)").unwrap()); + + let str_expr = "\nsubstring(a from 5)"; + let ast_expr: ast::Expr = parse_expr(str_expr).unwrap(); + let expr = Expr::try_from(&ast_expr).unwrap(); + println!("expr = {}", expr); + let gen_expr = ast::Expr::from(&expr); + println!("ast::expr = {gen_expr}"); + assert_eq!(gen_expr, parse_expr("substr(a, 5)").unwrap()); + + let str_expr = "\nsubstring(a for 5)"; + let ast_expr: ast::Expr = parse_expr(str_expr).unwrap(); + let expr = Expr::try_from(&ast_expr).unwrap(); + println!("expr = {}", expr); + let gen_expr = ast::Expr::from(&expr); + println!("ast::expr = {gen_expr}"); + assert_eq!(gen_expr, parse_expr("substr(a, 0, 5)").unwrap()); + } + + } diff --git a/src/sql/expr.rs b/src/sql/expr.rs index 404af646..4d10a945 100644 --- a/src/sql/expr.rs +++ b/src/sql/expr.rs @@ -267,6 +267,7 @@ pub trait Visitor<'a, T: Clone> { fn position(&self, expr: T, r#in: T) -> T; fn in_list(&self, expr: T, list: Vec) -> T; fn trim(&self, expr: T, trim_where: &Option, trim_what: Option) -> T; + fn substring(&self, expr: T, substring_from: Option, substring_for: Option) -> T; } // For the visitor to be more convenient, we create a few auxiliary objects @@ -399,7 +400,11 @@ impl<'a, T: Clone, V: Visitor<'a, T>> visitor::Visitor<'a, ast::Expr, T> for V { substring_from, substring_for, special, - } => todo!(), + } => self.substring( + dependencies.get(expr).clone(), + substring_from.as_ref().map(|x| dependencies.get(x.as_ref()).clone()), + substring_for.as_ref().map(|x| dependencies.get(x.as_ref()).clone()), + ), ast::Expr::Trim { expr, trim_where, @@ -609,6 +614,15 @@ impl<'a> Visitor<'a, String> for DisplayVisitor { ) } + + fn substring(&self, expr: String, substring_from: Option, substring_for: Option) -> String { + format!( + "SUBSTRING ({} {} {})", + expr, + substring_from.map(|s| format!("FROM {}", s)).unwrap_or("".to_string()), + substring_for.map(|s| format!("FOR {}", s)).unwrap_or("".to_string()), + ) + } } /// A simple ast::Expr -> Expr conversion Visitor @@ -792,6 +806,13 @@ impl<'a> Visitor<'a, Result> for TryIntoExprVisitor<'a> { "upper" => Expr::upper(flat_args[0].clone()), "char_length" => Expr::char_length(flat_args[0].clone()), "concat" => Expr::concat(flat_args.clone()), + "substr" => { + if flat_args.len() > 2 { + Expr::substr_with_size(flat_args[0].clone(), flat_args[1].clone(), flat_args[2].clone()) + } else { + Expr::substr(flat_args[0].clone(), flat_args[1].clone()) + } + } // Aggregates "min" => Expr::min(flat_args[0].clone()), "max" => Expr::max(flat_args[0].clone()), @@ -855,6 +876,13 @@ impl<'a> Visitor<'a, Result> for TryIntoExprVisitor<'a> { } ) } + + fn substring(&self, expr: Result, substring_from: Option>, substring_for: Option>) -> Result { + let substring_from = substring_from.unwrap_or(Ok(Expr::val(0))); + substring_for + .map(|x| Ok(Expr::substr_with_size(expr.clone()?, substring_from.clone()?, x?))) + .unwrap_or(Ok(Expr::substr(expr.clone()?, substring_from.clone()?))) + } } /// Based on the TryIntoExprVisitor implement the TryFrom trait From 7688d9b1e58fed9918ac3b1af9f1fc6196e28f45 Mon Sep 17 00:00:00 2001 From: victoria de sainte agathe Date: Fri, 17 Nov 2023 10:48:46 +0100 Subject: [PATCH 2/2] changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 280700e7..78f775e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## Added -- `SUBSTR` function [MR184](https://github.com/Qrlew/qrlew/pull/184) +- `SUBSTR` function [MR186](https://github.com/Qrlew/qrlew/pull/186) ## [0.4.13] - 2023-11-14 ## Added