From a5f1a572906b92a47e079b058d4c4f0ed4b6ea1a Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Mon, 26 Feb 2024 19:27:11 -0600 Subject: [PATCH 1/8] port range function and change gen_series logic --- datafusion/expr/src/built_in_function.rs | 15 --- datafusion/expr/src/expr_fn.rs | 6 - datafusion/functions-array/src/kernels.rs | 69 ++++++++++- datafusion/functions-array/src/lib.rs | 6 +- datafusion/functions-array/src/udf.rs | 116 +++++++++++++++++- .../physical-expr/src/array_expressions.rs | 1 - datafusion/physical-expr/src/functions.rs | 3 - datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 3 - datafusion/proto/src/generated/prost.rs | 4 +- .../proto/src/logical_plan/from_proto.rs | 11 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - .../test_files/range_and_gen_series.slt | 48 ++++++++ 13 files changed, 237 insertions(+), 48 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/range_and_gen_series.slt diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 8df2f4e88d41..512d8dc49c12 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -182,8 +182,6 @@ pub enum BuiltinScalarFunction { MakeArray, /// Flatten Flatten, - /// Range - Range, // struct functions /// struct @@ -424,7 +422,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable, BuiltinScalarFunction::ArrayUnion => Volatility::Immutable, BuiltinScalarFunction::ArrayResize => Volatility::Immutable, - BuiltinScalarFunction::Range => Volatility::Immutable, BuiltinScalarFunction::Cardinality => Volatility::Immutable, BuiltinScalarFunction::MakeArray => Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, @@ -635,9 +632,6 @@ impl BuiltinScalarFunction { (dt, _) => Ok(dt), } } - BuiltinScalarFunction::Range => { - Ok(List(Arc::new(Field::new("item", Int64, true)))) - } BuiltinScalarFunction::ArrayExcept => { match (input_expr_types[0].clone(), input_expr_types[1].clone()) { (DataType::Null, _) | (_, DataType::Null) => { @@ -966,14 +960,6 @@ impl BuiltinScalarFunction { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::Range => Signature::one_of( - vec![ - Exact(vec![Int64]), - Exact(vec![Int64, Int64]), - Exact(vec![Int64, Int64, Int64]), - ], - self.volatility(), - ), BuiltinScalarFunction::Struct => Signature::variadic_any(self.volatility()), BuiltinScalarFunction::Concat | BuiltinScalarFunction::ConcatWithSeparator => { @@ -1593,7 +1579,6 @@ impl BuiltinScalarFunction { &["array_intersect", "list_intersect"] } BuiltinScalarFunction::OverLay => &["overlay"], - BuiltinScalarFunction::Range => &["range", "generate_series"], // struct functions BuiltinScalarFunction::Struct => &["struct"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 7ffd2f76e783..dd3c23108877 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -765,12 +765,6 @@ scalar_expr!( "Returns an array of the elements in the intersection of array1 and array2." ); -nary_scalar_expr!( - Range, - gen_range, - "Returns a list of values in the range between start and stop with step." -); - // string functions scalar_expr!(Ascii, ascii, chr, "ASCII code value of the character"); scalar_expr!( diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index 1b96e01d8b9a..64c99c2417ad 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -23,11 +23,12 @@ use arrow::array::{ StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; use arrow::datatypes::DataType; -use datafusion_common::cast::{as_large_list_array, as_list_array, as_string_array}; +use datafusion_common::cast::{ + as_int64_array, as_large_list_array, as_list_array, as_string_array, +}; use datafusion_common::{exec_err, DataFusionError}; use std::any::type_name; use std::sync::Arc; - macro_rules! downcast_arg { ($ARG:expr, $ARRAY_TYPE:ident) => {{ $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { @@ -252,3 +253,67 @@ pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result` representing the resulting ListArray after the operation. +/// +/// # Arguments +/// +/// * `args` - An array of 1 to 3 ArrayRefs representing start, stop, and step(step value can not be zero.) values. +/// +/// # Examples +/// +/// gen_range(3) => [0, 1, 2] +/// gen_range(1, 4) => [1, 2, 3] +/// gen_range(1, 7, 2) => [1, 3, 5] +pub fn gen_range( + args: &[ArrayRef], + include_upper: i64, +) -> datafusion_common::Result { + let (start_array, stop_array, step_array) = match args.len() { + 1 => (None, as_int64_array(&args[0])?, None), + 2 => ( + Some(as_int64_array(&args[0])?), + as_int64_array(&args[1])?, + None, + ), + 3 => ( + Some(as_int64_array(&args[0])?), + as_int64_array(&args[1])?, + Some(as_int64_array(&args[2])?), + ), + _ => return exec_err!("gen_range expects 1 to 3 arguments"), + }; + + let mut values = vec![]; + let mut offsets = vec![0]; + for (idx, stop) in stop_array.iter().enumerate() { + let stop = stop.unwrap_or(0) + include_upper; + let start = start_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(0); + let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(1); + if step == 0 { + return exec_err!("step can't be 0 for function range(start [, stop, step]"); + } + if step < 0 { + // Decreasing range + values.extend((stop + 1..start + 1).rev().step_by((-step) as usize)); + } else { + // Increasing range + values.extend((start..stop).step_by(step as usize)); + } + + offsets.push(values.len() as i32); + } + let arr = Arc::new(ListArray::try_new( + Arc::new(Field::new("item", DataType::Int64, true)), + OffsetBuffer::new(offsets.into()), + Arc::new(Int64Array::from(values)), + None, + )?); + Ok(arr) +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 84997ed10e32..52ee35211888 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -44,7 +44,11 @@ pub mod expr_fn { /// Registers all enabled packages with a [`FunctionRegistry`] pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { - let functions: Vec> = vec![udf::array_to_string_udf()]; + let functions: Vec> = vec![ + udf::array_to_string_udf(), + udf::range_udf(), + udf::gen_series_udf(), + ]; functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; if let Some(existing_udf) = existing_udf { diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 79fb83c059a4..7bce44950b04 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -18,12 +18,14 @@ //! [`ScalarUDFImpl`] definitions for array functions. use arrow::datatypes::DataType; -use datafusion_common::plan_err; +use arrow::datatypes::Field; +use datafusion_common::{plan_err, DataFusionError}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::Expr; +use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; - +use std::sync::Arc; // Create static instances of ScalarUDFs for each function make_udf_function!(ArrayToString, array_to_string, @@ -31,7 +33,6 @@ make_udf_function!(ArrayToString, "converts each element to its text representation.", // doc array_to_string_udf // internal function name ); - #[derive(Debug)] pub(super) struct ArrayToString { signature: Signature, @@ -83,3 +84,112 @@ impl ScalarUDFImpl for ArrayToString { &self.aliases } } + +make_udf_function!( + Range, + range, + input diamilter, + "create a list of values in the range between start and stop", + range_udf +); +#[derive(Debug)] +pub(super) struct Range { + signature: Signature, + aliases: Vec, +} +impl Range { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Int64]), + Exact(vec![Int64, Int64]), + Exact(vec![Int64, Int64, Int64]), + ], + Volatility::Immutable, + ), + aliases: vec![String::from("range")], + } + } +} +impl ScalarUDFImpl for Range { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "range" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(List(Arc::new(Field::new("item", Int64, true)))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::gen_range(&args, 0).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} +make_udf_function!( + GenSeries, + gen_series, + input diamilter, + "create a list of values in the range between start and stop, include upper bound", + gen_series_udf +); +#[derive(Debug)] +pub(super) struct GenSeries { + signature: Signature, + aliases: Vec, +} +impl GenSeries { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Int64]), + Exact(vec![Int64, Int64]), + Exact(vec![Int64, Int64, Int64]), + ], + Volatility::Immutable, + ), + aliases: vec![String::from("generate_series")], + } + } +} +impl ScalarUDFImpl for GenSeries { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "generate_series" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(List(Arc::new(Field::new("item", Int64, true)))) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::gen_range(&args, 1).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 38a4359b4f4b..d6cd03ced40a 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -39,7 +39,6 @@ use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DataFusionError, Result, ScalarValue, }; - use itertools::Itertools; macro_rules! downcast_arg { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index d2b9a68ef8b9..04c69a62909f 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -406,9 +406,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_intersect)(args) }), - BuiltinScalarFunction::Range => Arc::new(|args| { - make_scalar_function_inner(array_expressions::gen_range)(args) - }), BuiltinScalarFunction::Cardinality => Arc::new(|args| { make_scalar_function_inner(array_expressions::cardinality)(args) }), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 1f659469aa3a..5704a4cf8c38 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -667,7 +667,7 @@ enum ScalarFunction { ArrayIntersect = 119; ArrayUnion = 120; OverLay = 121; - Range = 122; + /// 122 is Range ArrayExcept = 123; ArrayPopFront = 124; Levenshtein = 125; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 8959dd37cf13..833490c8a1d9 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22436,7 +22436,6 @@ impl serde::Serialize for ScalarFunction { Self::ArrayIntersect => "ArrayIntersect", Self::ArrayUnion => "ArrayUnion", Self::OverLay => "OverLay", - Self::Range => "Range", Self::ArrayExcept => "ArrayExcept", Self::ArrayPopFront => "ArrayPopFront", Self::Levenshtein => "Levenshtein", @@ -22577,7 +22576,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayIntersect", "ArrayUnion", "OverLay", - "Range", "ArrayExcept", "ArrayPopFront", "Levenshtein", @@ -22747,7 +22745,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect), "ArrayUnion" => Ok(ScalarFunction::ArrayUnion), "OverLay" => Ok(ScalarFunction::OverLay), - "Range" => Ok(ScalarFunction::Range), "ArrayExcept" => Ok(ScalarFunction::ArrayExcept), "ArrayPopFront" => Ok(ScalarFunction::ArrayPopFront), "Levenshtein" => Ok(ScalarFunction::Levenshtein), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 09152d99c12f..c660746c4c0a 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2755,7 +2755,7 @@ pub enum ScalarFunction { ArrayIntersect = 119, ArrayUnion = 120, OverLay = 121, - Range = 122, + /// / 122 is Range ArrayExcept = 123, ArrayPopFront = 124, Levenshtein = 125, @@ -2893,7 +2893,6 @@ impl ScalarFunction { ScalarFunction::ArrayIntersect => "ArrayIntersect", ScalarFunction::ArrayUnion => "ArrayUnion", ScalarFunction::OverLay => "OverLay", - ScalarFunction::Range => "Range", ScalarFunction::ArrayExcept => "ArrayExcept", ScalarFunction::ArrayPopFront => "ArrayPopFront", ScalarFunction::Levenshtein => "Levenshtein", @@ -3028,7 +3027,6 @@ impl ScalarFunction { "ArrayIntersect" => Some(Self::ArrayIntersect), "ArrayUnion" => Some(Self::ArrayUnion), "OverLay" => Some(Self::OverLay), - "Range" => Some(Self::Range), "ArrayExcept" => Some(Self::ArrayExcept), "ArrayPopFront" => Some(Self::ArrayPopFront), "Levenshtein" => Some(Self::Levenshtein), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index e8059482b1b9..ab3d8614c52f 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -57,8 +57,8 @@ use datafusion_expr::{ concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part, date_trunc, degrees, digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, - factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, initcap, - instr, iszero, lcm, left, levenshtein, ln, log, log10, log2, + factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, instr, iszero, + lcm, left, levenshtein, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power, radians, random, regexp_like, regexp_replace, repeat, replace, reverse, right, round, rpad, @@ -508,7 +508,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayIntersect => Self::ArrayIntersect, ScalarFunction::ArrayUnion => Self::ArrayUnion, ScalarFunction::ArrayResize => Self::ArrayResize, - ScalarFunction::Range => Self::Range, ScalarFunction::Cardinality => Self::Cardinality, ScalarFunction::Array => Self::MakeArray, ScalarFunction::DatePart => Self::DatePart, @@ -1464,12 +1463,6 @@ pub fn parse_expr( parse_expr(&args[2], registry)?, parse_expr(&args[3], registry)?, )), - ScalarFunction::Range => Ok(gen_range( - args.to_owned() - .iter() - .map(|expr| parse_expr(expr, registry)) - .collect::, _>>()?, - )), ScalarFunction::Cardinality => { Ok(cardinality(parse_expr(&args[0], registry)?)) } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 6f126729cb29..b135a22b3d20 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1489,7 +1489,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion, - BuiltinScalarFunction::Range => Self::Range, BuiltinScalarFunction::Cardinality => Self::Cardinality, BuiltinScalarFunction::MakeArray => Self::Array, BuiltinScalarFunction::DatePart => Self::DatePart, diff --git a/datafusion/sqllogictest/test_files/range_and_gen_series.slt b/datafusion/sqllogictest/test_files/range_and_gen_series.slt new file mode 100644 index 000000000000..1e385b8ad44c --- /dev/null +++ b/datafusion/sqllogictest/test_files/range_and_gen_series.slt @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +query ? +SELECT range(5); +---- +[0, 1, 2, 3, 4] + + +query ? +SELECT range(2, 5); +---- +[2, 3, 4] + + +query ? +SELECT range(2, 5, 3); +---- +[2] + +query ? +SELECT generate_series(5); +---- +[0, 1, 2, 3, 4, 5] + +query ? +SELECT generate_series(2, 5); +---- +[2, 3, 4, 5] + +query ? +SELECT generate_series(2, 5, 3); +---- +[2, 5] From 7b3681efbff96f12bcb8a9286c9253bd58b7b0a4 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Mon, 26 Feb 2024 19:49:25 -0600 Subject: [PATCH 2/8] fix failed test --- datafusion/functions-array/src/lib.rs | 2 ++ datafusion/functions-array/src/udf.rs | 13 +++++++++++-- datafusion/sqllogictest/test_files/array.slt | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 52ee35211888..e3515ccf9f72 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -40,6 +40,8 @@ use std::sync::Arc; /// Fluent-style API for creating `Expr`s pub mod expr_fn { pub use super::udf::array_to_string; + pub use super::udf::gen_series; + pub use super::udf::range; } /// Registers all enabled packages with a [`FunctionRegistry`] diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 7bce44950b04..f19d9d9bf59d 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -127,7 +127,11 @@ impl ScalarUDFImpl for Range { fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { use DataType::*; - Ok(List(Arc::new(Field::new("item", Int64, true)))) + Ok(List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + )))) } fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { @@ -139,6 +143,7 @@ impl ScalarUDFImpl for Range { &self.aliases } } + make_udf_function!( GenSeries, gen_series, @@ -181,7 +186,11 @@ impl ScalarUDFImpl for GenSeries { fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { use DataType::*; - Ok(List(Arc::new(Field::new("item", Int64, true)))) + Ok(List(Arc::new(Field::new( + "item", + arg_types[0].clone(), + true, + )))) } fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index e64346537150..133d4f71fd57 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5550,7 +5550,7 @@ select generate_series(5), generate_series(2, 10, 3) ; ---- -[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] +[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] ## array_except From 5d7d7eda231c36bb72bbfd209e8142878ca92fb1 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Tue, 27 Feb 2024 08:29:50 -0600 Subject: [PATCH 3/8] delete useless and add tests --- datafusion/functions-array/src/kernels.rs | 4 +- .../physical-expr/src/array_expressions.rs | 58 ------------------- datafusion/sqllogictest/test_files/array.slt | 8 ++- .../test_files/range_and_gen_series.slt | 48 --------------- 4 files changed, 8 insertions(+), 110 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/range_and_gen_series.slt diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index 64c99c2417ad..b9a68b466605 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -293,7 +293,7 @@ pub fn gen_range( let mut values = vec![]; let mut offsets = vec![0]; for (idx, stop) in stop_array.iter().enumerate() { - let stop = stop.unwrap_or(0) + include_upper; + let mut stop = stop.unwrap_or(0); let start = start_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(0); let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(1); if step == 0 { @@ -301,9 +301,11 @@ pub fn gen_range( } if step < 0 { // Decreasing range + stop -= include_upper; values.extend((stop + 1..start + 1).rev().step_by((-step) as usize)); } else { // Increasing range + stop += include_upper; values.extend((start..stop).step_by(step as usize)); } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index d6cd03ced40a..01b2ae13c8d4 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -886,64 +886,6 @@ where )?)) } -/// Generates an array of integers from start to stop with a given step. -/// -/// This function takes 1 to 3 ArrayRefs as arguments, representing start, stop, and step values. -/// It returns a `Result` representing the resulting ListArray after the operation. -/// -/// # Arguments -/// -/// * `args` - An array of 1 to 3 ArrayRefs representing start, stop, and step(step value can not be zero.) values. -/// -/// # Examples -/// -/// gen_range(3) => [0, 1, 2] -/// gen_range(1, 4) => [1, 2, 3] -/// gen_range(1, 7, 2) => [1, 3, 5] -pub fn gen_range(args: &[ArrayRef]) -> Result { - let (start_array, stop_array, step_array) = match args.len() { - 1 => (None, as_int64_array(&args[0])?, None), - 2 => ( - Some(as_int64_array(&args[0])?), - as_int64_array(&args[1])?, - None, - ), - 3 => ( - Some(as_int64_array(&args[0])?), - as_int64_array(&args[1])?, - Some(as_int64_array(&args[2])?), - ), - _ => return exec_err!("gen_range expects 1 to 3 arguments"), - }; - - let mut values = vec![]; - let mut offsets = vec![0]; - for (idx, stop) in stop_array.iter().enumerate() { - let stop = stop.unwrap_or(0); - let start = start_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(0); - let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(1); - if step == 0 { - return exec_err!("step can't be 0 for function range(start [, stop, step]"); - } - if step < 0 { - // Decreasing range - values.extend((stop + 1..start + 1).rev().step_by((-step) as usize)); - } else { - // Increasing range - values.extend((start..stop).step_by(step as usize)); - } - - offsets.push(values.len() as i32); - } - let arr = Arc::new(ListArray::try_new( - Arc::new(Field::new("item", DataType::Int64, true)), - OffsetBuffer::new(offsets.into()), - Arc::new(Int64Array::from(values)), - None, - )?); - Ok(arr) -} - /// Array_sort SQL function pub fn array_sort(args: &[ArrayRef]) -> Result { if args.is_empty() || args.len() > 3 { diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 133d4f71fd57..640bf82b5520 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5544,13 +5544,15 @@ select range(5), ---- [0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [] [] [1, 0, -1, -2, -3, -4] -query ??? +query ????? select generate_series(5), generate_series(2, 5), - generate_series(2, 10, 3) + generate_series(2, 10, 3), + generate_series(1, 5, 1), + generate_series(5, 1, -1) ; ---- -[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] +[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] ## array_except diff --git a/datafusion/sqllogictest/test_files/range_and_gen_series.slt b/datafusion/sqllogictest/test_files/range_and_gen_series.slt deleted file mode 100644 index 1e385b8ad44c..000000000000 --- a/datafusion/sqllogictest/test_files/range_and_gen_series.slt +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -query ? -SELECT range(5); ----- -[0, 1, 2, 3, 4] - - -query ? -SELECT range(2, 5); ----- -[2, 3, 4] - - -query ? -SELECT range(2, 5, 3); ----- -[2] - -query ? -SELECT generate_series(5); ----- -[0, 1, 2, 3, 4, 5] - -query ? -SELECT generate_series(2, 5); ----- -[2, 3, 4, 5] - -query ? -SELECT generate_series(2, 5, 3); ----- -[2, 5] From 9aef2780d493bb73ef44115235c78a36d5e9e1ce Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Tue, 27 Feb 2024 21:12:54 -0600 Subject: [PATCH 4/8] change parameter --- datafusion/functions-array/src/udf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index f19d9d9bf59d..6fe360841a5d 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -88,7 +88,7 @@ impl ScalarUDFImpl for ArrayToString { make_udf_function!( Range, range, - input diamilter, + start stop step, "create a list of values in the range between start and stop", range_udf ); @@ -147,7 +147,7 @@ impl ScalarUDFImpl for Range { make_udf_function!( GenSeries, gen_series, - input diamilter, + start stop step, "create a list of values in the range between start and stop, include upper bound", gen_series_udf ); From a12a4cd0ade7c22535a00bc0ec6a19dcc24f5700 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Tue, 27 Feb 2024 21:26:45 -0600 Subject: [PATCH 5/8] fix document --- .../source/user-guide/sql/scalar_functions.md | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 38da3fd74c26..4b4ec6c74800 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2906,7 +2906,28 @@ empty(array) ### `generate_series` -_Alias of [range](#range)._ +similar as range function, but include the upper bound + +``` +generate_series(start, stop, step) +``` + +#### Arguments + +- **start**: start of the range +- **end**: end of the range (not included) +- **step**: increase by step (can not be 0) + +#### Example + +``` +❯ select generate_series(1,3); ++------------------------------------+ +| generate_series(Int64(1),Int64(3)) | ++------------------------------------+ +| [1, 2, 3] | ++------------------------------------+ +``` ### `list_append` From 3908bb72e6c1ff5c99ef7ec88a72ebb503e06ccb Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Wed, 28 Feb 2024 09:41:11 -0600 Subject: [PATCH 6/8] remove useless --- datafusion/functions-array/src/udf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 6fe360841a5d..17769419c0b2 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -19,7 +19,7 @@ use arrow::datatypes::DataType; use arrow::datatypes::Field; -use datafusion_common::{plan_err, DataFusionError}; +use datafusion_common::plan_err; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::Expr; use datafusion_expr::TypeSignature::Exact; From 665c9c9d9bd5d609c880f065e7cadcfa15768d15 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Wed, 28 Feb 2024 09:44:14 -0600 Subject: [PATCH 7/8] change doc --- docs/source/user-guide/sql/scalar_functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 4b4ec6c74800..0f465c00f4b7 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2906,7 +2906,7 @@ empty(array) ### `generate_series` -similar as range function, but include the upper bound +Similar to the range function, but it includes the upper bound. ``` generate_series(start, stop, step) @@ -2915,7 +2915,7 @@ generate_series(start, stop, step) #### Arguments - **start**: start of the range -- **end**: end of the range (not included) +- **end**: end of the range (included) - **step**: increase by step (can not be 0) #### Example From b9cdde4d841dd7f364f463a545563927f468e2e3 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Wed, 28 Feb 2024 10:02:36 -0600 Subject: [PATCH 8/8] delete space --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 0f465c00f4b7..5006fe7a1330 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2906,7 +2906,7 @@ empty(array) ### `generate_series` -Similar to the range function, but it includes the upper bound. +Similar to the range function, but it includes the upper bound. ``` generate_series(start, stop, step)