-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move nullif
and isnan
to datafusion-functions
#9216
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,23 +15,15 @@ | |
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use arrow::datatypes::DataType; | ||
//! "core" DataFusion functions | ||
|
||
mod nullif; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To add new functions, we can add the appropriate module and entry in this file |
||
|
||
// create UDFs | ||
make_udf_function!(nullif::NullIfFunc, NULLIF, nullif); | ||
|
||
// Export the functions out of this package, both as expr_fn as well as a list of functions | ||
export_functions!( | ||
(nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression.") | ||
); | ||
|
||
/// Data types currently supported by the `nullif` function. | ||
/// The order of these types corresponds to the order in which coercion is applied, | ||
/// so the list should run from least informative to most informative. | ||
pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[ | ||
DataType::Boolean, | ||
DataType::UInt8, | ||
DataType::UInt16, | ||
DataType::UInt32, | ||
DataType::UInt64, | ||
DataType::Int8, | ||
DataType::Int16, | ||
DataType::Int32, | ||
DataType::Int64, | ||
DataType::Float32, | ||
DataType::Float64, | ||
DataType::Utf8, | ||
DataType::LargeUtf8, | ||
]; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,17 +15,89 @@ | |
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
//! Implementation of the NULLIF expression | ||
|
||
use arrow::{ | ||
datatypes::DataType, | ||
}; | ||
use datafusion_common::{internal_err, Result, DataFusionError}; | ||
use datafusion_expr::{ColumnarValue}; | ||
|
||
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; | ||
use std::any::Any; | ||
use arrow::array::Array; | ||
use arrow::compute::kernels::cmp::eq; | ||
use arrow::compute::kernels::nullif::nullif; | ||
use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; | ||
use datafusion_expr::ColumnarValue; | ||
use datafusion_common::{ ScalarValue}; | ||
|
||
/// Implementation type for the `nullif` function. | ||
/// | ||
/// Holds the [`Signature`] that is built once in `NullIfFunc::new` and handed | ||
/// out by reference from `ScalarUDFImpl::signature`. | ||
#[derive(Debug)] | ||
pub(super) struct NullIfFunc { | ||
signature: Signature, | ||
} | ||
|
||
/// Data types currently supported by the `nullif` function. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this code is just moved from various other places in the codebase. There is no new logic |
||
/// The order of these types corresponds to the order in which coercion is applied, | ||
/// so the list should run from least informative to most informative. | ||
static SUPPORTED_NULLIF_TYPES: &[DataType] = &[ | ||
DataType::Boolean, | ||
DataType::UInt8, | ||
DataType::UInt16, | ||
DataType::UInt32, | ||
DataType::UInt64, | ||
DataType::Int8, | ||
DataType::Int16, | ||
DataType::Int32, | ||
DataType::Int64, | ||
DataType::Float32, | ||
DataType::Float64, | ||
DataType::Utf8, | ||
DataType::LargeUtf8, | ||
]; | ||
|
||
|
||
impl NullIfFunc { | ||
/// Creates a new `NullIfFunc`. | ||
/// | ||
/// The signature is built with `Signature::uniform` for 2 arguments, using a | ||
/// copy of `SUPPORTED_NULLIF_TYPES` as the accepted types and | ||
/// `Volatility::Immutable` as the volatility. | ||
pub fn new() -> Self { | ||
Self { | ||
signature: | ||
Signature::uniform(2, SUPPORTED_NULLIF_TYPES.to_vec(), | ||
Volatility::Immutable, | ||
) | ||
} | ||
} | ||
} | ||
|
||
/// `ScalarUDFImpl` implementation that exposes NULLIF under the name `"nullif"`. | ||
impl ScalarUDFImpl for NullIfFunc { | ||
/// Returns `self` as `&dyn Any`. | ||
fn as_any(&self) -> &dyn Any { | ||
self | ||
} | ||
/// Returns the function name, `"nullif"`. | ||
fn name(&self) -> &str { | ||
"nullif" | ||
} | ||
|
||
/// Returns a reference to the signature stored in this struct. | ||
fn signature(&self) -> &Signature { | ||
&self.signature | ||
} | ||
|
||
/// Returns the first of the coerced argument types. | ||
/// | ||
/// If `data_types` fails to coerce `arg_types` against the signature, the | ||
/// error is returned with the context "Failed to coerce arguments for NULLIF". | ||
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { | ||
// NULLIF has two args and they might get coerced, get a preview of this | ||
let coerced_types = datafusion_expr::type_coercion::functions::data_types(arg_types, &self.signature); | ||
coerced_types.map(|typs| typs[0].clone()) | ||
.map_err(|e| e.context("Failed to coerce arguments for NULLIF") | ||
) | ||
} | ||
|
||
/// Evaluates the function by delegating to `nullif_func`. | ||
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
nullif_func(args) | ||
} | ||
} | ||
|
||
|
||
|
||
/// Implements NULLIF(expr1, expr2) | ||
/// Args: 0 - left expr is any array | ||
/// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed. | ||
/// | ||
pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
if args.len() != 2 { | ||
return internal_err!( | ||
"{:?} args were supplied but NULLIF takes exactly two args", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,3 +121,42 @@ macro_rules! make_package { | |
} | ||
}; | ||
} | ||
|
||
/// Invokes a function on each element of an array and returns the result as a new array. | ||
/// Null elements (`None`) are not passed to the function; they stay `None` in the output. | ||
/// | ||
/// $ARG: ArrayRef | ||
/// $NAME: name of the function (for error messages) | ||
/// $ARG_TYPE: the type of array to cast the argument to | ||
/// $RETURN_TYPE: the type of array to return | ||
/// $FUNC: a block, invoked as `$FUNC(a)`, applied to each non-null element of $ARG | ||
/// | ||
macro_rules! make_function_scalar_inputs_return_type { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is copied (with more documentation) from datafusion-physical_expr. Once we move all the functions we can remove the original copy |
||
($ARG: expr, $NAME:expr, $ARG_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{ | ||
// Downcast the input to the concrete array type via `downcast_arg!`. | ||
let arg = downcast_arg!($ARG, $NAME, $ARG_TYPE); | ||
|
||
||
arg.iter() | ||
.map(|a| match a { | ||
Some(a) => Some($FUNC(a)), | ||
_ => None, | ||
}) | ||
.collect::<$RETURN_TYPE>() | ||
}}; | ||
} | ||
|
||
/// Downcast an argument to a specific array type, returning an internal error | ||
/// if the cast fails. | ||
/// | ||
/// The expansion ends in `?`, so on failure the `DataFusionError::Internal` is | ||
/// returned early from the function in which the macro is used. | ||
/// | ||
/// $ARG: ArrayRef | ||
/// $NAME: name of the argument (for error messages) | ||
/// $ARRAY_TYPE: the type of array to cast the argument to | ||
macro_rules! downcast_arg { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember to move those downcast macros :) |
||
($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{ | ||
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { | ||
DataFusionError::Internal(format!( | ||
"could not cast {} to {}", | ||
$NAME, | ||
std::any::type_name::<$ARRAY_TYPE>() | ||
)) | ||
})? | ||
}}; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One benefit is that the implementations of these functions are consolidated, rather than being spread all over