From 8d58258ef75c3e61edbd267472b5e95e7246f7a1 Mon Sep 17 00:00:00 2001 From: LuQQiu Date: Mon, 30 Sep 2024 13:57:54 -0700 Subject: [PATCH] feat: improve datafusion SQL planning and parsing errors (#2959) Show as invalid input instead of server internal error --- rust/lance-core/src/error.rs | 38 +++++++++++++++++++---- rust/lance-datafusion/src/logical_expr.rs | 2 +- rust/lance-datafusion/src/planner.rs | 30 +++++++++--------- 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/rust/lance-core/src/error.rs b/rust/lance-core/src/error.rs index ed6186a26d..af2a8352dc 100644 --- a/rust/lance-core/src/error.rs +++ b/rust/lance-core/src/error.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors use arrow_schema::ArrowError; +use datafusion_common::DataFusionError; use snafu::{Location, Snafu}; type BoxedError = Box; @@ -305,7 +306,7 @@ impl From for Error { } #[cfg(feature = "datafusion")] -impl From for datafusion_common::DataFusionError { +impl From for DataFusionError { #[track_caller] fn from(e: Error) -> Self { Self::Execution(e.to_string()) @@ -313,12 +314,37 @@ impl From for datafusion_common::DataFusionError { } #[cfg(feature = "datafusion")] -impl From for Error { +impl From for Error { #[track_caller] - fn from(e: datafusion_common::DataFusionError) -> Self { - Self::IO { - source: box_error(e), - location: std::panic::Location::caller().to_snafu_location(), + fn from(e: DataFusionError) -> Self { + let location = std::panic::Location::caller().to_snafu_location(); + match e { + DataFusionError::SQL(..) + | DataFusionError::Plan(..) + | DataFusionError::Configuration(..) => Self::InvalidInput { + source: box_error(e), + location, + }, + DataFusionError::SchemaError(..) => Self::Schema { + message: e.to_string(), + location, + }, + DataFusionError::ArrowError(..) => Self::Arrow { + message: e.to_string(), + location, + }, + DataFusionError::NotImplemented(..) => Self::NotSupported { + source: box_error(e), + location, + }, + DataFusionError::Execution(..) => Self::Execution { + message: e.to_string(), + location, + }, + _ => Self::IO { + source: box_error(e), + location, + }, } } } diff --git a/rust/lance-datafusion/src/logical_expr.rs b/rust/lance-datafusion/src/logical_expr.rs index 5aebcb1c2a..46c1a64feb 100644 --- a/rust/lance-datafusion/src/logical_expr.rs +++ b/rust/lance-datafusion/src/logical_expr.rs @@ -22,7 +22,7 @@ use snafu::{location, Location}; fn resolve_value(expr: &Expr, data_type: &DataType) -> Result { match expr { Expr::Literal(scalar_value) => { - Ok(Expr::Literal(safe_coerce_scalar(scalar_value, data_type).ok_or_else(|| Error::io( + Ok(Expr::Literal(safe_coerce_scalar(scalar_value, data_type).ok_or_else(|| Error::invalid_input( format!("Received literal {expr} and could not convert to literal of type '{data_type:?}'"), location!(), ))?)) diff --git a/rust/lance-datafusion/src/planner.rs b/rust/lance-datafusion/src/planner.rs index 33854c8988..97557385d6 100644 --- a/rust/lance-datafusion/src/planner.rs +++ b/rust/lance-datafusion/src/planner.rs @@ -276,7 +276,7 @@ impl Planner { BinaryOperator::And => Operator::And, BinaryOperator::Or => Operator::Or, _ => { - return Err(Error::io( + return Err(Error::invalid_input( format!("Operator {op} is not supported"), location!(), )); @@ -306,7 +306,7 @@ impl Planner { Err(_) => lit(-n .parse::() .map_err(|_e| { - Error::io( + Error::invalid_input( format!("negative operator can be only applied to integer and float operands, got: {n}"), location!(), ) @@ -319,7 +319,7 @@ impl Planner { } _ => { - return Err(Error::io( + return Err(Error::invalid_input( format!("Unary operator '{:?}' is not supported", op), location!(), )); @@ -339,7 +339,7 @@ impl Planner { Ok(lit(n)) } else { value.parse::().map(lit).map_err(|_| { - Error::io( + Error::invalid_input( format!("'{value}' is not supported number value."), location!(), ) @@ -364,7 +364,7 @@ impl Planner { fn parse_function_args(&self, func_args: &FunctionArg) -> Result { match func_args { FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) => self.parse_sql_expr(expr), - _ => Err(Error::io( + _ => Err(Error::invalid_input( format!("Unsupported function args: {:?}", func_args), location!(), )), @@ -381,7 +381,7 @@ impl Planner { match &func.args { FunctionArguments::List(args) => { if func.name.0.len() != 1 { - return Err(Error::io( + return Err(Error::invalid_input( format!("Function name must have 1 part, got: {:?}", func.name.0), location!(), )); @@ -390,7 +390,7 @@ impl Planner { self.parse_function_args(&args.args[0])?, ))) } - _ => Err(Error::io( + _ => Err(Error::invalid_input( format!("Unsupported function args: {:?}", &func.args), location!(), )), @@ -456,7 +456,7 @@ impl Planner { match tz { TimezoneInfo::None => {} _ => { - return Err(Error::io( + return Err(Error::invalid_input( "Timezone not supported in timestamp".to_string(), location!(), )); @@ -470,7 +470,7 @@ impl Planner { Some(6) => TimeUnit::Microsecond, Some(9) => TimeUnit::Nanosecond, _ => { - return Err(Error::io( + return Err(Error::invalid_input( format!("Unsupported datetime resolution: {:?}", resolution), location!(), )); @@ -486,7 +486,7 @@ impl Planner { Some(6) => TimeUnit::Microsecond, Some(9) => TimeUnit::Nanosecond, _ => { - return Err(Error::io( + return Err(Error::invalid_input( format!("Unsupported datetime resolution: {:?}", resolution), location!(), )); @@ -498,7 +498,7 @@ impl Planner { ExactNumberInfo::PrecisionAndScale(precision, scale) => { Ok(ArrowDataType::Decimal128(*precision as u8, *scale as i8)) } - _ => Err(Error::io( + _ => Err(Error::invalid_input( format!( "Must provide precision and scale for decimal: {:?}", number_info @@ -506,7 +506,7 @@ impl Planner { location!(), )), }, - _ => Err(Error::io( + _ => Err(Error::invalid_input( format!( "Unsupported data type: {:?}. Supported types: {:?}", data_type, SUPPORTED_TYPES @@ -539,7 +539,7 @@ impl Planner { let mut values = vec![]; let array_literal_error = |pos: usize, value: &_| { - Err(Error::io( + Err(Error::invalid_input( format!( "Expected a literal value in array, instead got {} at position {}", value, pos @@ -582,7 +582,7 @@ impl Planner { for value in &mut values { if value.data_type() != data_type { - *value = safe_coerce_scalar(value, &data_type).ok_or_else(|| Error::io( + *value = safe_coerce_scalar(value, &data_type).ok_or_else(|| Error::invalid_input( format!("Array expressions must have a consistent datatype. Expected: {}, got: {}", data_type, value.data_type()), location!() ))?; @@ -665,7 +665,7 @@ impl Planner { expr: Box::new(self.parse_sql_expr(expr)?), data_type: self.parse_type(data_type)?, })), - _ => Err(Error::io( + _ => Err(Error::invalid_input( format!("Expression '{expr}' is not supported SQL in lance"), location!(), )),