From 4687a2f793019deb199f1759f5171730a6434189 Mon Sep 17 00:00:00 2001 From: comphead Date: Mon, 18 Mar 2024 13:59:00 -0700 Subject: [PATCH] minor: Remove deprecated methods (#9627) * minor: remove deprecared code * Remove deprecated test * docs --------- Co-authored-by: Andrew Lamb --- datafusion/common/src/dfschema.rs | 51 +++----------- datafusion/core/src/dataframe/mod.rs | 10 --- datafusion/core/src/datasource/listing/url.rs | 33 --------- datafusion/core/src/execution/context/mod.rs | 43 ------------ datafusion/execution/src/config.rs | 16 +---- datafusion/execution/src/task.rs | 45 ++---------- datafusion/expr/src/aggregate_function.rs | 22 ------ datafusion/expr/src/expr.rs | 28 -------- datafusion/expr/src/expr_rewriter/mod.rs | 43 ------------ datafusion/expr/src/function.rs | 25 +------ datafusion/expr/src/logical_plan/plan.rs | 70 ------------------- datafusion/physical-plan/src/common.rs | 6 -- datafusion/physical-plan/src/sorts/sort.rs | 27 ------- 13 files changed, 18 insertions(+), 401 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 2642032c9a04..597507a044a2 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -97,10 +97,11 @@ pub type DFSchemaRef = Arc; /// ```rust /// use datafusion_common::{DFSchema, DFField}; /// use arrow_schema::Schema; +/// use std::collections::HashMap; /// -/// let df_schema = DFSchema::new(vec![ +/// let df_schema = DFSchema::new_with_metadata(vec![ /// DFField::new_unqualified("c1", arrow::datatypes::DataType::Int32, false), -/// ]).unwrap(); +/// ], HashMap::new()).unwrap(); /// let schema = Schema::from(df_schema); /// assert_eq!(schema.fields().len(), 1); /// ``` @@ -124,12 +125,6 @@ impl DFSchema { } } - #[deprecated(since = "7.0.0", note = "please use `new_with_metadata` instead")] - /// Create a new `DFSchema` - pub fn new(fields: Vec) -> Result { - Self::new_with_metadata(fields, HashMap::new()) - } - /// Create a new `DFSchema` pub fn new_with_metadata( fields: Vec, @@ -251,32 +246,6 @@ impl DFSchema { &self.fields[i] } - #[deprecated(since = "8.0.0", note = "please use `index_of_column_by_name` instead")] - /// Find the index of the column with the given unqualified name - pub fn index_of(&self, name: &str) -> Result { - for i in 0..self.fields.len() { - if self.fields[i].name() == name { - return Ok(i); - } else { - // Now that `index_of` is deprecated an error is thrown if - // a fully qualified field name is provided. - match &self.fields[i].qualifier { - Some(qualifier) => { - if (qualifier.to_string() + "." + self.fields[i].name()) == name { - return _plan_err!( - "Fully qualified field name '{name}' was supplied to `index_of` \ - which is deprecated. Please use `index_of_column_by_name` instead" - ); - } - } - None => (), - } - } - } - - Err(unqualified_field_not_found(name, self)) - } - pub fn index_of_column_by_name( &self, qualifier: Option<&TableReference>, @@ -1146,13 +1115,10 @@ mod tests { Ok(()) } - #[allow(deprecated)] #[test] fn helpful_error_messages() -> Result<()> { let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let expected_help = "Valid fields are t1.c0, t1.c1."; - // Pertinent message parts - let expected_err_msg = "Fully qualified field name 't1.c0'"; assert_contains!( schema .field_with_qualified_name(&TableReference::bare("x"), "y") @@ -1167,11 +1133,12 @@ mod tests { .to_string(), expected_help ); - assert_contains!(schema.index_of("y").unwrap_err().to_string(), expected_help); - assert_contains!( - schema.index_of("t1.c0").unwrap_err().to_string(), - expected_err_msg - ); + assert!(schema.index_of_column_by_name(None, "y").unwrap().is_none()); + assert!(schema + .index_of_column_by_name(None, "t1.c0") + .unwrap() + .is_none()); + Ok(()) } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 5f192b83fdd9..25830401571d 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1001,16 +1001,6 @@ impl DataFrame { Arc::new(DataFrameTableProvider { plan: self.plan }) } - /// Return the optimized logical plan represented by this DataFrame. - /// - /// Note: This method should not be used outside testing, as it loses the snapshot - /// of the [`SessionState`] attached to this [`DataFrame`] and consequently subsequent - /// operations may take place against a different state - #[deprecated(since = "23.0.0", note = "Use DataFrame::into_optimized_plan")] - pub fn to_logical_plan(self) -> Result { - self.into_optimized_plan() - } - /// Return a DataFrame with the explanation of its plan so far. /// /// if `analyze` is specified, runs the plan and reports metrics diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index d9149bcc20e0..eb95dc7b1d24 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::fs; - use crate::datasource::object_store::ObjectStoreUrl; use crate::execution::context::SessionState; use datafusion_common::{DataFusionError, Result}; @@ -117,37 +115,6 @@ impl ListingTableUrl { } } - /// Get object store for specified input_url - /// if input_url is actually not a url, we assume it is a local file path - /// if we have a local path, create it if not exists so ListingTableUrl::parse works - #[deprecated(note = "Use parse")] - pub fn parse_create_local_if_not_exists( - s: impl AsRef, - is_directory: bool, - ) -> Result { - let s = s.as_ref(); - let is_valid_url = Url::parse(s).is_ok(); - - match is_valid_url { - true => ListingTableUrl::parse(s), - false => { - let path = std::path::PathBuf::from(s); - if !path.exists() { - if is_directory { - fs::create_dir_all(path)?; - } else { - // ensure parent directory exists - if let Some(parent) = path.parent() { - fs::create_dir_all(parent)?; - } - fs::File::create(path)?; - } - } - ListingTableUrl::parse(s) - } - } - } - /// Creates a new [`ListingTableUrl`] interpreting `s` as a filesystem path #[cfg(not(target_arch = "wasm32"))] fn parse_path(s: &str) -> Result { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 32c1c60ec564..1ac7da465216 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1181,49 +1181,6 @@ impl SessionContext { } } - /// Returns the set of available tables in the default catalog and - /// schema. - /// - /// Use [`table`] to get a specific table. - /// - /// [`table`]: SessionContext::table - #[deprecated( - since = "23.0.0", - note = "Please use the catalog provider interface (`SessionContext::catalog`) to examine available catalogs, schemas, and tables" - )] - pub fn tables(&self) -> Result> { - Ok(self - .state - .read() - // a bare reference will always resolve to the default catalog and schema - .schema_for_ref(TableReference::Bare { table: "".into() })? - .table_names() - .iter() - .cloned() - .collect()) - } - - /// Optimizes the logical plan by applying optimizer rules. - #[deprecated( - since = "23.0.0", - note = "Use SessionState::optimize to ensure a consistent state for planning and execution" - )] - pub fn optimize(&self, plan: &LogicalPlan) -> Result { - self.state.read().optimize(plan) - } - - /// Creates a physical plan from a logical plan. - #[deprecated( - since = "23.0.0", - note = "Use SessionState::create_physical_plan or DataFrame::create_physical_plan to ensure a consistent state for planning and execution" - )] - pub async fn create_physical_plan( - &self, - logical_plan: &LogicalPlan, - ) -> Result> { - self.state().create_physical_plan(logical_plan).await - } - /// Get a new TaskContext to run in this session pub fn task_ctx(&self) -> Arc { Arc::new(TaskContext::from(self)) diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs index 312aef953e9c..360bac71c510 100644 --- a/datafusion/execution/src/config.rs +++ b/datafusion/execution/src/config.rs @@ -434,9 +434,9 @@ impl SessionConfig { /// converted to strings. /// /// Note that this method will eventually be deprecated and - /// replaced by [`config_options`]. + /// replaced by [`options`]. /// - /// [`config_options`]: Self::config_options + /// [`options`]: Self::options pub fn to_props(&self) -> HashMap { let mut map = HashMap::new(); // copy configs from config_options @@ -447,18 +447,6 @@ impl SessionConfig { map } - /// Return a handle to the configuration options. - #[deprecated(since = "21.0.0", note = "use options() instead")] - pub fn config_options(&self) -> &ConfigOptions { - &self.options - } - - /// Return a mutable handle to the configuration options. - #[deprecated(since = "21.0.0", note = "use options_mut() instead")] - pub fn config_options_mut(&mut self) -> &mut ConfigOptions { - &mut self.options - } - /// Add extensions. /// /// Extensions can be used to attach extra data to the session config -- e.g. tracing information or caches. diff --git a/datafusion/execution/src/task.rs b/datafusion/execution/src/task.rs index cae410655d10..4216ce95f35e 100644 --- a/datafusion/execution/src/task.rs +++ b/datafusion/execution/src/task.rs @@ -20,10 +20,7 @@ use std::{ sync::Arc, }; -use datafusion_common::{ - config::{ConfigOptions, Extensions}, - plan_datafusion_err, DataFusionError, Result, -}; +use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; use crate::{ @@ -102,39 +99,6 @@ impl TaskContext { } } - /// Create a new task context instance, by first copying all - /// name/value pairs from `task_props` into a `SessionConfig`. - #[deprecated( - since = "21.0.0", - note = "Construct SessionConfig and call TaskContext::new() instead" - )] - pub fn try_new( - task_id: String, - session_id: String, - task_props: HashMap, - scalar_functions: HashMap>, - aggregate_functions: HashMap>, - runtime: Arc, - extensions: Extensions, - ) -> Result { - let mut config = ConfigOptions::new().with_extensions(extensions); - for (k, v) in task_props { - config.set(&k, &v)?; - } - let session_config = SessionConfig::from(config); - let window_functions = HashMap::new(); - - Ok(Self::new( - Some(task_id), - session_id, - session_config, - scalar_functions, - aggregate_functions, - window_functions, - runtime, - )) - } - /// Return the SessionConfig associated with this [TaskContext] pub fn session_config(&self) -> &SessionConfig { &self.session_config @@ -160,7 +124,7 @@ impl TaskContext { self.runtime.clone() } - /// Update the [`ConfigOptions`] + /// Update the [`SessionConfig`] pub fn with_session_config(mut self, session_config: SessionConfig) -> Self { self.session_config = session_config; self @@ -229,7 +193,10 @@ impl FunctionRegistry for TaskContext { #[cfg(test)] mod tests { use super::*; - use datafusion_common::{config::ConfigExtension, extensions_options}; + use datafusion_common::{ + config::{ConfigExtension, ConfigOptions, Extensions}, + extensions_options, + }; extensions_options! { struct TestExtension { diff --git a/datafusion/expr/src/aggregate_function.rs b/datafusion/expr/src/aggregate_function.rs index 574de3e7082a..85f8c74f3737 100644 --- a/datafusion/expr/src/aggregate_function.rs +++ b/datafusion/expr/src/aggregate_function.rs @@ -218,19 +218,6 @@ impl FromStr for AggregateFunction { } } -/// Returns the datatype of the aggregate function. -/// This is used to get the returned data type for aggregate expr. -#[deprecated( - since = "27.0.0", - note = "please use `AggregateFunction::return_type` instead" -)] -pub fn return_type( - fun: &AggregateFunction, - input_expr_types: &[DataType], -) -> Result { - fun.return_type(input_expr_types) -} - impl AggregateFunction { /// Returns the datatype of the aggregate function given its argument types /// @@ -328,15 +315,6 @@ pub fn sum_type_of_avg(input_expr_types: &[DataType]) -> Result { avg_sum_type(&coerced_data_types[0]) } -/// the signatures supported by the function `fun`. -#[deprecated( - since = "27.0.0", - note = "please use `AggregateFunction::signature` instead" -)] -pub fn signature(fun: &AggregateFunction) -> Signature { - fun.signature() -} - impl AggregateFunction { /// the signatures supported by the function `fun`. pub fn signature(&self) -> Signature { diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0da05d96f67e..7ede4cd8ffc9 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -703,27 +703,6 @@ pub fn find_df_window_func(name: &str) -> Option { } } -/// Returns the datatype of the window function -#[deprecated( - since = "27.0.0", - note = "please use `WindowFunction::return_type` instead" -)] -pub fn return_type( - fun: &WindowFunctionDefinition, - input_expr_types: &[DataType], -) -> Result { - fun.return_type(input_expr_types) -} - -/// the signatures supported by the function `fun`. -#[deprecated( - since = "27.0.0", - note = "please use `WindowFunction::signature` instead" -)] -pub fn signature(fun: &WindowFunctionDefinition) -> Signature { - fun.signature() -} - // Exists expression. #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct Exists { @@ -887,13 +866,6 @@ impl Expr { create_name(self) } - /// Returns the name of this expression as it should appear in a schema. This name - /// will not include any CAST expressions. - #[deprecated(since = "14.0.0", note = "please use `display_name` instead")] - pub fn name(&self) -> Result { - self.display_name() - } - /// Returns a full and complete string representation of this expression. pub fn canonical_name(&self) -> String { format!("{self}") diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 357b1aed7dde..ea3ffadda391 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -74,31 +74,6 @@ pub fn normalize_col(expr: Expr, plan: &LogicalPlan) -> Result { .data() } -/// Recursively call [`Column::normalize_with_schemas`] on all [`Column`] expressions -/// in the `expr` expression tree. -#[deprecated( - since = "20.0.0", - note = "use normalize_col_with_schemas_and_ambiguity_check instead" -)] -#[allow(deprecated)] -pub fn normalize_col_with_schemas( - expr: Expr, - schemas: &[&Arc], - using_columns: &[HashSet], -) -> Result { - expr.transform(&|expr| { - Ok({ - if let Expr::Column(c) = expr { - let col = c.normalize_with_schemas(schemas, using_columns)?; - Transformed::yes(Expr::Column(col)) - } else { - Transformed::no(expr) - } - }) - }) - .data() -} - /// See [`Column::normalize_with_schemas_and_ambiguity_check`] for usage pub fn normalize_col_with_schemas_and_ambiguity_check( expr: Expr, @@ -398,24 +373,6 @@ mod test { ); } - #[test] - #[allow(deprecated)] - fn normalize_cols_priority() { - let expr = col("a") + col("b"); - // Schemas with multiple matches for column a, first takes priority - let schema_a = make_schema_with_empty_metadata(vec![make_field("tableA", "a")]); - let schema_b = make_schema_with_empty_metadata(vec![make_field("tableB", "b")]); - let schema_a2 = make_schema_with_empty_metadata(vec![make_field("tableA2", "a")]); - let schemas = vec![schema_a2, schema_b, schema_a] - .into_iter() - .map(Arc::new) - .collect::>(); - let schemas = schemas.iter().collect::>(); - - let normalized_expr = normalize_col_with_schemas(expr, &schemas, &[]).unwrap(); - assert_eq!(normalized_expr, col("tableA2.a") + col("tableB.b")); - } - #[test] fn normalize_cols_non_exist() { // test normalizing columns when the name doesn't exist diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index a3760eeb357d..adf4dd3fef20 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -17,9 +17,7 @@ //! Function module contains typing and signature for built-in and user defined functions. -use crate::{ - Accumulator, BuiltinScalarFunction, ColumnarValue, PartitionEvaluator, Signature, -}; +use crate::{Accumulator, ColumnarValue, PartitionEvaluator}; use arrow::datatypes::DataType; use datafusion_common::Result; use std::sync::Arc; @@ -53,24 +51,3 @@ pub type PartitionEvaluatorFactory = /// its state, given its return datatype. pub type StateTypeFunction = Arc Result>> + Send + Sync>; - -/// Returns the datatype of the scalar function -#[deprecated( - since = "27.0.0", - note = "please use `BuiltinScalarFunction::return_type` instead" -)] -pub fn return_type( - fun: &BuiltinScalarFunction, - input_expr_types: &[DataType], -) -> Result { - fun.return_type(input_expr_types) -} - -/// Return the [`Signature`] supported by the function `fun`. -#[deprecated( - since = "27.0.0", - note = "please use `BuiltinScalarFunction::signature` instead" -)] -pub fn signature(fun: &BuiltinScalarFunction) -> Signature { - fun.signature() -} diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index c6f280acb255..08fe3380061f 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -217,56 +217,6 @@ impl LogicalPlan { } } - /// Get all meaningful schemas of a plan and its children plan. - #[deprecated(since = "20.0.0")] - pub fn all_schemas(&self) -> Vec<&DFSchemaRef> { - match self { - // return self and children schemas - LogicalPlan::Window(_) - | LogicalPlan::Projection(_) - | LogicalPlan::Aggregate(_) - | LogicalPlan::Unnest(_) - | LogicalPlan::Join(_) - | LogicalPlan::CrossJoin(_) => { - let mut schemas = vec![self.schema()]; - self.inputs().iter().for_each(|input| { - schemas.push(input.schema()); - }); - schemas - } - // just return self.schema() - LogicalPlan::Explain(_) - | LogicalPlan::Analyze(_) - | LogicalPlan::EmptyRelation(_) - | LogicalPlan::Ddl(_) - | LogicalPlan::Dml(_) - | LogicalPlan::Copy(_) - | LogicalPlan::Values(_) - | LogicalPlan::SubqueryAlias(_) - | LogicalPlan::Union(_) - | LogicalPlan::Extension(_) - | LogicalPlan::TableScan(_) => { - vec![self.schema()] - } - LogicalPlan::RecursiveQuery(RecursiveQuery { static_term, .. }) => { - // return only the schema of the static term - static_term.all_schemas() - } - // return children schemas - LogicalPlan::Limit(_) - | LogicalPlan::Subquery(_) - | LogicalPlan::Repartition(_) - | LogicalPlan::Sort(_) - | LogicalPlan::Filter(_) - | LogicalPlan::Distinct(_) - | LogicalPlan::Prepare(_) => { - self.inputs().iter().map(|p| p.schema()).collect() - } - // return empty - LogicalPlan::Statement(_) | LogicalPlan::DescribeTable(_) => vec![], - } - } - /// Returns the (fixed) output schema for explain plans pub fn explain_schema() -> SchemaRef { SchemaRef::new(Schema::new(vec![ @@ -3079,14 +3029,6 @@ digraph { empty_schema: DFSchemaRef, } - impl NoChildExtension { - fn empty() -> Self { - Self { - empty_schema: Arc::new(DFSchema::empty()), - } - } - } - impl UserDefinedLogicalNode for NoChildExtension { fn as_any(&self) -> &dyn std::any::Any { unimplemented!() @@ -3129,18 +3071,6 @@ digraph { } } - #[test] - #[allow(deprecated)] - fn test_extension_all_schemas() { - let plan = LogicalPlan::Extension(Extension { - node: Arc::new(NoChildExtension::empty()), - }); - - let schemas = plan.all_schemas(); - assert_eq!(1, schemas.len()); - assert_eq!(0, schemas[0].fields().len()); - } - #[test] fn test_replace_invalid_placeholder() { // test empty placeholder diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index f4a2cba68e16..59c54199333e 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -349,12 +349,6 @@ pub fn can_project( } } -/// Returns the total number of bytes of memory occupied physically by this batch. -#[deprecated(since = "28.0.0", note = "RecordBatch::get_array_memory_size")] -pub fn batch_byte_size(batch: &RecordBatch) -> usize { - batch.get_array_memory_size() -} - #[cfg(test)] mod tests { use std::ops::Not; diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index db352bb2c86f..a80dab058ca6 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -733,16 +733,6 @@ pub struct SortExec { } impl SortExec { - /// Create a new sort execution plan - #[deprecated(since = "22.0.0", note = "use `new` and `with_fetch`")] - pub fn try_new( - expr: Vec, - input: Arc, - fetch: Option, - ) -> Result { - Ok(Self::new(expr, input).with_fetch(fetch)) - } - /// Create a new sort execution plan that produces a single, /// sorted output partition. pub fn new(expr: Vec, input: Arc) -> Self { @@ -758,23 +748,6 @@ impl SortExec { } } - /// Create a new sort execution plan with the option to preserve - /// the partitioning of the input plan - #[deprecated( - since = "22.0.0", - note = "use `new`, `with_fetch` and `with_preserve_partioning` instead" - )] - pub fn new_with_partitioning( - expr: Vec, - input: Arc, - preserve_partitioning: bool, - fetch: Option, - ) -> Self { - Self::new(expr, input) - .with_fetch(fetch) - .with_preserve_partitioning(preserve_partitioning) - } - /// Whether this `SortExec` preserves partitioning of the children pub fn preserve_partitioning(&self) -> bool { self.preserve_partitioning