From 00c21b165c619481f939e6582f4b855a004f4a1a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 9 Mar 2024 11:23:58 -0500 Subject: [PATCH 1/3] Minor: Improve documentation for registering `AnalyzerRule` --- datafusion/core/src/execution/context/mod.rs | 21 +++++++++++-------- datafusion/core/src/lib.rs | 7 ++++--- .../core/src/physical_optimizer/optimizer.rs | 8 +++++-- datafusion/optimizer/src/analyzer/mod.rs | 15 ++++++++----- datafusion/optimizer/src/optimizer.rs | 11 +++++++++- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index e071c5c80e11..6bedd4ae389e 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1587,7 +1587,7 @@ impl SessionState { self } - /// Replace the default query planner + /// use `query_planner` to plan queries, replacing the default pub fn with_query_planner( mut self, query_planner: Arc, @@ -1596,7 +1596,7 @@ impl SessionState { self } - /// Replace the analyzer rules + /// Replace the entire list of [`AnalyzerRule`]s used to optimize plans. pub fn with_analyzer_rules( mut self, rules: Vec>, @@ -1605,7 +1605,7 @@ impl SessionState { self } - /// Replace the optimizer rules + /// Replace the entire list of [`OptimizerRule`]s used to optimize plans pub fn with_optimizer_rules( mut self, rules: Vec>, @@ -1614,7 +1614,7 @@ impl SessionState { self } - /// Replace the physical optimizer rules + /// Replace the entire list of [`PhysicalOptimizerRule`]s used to optimize plans pub fn with_physical_optimizer_rules( mut self, physical_optimizers: Vec>, @@ -1623,7 +1623,8 @@ impl SessionState { self } - /// Adds a new [`AnalyzerRule`] + /// Add `analyzer_rule` to the end of the list of + /// [`AnalyzerRule`]s used to rewrite queries. pub fn add_analyzer_rule( mut self, analyzer_rule: Arc, @@ -1632,7 +1633,8 @@ impl SessionState { self } - /// Adds a new [`OptimizerRule`] + /// Add `optimizer_rule` to the end of the list of + /// [`OptimizerRule`]s used to rewrite queries. pub fn add_optimizer_rule( mut self, optimizer_rule: Arc, @@ -1641,12 +1643,13 @@ impl SessionState { self } - /// Adds a new [`PhysicalOptimizerRule`] + /// Add `physical_optimizer_rule` to the end of the list of + /// [`PhysicalOptimizerRule`]s used to rewrite queries. pub fn add_physical_optimizer_rule( mut self, - optimizer_rule: Arc, + physical_optimizer_rule: Arc, ) -> Self { - self.physical_optimizers.rules.push(optimizer_rule); + self.physical_optimizers.rules.push(physical_optimizer_rule); self } diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 2b565ece7568..5dc3e1ce7d3f 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -132,9 +132,9 @@ //! //! ## Customization and Extension //! -//! DataFusion is a "disaggregated" query engine. This +//! DataFusion is a "disaggregated" query engine. This //! means developers can start with a working, full featured engine, and then -//! extend the parts of DataFusion they need to specialize for their usecase. For example, +//! extend the areas they need to specialize for their usecase. For example, //! some projects may add custom [`ExecutionPlan`] operators, or create their own //! query language that directly creates [`LogicalPlan`] rather than using the //! built in SQL planner, [`SqlToRel`]. @@ -145,7 +145,7 @@ //! * define your own catalogs, schemas, and table lists ([`CatalogProvider`]) //! * build your own query language or plans ([`LogicalPlanBuilder`]) //! * declare and use user-defined functions ([`ScalarUDF`], and [`AggregateUDF`], [`WindowUDF`]) -//! * add custom optimizer rewrite passes ([`OptimizerRule`] and [`PhysicalOptimizerRule`]) +//! * add custom plan rewrite passes ([`AnalyzerRule`], [`OptimizerRule`] and [`PhysicalOptimizerRule`]) //! * extend the planner to use user-defined logical and physical nodes ([`QueryPlanner`]) //! //! You can find examples of each of them in the [datafusion-examples] directory. @@ -158,6 +158,7 @@ //! [`WindowUDF`]: crate::logical_expr::WindowUDF //! [`QueryPlanner`]: execution::context::QueryPlanner //! [`OptimizerRule`]: datafusion_optimizer::optimizer::OptimizerRule +//! [`AnalyzerRule`]: datafusion_optimizer::analyzer::AnalyzerRule //! [`PhysicalOptimizerRule`]: crate::physical_optimizer::optimizer::PhysicalOptimizerRule //! //! # Architecture diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs index f8c82576e254..9ba264eed002 100644 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ b/datafusion/core/src/physical_optimizer/optimizer.rs @@ -34,8 +34,12 @@ use crate::physical_optimizer::topk_aggregation::TopKAggregation; use crate::{error::Result, physical_plan::ExecutionPlan}; /// `PhysicalOptimizerRule` transforms one ['ExecutionPlan'] into another which -/// computes the same results, but in a potentially more efficient -/// way. +/// computes the same results, but in a potentially more efficient way. +/// +/// Use [`SessionState::add_physical_optimizer_rule`] to register additional +/// `PhysicalOptimizerRule`s.. +/// +/// [`SessionState::add_physical_optimizer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionState.html#method.add_physical_optimizer_rule pub trait PhysicalOptimizerRule { /// Rewrite `plan` to an optimized form fn optimize( diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index ad852b460fc6..d68521284176 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -46,13 +46,18 @@ use self::rewrite_expr::OperatorToFunction; /// [`AnalyzerRule`]s transform [`LogicalPlan`]s in some way to make /// the plan valid prior to the rest of the DataFusion optimization process. /// -/// For example, it may resolve [`Expr`]s into more specific forms such -/// as a subquery reference, to do type coercion to ensure the types +/// This is different than an [`OptimizerRule`](crate::OptimizerRule) +/// which must preserve the semantics of the `LogicalPlan`, while computing +/// results in a more optimal way. +/// +/// For example, an `AnalyzerRule` may resolve [`Expr`]s into more specific +/// forms such as a subquery reference, or do type coercion to ensure the types /// of operands are correct. /// -/// This is different than an [`OptimizerRule`](crate::OptimizerRule) -/// which should preserve the semantics of the LogicalPlan but compute -/// it the same result in some more optimal way. +/// Use [`SessionState::add_analyzer_rule`] to register additional +/// `AnalyzerRule`s. +/// +/// [`SessionState::add_analyzer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionState.html#method.add_analyzer_rule pub trait AnalyzerRule { /// Rewrite `plan` fn analyze(&self, plan: LogicalPlan, config: &ConfigOptions) -> Result; diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 5a594e75dd8a..fe63766fc265 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -57,7 +57,16 @@ use log::{debug, warn}; /// `OptimizerRule` transforms one [`LogicalPlan`] into another which /// computes the same results, but in a potentially more efficient /// way. If there are no suitable transformations for the input plan, -/// the optimizer can simply return it as is. +/// the optimizer should simply return it unmodified. +/// +/// To change the semantics of a `LogicalPlan`, see [`AnalyzerRule`] +/// +/// Use [`SessionState::add_optimizer_rule`] to register additional +/// `OptimizerRule`s. +/// +/// [`AnalyzerRule`]: crate::analyzer::AnalyzerRule +/// [`SessionState::add_optimizer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionState.html#method.add_optimizer_rule + pub trait OptimizerRule { /// Try and rewrite `plan` to an optimized form, returning None if the plan cannot be /// optimized by this rule. From 716a2ad0be805c9f4a745105402c71fa1025ed9c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 10 Mar 2024 05:36:54 -0400 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: comphead --- datafusion/core/src/execution/context/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 6bedd4ae389e..2a080cf82501 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1587,7 +1587,7 @@ impl SessionState { self } - /// use `query_planner` to plan queries, replacing the default + /// override default query planner with `query_planner` pub fn with_query_planner( mut self, query_planner: Arc, @@ -1596,7 +1596,7 @@ impl SessionState { self } - /// Replace the entire list of [`AnalyzerRule`]s used to optimize plans. + /// Override the [`AnalyzerRule`]s optimizer plan rules. pub fn with_analyzer_rules( mut self, rules: Vec>, From 319e3f04f4004fb2b15505ffcecba3c1b4c23bdf Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 10 Mar 2024 06:31:28 -0400 Subject: [PATCH 3/3] Update datafusion/core/src/physical_optimizer/optimizer.rs --- datafusion/core/src/physical_optimizer/optimizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs index 28b9fdd8951d..48da68cb2e37 100644 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ b/datafusion/core/src/physical_optimizer/optimizer.rs @@ -37,7 +37,7 @@ use crate::{error::Result, physical_plan::ExecutionPlan}; /// computes the same results, but in a potentially more efficient way. /// /// Use [`SessionState::add_physical_optimizer_rule`] to register additional -/// `PhysicalOptimizerRule`s.. +/// `PhysicalOptimizerRule`s. /// /// [`SessionState::add_physical_optimizer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionState.html#method.add_physical_optimizer_rule pub trait PhysicalOptimizerRule {