-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support create_physical_expr
and ExecutionContextState
or DefaultPhysicalPlanner
for faster speed
#1700
Support create_physical_expr
and ExecutionContextState
or DefaultPhysicalPlanner
for faster speed
#1700
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,7 +190,6 @@ impl ExecutionContext { | |
state: Arc::new(Mutex::new(ExecutionContextState { | ||
catalog_list, | ||
scalar_functions: HashMap::new(), | ||
var_provider: HashMap::new(), | ||
aggregate_functions: HashMap::new(), | ||
config, | ||
execution_props: ExecutionProps::new(), | ||
|
@@ -324,8 +323,8 @@ impl ExecutionContext { | |
self.state | ||
.lock() | ||
.unwrap() | ||
.var_provider | ||
.insert(variable_type, provider); | ||
.execution_props | ||
.add_var_provider(variable_type, provider); | ||
} | ||
|
||
/// Registers a scalar UDF within this context. | ||
|
@@ -1115,9 +1114,14 @@ impl ExecutionConfig { | |
/// An instance of this struct is created each time a [`LogicalPlan`] is prepared for | ||
/// execution (optimized). If the same plan is optimized multiple times, a new | ||
/// `ExecutionProps` is created each time. | ||
/// | ||
/// It is important that this structure be cheap to create as it is | ||
/// done so during predicate pruning and expression simplification | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
#[derive(Clone)] | ||
pub struct ExecutionProps { | ||
pub(crate) query_execution_start_time: DateTime<Utc>, | ||
/// providers for scalar variables | ||
pub var_providers: Option<HashMap<VarType, Arc<dyn VarProvider + Send + Sync>>>, | ||
} | ||
|
||
impl Default for ExecutionProps { | ||
|
@@ -1131,6 +1135,7 @@ impl ExecutionProps { | |
pub fn new() -> Self { | ||
ExecutionProps { | ||
query_execution_start_time: chrono::Utc::now(), | ||
var_providers: None, | ||
} | ||
} | ||
|
||
|
@@ -1139,6 +1144,32 @@ impl ExecutionProps { | |
self.query_execution_start_time = chrono::Utc::now(); | ||
&*self | ||
} | ||
|
||
/// Registers a variable provider, returning the existing | ||
/// provider, if any | ||
pub fn add_var_provider( | ||
&mut self, | ||
var_type: VarType, | ||
provider: Arc<dyn VarProvider + Send + Sync>, | ||
) -> Option<Arc<dyn VarProvider + Send + Sync>> { | ||
let mut var_providers = self.var_providers.take().unwrap_or_else(HashMap::new); | ||
|
||
let old_provider = var_providers.insert(var_type, provider); | ||
|
||
self.var_providers = Some(var_providers); | ||
|
||
old_provider | ||
} | ||
|
||
/// Returns the provider for the var_type, if any | ||
pub fn get_var_provider( | ||
&self, | ||
var_type: VarType, | ||
) -> Option<Arc<dyn VarProvider + Send + Sync>> { | ||
self.var_providers | ||
.as_ref() | ||
.and_then(|var_providers| var_providers.get(&var_type).map(Arc::clone)) | ||
} | ||
} | ||
|
||
/// Execution context for registering data sources and executing queries | ||
|
@@ -1148,8 +1179,6 @@ pub struct ExecutionContextState { | |
pub catalog_list: Arc<dyn CatalogList>, | ||
/// Scalar functions that are registered with the context | ||
pub scalar_functions: HashMap<String, Arc<ScalarUDF>>, | ||
/// Variable provider that are registered with the context | ||
pub var_provider: HashMap<VarType, Arc<dyn VarProvider + Send + Sync>>, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. while technically this is a breaking change, the |
||
/// Aggregate functions registered in the context | ||
pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>, | ||
/// Context configuration | ||
|
@@ -1174,7 +1203,6 @@ impl ExecutionContextState { | |
ExecutionContextState { | ||
catalog_list: Arc::new(MemoryCatalogList::new()), | ||
scalar_functions: HashMap::new(), | ||
var_provider: HashMap::new(), | ||
aggregate_functions: HashMap::new(), | ||
config: ExecutionConfig::new(), | ||
execution_props: ExecutionProps::new(), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,13 +22,13 @@ use arrow::datatypes::{DataType, Field, Schema}; | |
use arrow::record_batch::RecordBatch; | ||
|
||
use crate::error::DataFusionError; | ||
use crate::execution::context::{ExecutionContextState, ExecutionProps}; | ||
use crate::execution::context::ExecutionProps; | ||
use crate::logical_plan::{lit, DFSchemaRef, Expr}; | ||
use crate::logical_plan::{DFSchema, ExprRewriter, LogicalPlan, RewriteRecursion}; | ||
use crate::optimizer::optimizer::OptimizerRule; | ||
use crate::optimizer::utils; | ||
use crate::physical_plan::functions::Volatility; | ||
use crate::physical_plan::planner::DefaultPhysicalPlanner; | ||
use crate::physical_plan::planner::create_physical_expr; | ||
use crate::scalar::ScalarValue; | ||
use crate::{error::Result, logical_plan::Operator}; | ||
|
||
|
@@ -223,7 +223,7 @@ impl SimplifyExpressions { | |
/// let rewritten = expr.rewrite(&mut const_evaluator).unwrap(); | ||
/// assert_eq!(rewritten, lit(3) + col("a")); | ||
/// ``` | ||
pub struct ConstEvaluator { | ||
pub struct ConstEvaluator<'a> { | ||
/// can_evaluate is used during the depth-first-search of the | ||
/// Expr tree to track if any siblings (or their descendants) were | ||
/// non evaluatable (e.g. had a column reference or volatile | ||
|
@@ -238,13 +238,12 @@ pub struct ConstEvaluator { | |
/// descendants) so this Expr can be evaluated | ||
can_evaluate: Vec<bool>, | ||
|
||
ctx_state: ExecutionContextState, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here is reason 1 for this change: We no longer have to create a whole new |
||
planner: DefaultPhysicalPlanner, | ||
execution_props: &'a ExecutionProps, | ||
input_schema: DFSchema, | ||
input_batch: RecordBatch, | ||
} | ||
|
||
impl ExprRewriter for ConstEvaluator { | ||
impl<'a> ExprRewriter for ConstEvaluator<'a> { | ||
fn pre_visit(&mut self, expr: &Expr) -> Result<RewriteRecursion> { | ||
// Default to being able to evaluate this node | ||
self.can_evaluate.push(true); | ||
|
@@ -282,16 +281,11 @@ impl ExprRewriter for ConstEvaluator { | |
} | ||
} | ||
|
||
impl ConstEvaluator { | ||
impl<'a> ConstEvaluator<'a> { | ||
/// Create a new `ConstEvaluator`. Session constants (such as | ||
/// the time for `now()`) are taken from the passed | ||
/// `execution_props`. | ||
pub fn new(execution_props: &ExecutionProps) -> Self { | ||
let planner = DefaultPhysicalPlanner::default(); | ||
let ctx_state = ExecutionContextState { | ||
execution_props: execution_props.clone(), | ||
..ExecutionContextState::new() | ||
}; | ||
pub fn new(execution_props: &'a ExecutionProps) -> Self { | ||
let input_schema = DFSchema::empty(); | ||
|
||
// The dummy column name is unused and doesn't matter as only | ||
|
@@ -306,8 +300,7 @@ impl ConstEvaluator { | |
|
||
Self { | ||
can_evaluate: vec![], | ||
ctx_state, | ||
planner, | ||
execution_props, | ||
input_schema, | ||
input_batch, | ||
} | ||
|
@@ -364,11 +357,11 @@ impl ConstEvaluator { | |
return Ok(s); | ||
} | ||
|
||
let phys_expr = self.planner.create_physical_expr( | ||
let phys_expr = create_physical_expr( | ||
&expr, | ||
&self.input_schema, | ||
&self.input_batch.schema(), | ||
&self.ctx_state, | ||
self.execution_props, | ||
)?; | ||
let col_val = phys_expr.evaluate(&self.input_batch)?; | ||
match col_val { | ||
|
@@ -1141,6 +1134,7 @@ mod tests { | |
) { | ||
let execution_props = ExecutionProps { | ||
query_execution_start_time: *date_time, | ||
var_providers: None, | ||
}; | ||
|
||
let mut const_evaluator = ConstEvaluator::new(&execution_props); | ||
|
@@ -1622,6 +1616,7 @@ mod tests { | |
let rule = SimplifyExpressions::new(); | ||
let execution_props = ExecutionProps { | ||
query_execution_start_time: *date_time, | ||
var_providers: None, | ||
}; | ||
|
||
let err = rule | ||
|
@@ -1638,6 +1633,7 @@ mod tests { | |
let rule = SimplifyExpressions::new(); | ||
let execution_props = ExecutionProps { | ||
query_execution_start_time: *date_time, | ||
var_providers: None, | ||
}; | ||
|
||
let optimized_plan = rule | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,13 +37,14 @@ use arrow::{ | |
record_batch::RecordBatch, | ||
}; | ||
|
||
use crate::execution::context::ExecutionProps; | ||
use crate::physical_plan::planner::create_physical_expr; | ||
use crate::prelude::lit; | ||
use crate::{ | ||
error::{DataFusionError, Result}, | ||
execution::context::ExecutionContextState, | ||
logical_plan::{Column, DFSchema, Expr, Operator}, | ||
optimizer::utils, | ||
physical_plan::{planner::DefaultPhysicalPlanner, ColumnarValue, PhysicalExpr}, | ||
physical_plan::{ColumnarValue, PhysicalExpr}, | ||
}; | ||
|
||
/// Interface to pass statistics information to [`PruningPredicate`] | ||
|
@@ -129,12 +130,14 @@ impl PruningPredicate { | |
.collect::<Vec<_>>(); | ||
let stat_schema = Schema::new(stat_fields); | ||
let stat_dfschema = DFSchema::try_from(stat_schema.clone())?; | ||
let execution_context_state = ExecutionContextState::new(); | ||
let predicate_expr = DefaultPhysicalPlanner::default().create_physical_expr( | ||
|
||
// TODO allow these properties to be passed in | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here is reason 2 for this change: again we save a whole new |
||
let execution_props = ExecutionProps::new(); | ||
let predicate_expr = create_physical_expr( | ||
&logical_predicate_expr, | ||
&stat_dfschema, | ||
&stat_schema, | ||
&execution_context_state, | ||
&execution_props, | ||
)?; | ||
Ok(Self { | ||
schema, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `pub fn register_variable(...)` API remains the same.