Skip to content

Commit

Permalink
feat: eliminate useless join | convert inner to outer when condition …
Browse files Browse the repository at this point in the history
…is true (#6443)

* minor

* feat: eliminate useless join | convert inner to outer when condition is true
  • Loading branch information
jackwener authored May 26, 2023
1 parent 5c19eb1 commit a9f0c7a
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 19 deletions.
2 changes: 1 addition & 1 deletion datafusion/core/tests/simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyInfo};
/// about the expressions.
///
/// You can provide that information using DataFusion [DFSchema]
/// objects or from some other implemention
/// objects or from some other implementation
struct MyInfo {
/// The input schema
schema: DFSchema,
Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ analyzed_logical_plan SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
Expand Down Expand Up @@ -175,6 +176,7 @@ logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
Expand Down
21 changes: 21 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,27 @@ with t1 as (select 1 as col1, 'asd' as col2),
t2 as (select 1 as col3, 'sdf' as col4)
select col2, col4 from t1 full outer join t2 on col1 = col3

# test eliminate join when condition is false
query TT
explain select * from t1 join t2 on false;
----
logical_plan EmptyRelation
physical_plan EmptyExec: produce_one_row=false

# test convert inner join to cross join when condition is true
query TT
explain select * from t1 inner join t2 on true;
----
logical_plan
CrossJoin:
--TableScan: t1 projection=[t1_id, t1_name, t1_int]
--TableScan: t2 projection=[t2_id, t2_name, t2_int]
physical_plan
CrossJoinExec
--CoalescePartitionsExec
----MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
--MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]

statement ok
drop table IF EXISTS t1;

Expand Down
7 changes: 1 addition & 6 deletions datafusion/expr/src/field_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,7 @@ pub fn get_indexed_field(data_type: &DataType, key: &ScalarValue) -> Result<Fiel
))
} else {
let field = fields.iter().find(|f| f.name() == s);
match field {
None => Err(DataFusionError::Plan(format!(
"Field {s} not found in struct"
))),
Some(f) => Ok(f.as_ref().clone()),
}
field.ok_or(DataFusionError::Plan(format!("Field {s} not found in struct"))).map(|f| f.as_ref().clone())
}
}
(DataType::Struct(_), _) => Err(DataFusionError::Plan(
Expand Down
6 changes: 2 additions & 4 deletions datafusion/expr/src/logical_plan/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,8 @@ impl<'a, 'b> TreeNodeVisitor for GraphvizVisitor<'a, 'b> {
// always be non-empty as pre_visit always pushes
// So it should always be Ok(true)
let res = self.parent_ids.pop();
match res {
Some(_) => Ok(VisitRecursion::Continue),
None => Err(DataFusionError::Internal("Fail to format".to_string())),
}
res.ok_or(DataFusionError::Internal("Fail to format".to_string()))
.map(|_| VisitRecursion::Continue)
}
}

Expand Down
9 changes: 1 addition & 8 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,14 +203,7 @@ pub fn coerce_types(
};

// re-write the error message of failed coercions to include the operator's information
match result {
None => Err(DataFusionError::Plan(
format!(
"{lhs_type:?} {op} {rhs_type:?} can't be evaluated because there isn't a common type to coerce the types to"
),
)),
Some(t) => Ok(t)
}
result.ok_or(DataFusionError::Plan(format!("{lhs_type:?} {op} {rhs_type:?} can't be evaluated because there isn't a common type to coerce the types to")))
}

/// Coercion rules for mathematics operators between decimal and non-decimal types.
Expand Down
121 changes: 121 additions & 0 deletions datafusion/optimizer/src/eliminate_join.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::optimizer::ApplyOrder;
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::JoinType::Inner;
use datafusion_expr::{
logical_plan::{EmptyRelation, LogicalPlan},
CrossJoin, Expr,
};

/// Optimizer rule that simplifies inner joins which have no equi-join keys
/// and whose filter is a boolean literal:
///
/// * filter is `false`: the join is replaced with an empty relation that
///   produces no rows.
/// * filter is `true`: the join is replaced with a cross join of the same
///   two inputs.
#[derive(Default)]
pub struct EliminateJoin;

impl EliminateJoin {
    /// Creates a new instance of the rule.
    pub fn new() -> Self {
        Self
    }
}

impl OptimizerRule for EliminateJoin {
    /// Rewrites an inner join that has no equi-join keys and whose filter is a
    /// boolean literal.
    ///
    /// Returns `Ok(Some(plan))` where `plan` is a `CrossJoin` when the filter is
    /// the literal `true`, or an `EmptyRelation` producing no rows when the
    /// filter is the literal `false`. Both replacements reuse the join's schema.
    /// Any other plan yields `Ok(None)`.
    fn try_optimize(
        &self,
        plan: &LogicalPlan,
        _config: &dyn OptimizerConfig,
    ) -> Result<Option<LogicalPlan>> {
        // Only inner joins without equi-join keys are candidates.
        let join = match plan {
            LogicalPlan::Join(candidate)
                if candidate.join_type == Inner && candidate.on.is_empty() =>
            {
                candidate
            }
            _ => return Ok(None),
        };

        let rewritten = match &join.filter {
            // `ON true` keeps every pair of rows: same as a cross join.
            Some(Expr::Literal(ScalarValue::Boolean(Some(true)))) => {
                Some(LogicalPlan::CrossJoin(CrossJoin {
                    left: join.left.clone(),
                    right: join.right.clone(),
                    schema: join.schema.clone(),
                }))
            }
            // `ON false` keeps no rows: replace the join with an empty relation.
            Some(Expr::Literal(ScalarValue::Boolean(Some(false)))) => {
                Some(LogicalPlan::EmptyRelation(EmptyRelation {
                    produce_one_row: false,
                    schema: join.schema.clone(),
                }))
            }
            // Missing or non-literal filter: nothing to rewrite.
            _ => None,
        };

        Ok(rewritten)
    }

    /// Name of this rule.
    fn name(&self) -> &str {
        "eliminate_join"
    }

    /// Requests `ApplyOrder::TopDown` for this rule.
    fn apply_order(&self) -> Option<ApplyOrder> {
        Some(ApplyOrder::TopDown)
    }
}

#[cfg(test)]
mod tests {
    use crate::eliminate_join::EliminateJoin;
    use crate::test::*;
    use datafusion_common::{Column, Result, ScalarValue};
    use datafusion_expr::JoinType::Inner;
    use datafusion_expr::{logical_plan::builder::LogicalPlanBuilder, Expr, LogicalPlan};
    use std::sync::Arc;

    /// Runs `EliminateJoin` over `plan` and compares the result with `expected`.
    fn assert_optimized_plan_equal(plan: &LogicalPlan, expected: &str) -> Result<()> {
        let rule = Arc::new(EliminateJoin::new());
        assert_optimized_plan_eq(rule, plan, expected)
    }

    /// Builds an inner join of two empty relations with no equi-join keys and
    /// the boolean literal `condition` as the join filter.
    fn inner_join_on_literal(condition: bool) -> Result<LogicalPlan> {
        let right = LogicalPlanBuilder::empty(false).build()?;
        let no_keys = (Vec::<Column>::new(), Vec::<Column>::new());
        let filter = Expr::Literal(ScalarValue::Boolean(Some(condition)));

        LogicalPlanBuilder::empty(false)
            .join(right, Inner, no_keys, Some(filter))?
            .build()
    }

    /// An inner join filtered on literal `false` becomes an empty relation.
    #[test]
    fn join_on_false() -> Result<()> {
        let plan = inner_join_on_literal(false)?;

        let expected = "EmptyRelation";
        assert_optimized_plan_equal(&plan, expected)
    }

    /// An inner join filtered on literal `true` becomes a cross join.
    #[test]
    fn join_on_true() -> Result<()> {
        let plan = inner_join_on_literal(true)?;

        let expected = "\
        CrossJoin:\
        \n EmptyRelation\
        \n EmptyRelation";
        assert_optimized_plan_equal(&plan, expected)
    }
}
1 change: 1 addition & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub mod decorrelate_predicate_subquery;
pub mod eliminate_cross_join;
pub mod eliminate_duplicated_expr;
pub mod eliminate_filter;
pub mod eliminate_join;
pub mod eliminate_limit;
pub mod eliminate_outer_join;
pub mod eliminate_project;
Expand Down
2 changes: 2 additions & 0 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::decorrelate_predicate_subquery::DecorrelatePredicateSubquery;
use crate::eliminate_cross_join::EliminateCrossJoin;
use crate::eliminate_duplicated_expr::EliminateDuplicatedExpr;
use crate::eliminate_filter::EliminateFilter;
use crate::eliminate_join::EliminateJoin;
use crate::eliminate_limit::EliminateLimit;
use crate::eliminate_outer_join::EliminateOuterJoin;
use crate::eliminate_project::EliminateProjection;
Expand Down Expand Up @@ -210,6 +211,7 @@ impl Optimizer {
Arc::new(SimplifyExpressions::new()),
Arc::new(UnwrapCastInComparison::new()),
Arc::new(ReplaceDistinctWithAggregate::new()),
Arc::new(EliminateJoin::new()),
Arc::new(DecorrelatePredicateSubquery::new()),
Arc::new(ScalarSubqueryToJoin::new()),
Arc::new(ExtractEquijoinPredicate::new()),
Expand Down

0 comments on commit a9f0c7a

Please sign in to comment.