From b09e528e28ac957bdbbdb68c6b349a68be3bc0d7 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 17 Oct 2019 11:03:48 +0800 Subject: [PATCH] planner: fix constant propagation for AntiSemiJoin (#12728) Conflicts: cmd/explaintest/r/tpch.result planner/core/integration_test.go planner/core/logical_plan_builder.go planner/core/logical_plan_test.go --- cmd/explaintest/r/tpch.result | 2 +- planner/core/integration_test.go | 45 ++++++++++++++++++++++++ planner/core/logical_plan_builder.go | 7 +++- planner/core/rule_predicate_push_down.go | 31 +++++++++++----- 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index a66f1028d083f..5f5eb8929364b 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -1225,7 +1225,7 @@ id count task operator info Projection_25 100.00 root tpch.supplier.s_name, 17_col_0 └─TopN_28 100.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100 └─HashAgg_31 320000.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name) - └─IndexJoin_37 3786715.90 root anti semi join, inner:IndexLookUp_36, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l3.l_suppkey, tpch.supplier.s_suppkey) + └─IndexJoin_37 3786715.90 root anti semi join, inner:IndexLookUp_36, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey) ├─IndexJoin_84 4733394.87 root semi join, inner:IndexLookUp_83, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey) │ ├─HashLeftJoin_90 5916743.59 root inner join, inner:TableReader_119, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] │ │ ├─HashLeftJoin_95 147918589.81 root inner join, inner:TableReader_116, equal:[eq(tpch.l1.l_suppkey, tpch.supplier.s_suppkey)] 
diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 3d7590fca7858..843542efcbacf 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -127,3 +127,48 @@ func (s *testIntegrationSuite) TestSimplifyOuterJoinWithCast(c *C) { tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...)) } } + +func (s *testIntegrationSuite) TestAntiJoinConstProp(c *C) { + store, dom, err := newStoreWithBootstrap() + c.Assert(err, IsNil) + tk := testkit.NewTestKit(c, store) + defer func() { + dom.Close() + store.Close() + }() + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int not null, b int not null)") + tk.MustExec("insert into t1 values (1,1)") + tk.MustExec("create table t2(a int not null, b int not null)") + tk.MustExec("insert into t2 values (2,2)") + + tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.a = t1.a and t2.a > 1)").Check(testkit.Rows( + "1 1", + )) + tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.b = t1.b and t2.a > 1)").Check(testkit.Rows( + "1 1", + )) + tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.b = t1.b and t2.b > 1)").Check(testkit.Rows( + "1 1", + )) + tk.MustQuery("select q.a in (select count(*) from t1 s where not exists (select 1 from t1 p where q.a > 1 and p.a = s.a)) from t1 q").Check(testkit.Rows( + "1", + )) + tk.MustQuery("select q.a in (select not exists (select 1 from t1 p where q.a > 1 and p.a = s.a) from t1 s) from t1 q").Check(testkit.Rows( + "1", + )) + + tk.MustExec("drop table t1, t2") + tk.MustExec("create table t1(a int not null, b int)") + tk.MustExec("insert into t1 values (1,null)") + tk.MustExec("create table t2(a int not null, b int)") + tk.MustExec("insert into t2 values (2,2)") + + tk.MustQuery("select * from t1 where t1.a not in (select a from t2 where t2.b > t1.b)").Check(testkit.Rows( + "1 ", + )) + tk.MustQuery("select * from t1 
where t1.a not in (select a from t2 where t1.a = 2)").Check(testkit.Rows( + "1 ", + )) +} diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index eea9cda709175..6b78d16a8dcaf 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -202,9 +202,14 @@ func (p *LogicalJoin) pushDownConstExpr(expr expression.Expression, leftCond []e } else { leftCond = append(leftCond, expr) } - case SemiJoin, AntiSemiJoin, InnerJoin: + case SemiJoin, InnerJoin: leftCond = append(leftCond, expr) rightCond = append(rightCond, expr) + case AntiSemiJoin: + if filterCond { + leftCond = append(leftCond, expr) + } + rightCond = append(rightCond, expr) } return leftCond, rightCond } diff --git a/planner/core/rule_predicate_push_down.go b/planner/core/rule_predicate_push_down.go index 5f7569063a502..3a8dd48c9bc76 100644 --- a/planner/core/rule_predicate_push_down.go +++ b/planner/core/rule_predicate_push_down.go @@ -145,7 +145,7 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret p.LeftConditions = nil ret = append(expression.ScalarFuncs2Exprs(equalCond), otherCond...) ret = append(ret, leftPushCond...) - case SemiJoin, AntiSemiJoin, InnerJoin: + case SemiJoin, InnerJoin: tempCond := make([]expression.Expression, 0, len(p.LeftConditions)+len(p.RightConditions)+len(p.EqualConditions)+len(p.OtherConditions)+len(predicates)) tempCond = append(tempCond, p.LeftConditions...) tempCond = append(tempCond, p.RightConditions...) @@ -154,13 +154,10 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret tempCond = append(tempCond, predicates...) tempCond = expression.ExtractFiltersFromDNFs(p.ctx, tempCond) tempCond = expression.PropagateConstant(p.ctx, tempCond) - // Return table dual when filter is constant false or null. Not applicable to AntiSemiJoin. - // TODO: For AntiSemiJoin, we can use outer plan to substitute LogicalJoin actually. 
- if p.JoinType != AntiSemiJoin { - dual := conds2TableDual(p, tempCond) - if dual != nil { - return ret, dual - } + // Return table dual when filter is constant false or null. + dual := conds2TableDual(p, tempCond) + if dual != nil { + return ret, dual } equalCond, leftPushCond, rightPushCond, otherCond = p.extractOnCondition(tempCond, true, true) p.LeftConditions = nil @@ -169,6 +166,24 @@ func (p *LogicalJoin) PredicatePushDown(predicates []expression.Expression) (ret p.OtherConditions = otherCond leftCond = leftPushCond rightCond = rightPushCond + case AntiSemiJoin: + predicates = expression.PropagateConstant(p.ctx, predicates) + // Return table dual when filter is constant false or null. + dual := conds2TableDual(p, predicates) + if dual != nil { + return ret, dual + } + // `predicates` should only contain left conditions or constant filters. + _, leftPushCond, rightPushCond, _ = p.extractOnCondition(predicates, true, true) + // Do not derive `is not null` for anti join, since it may cause wrong results. + // For example: + // `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`, + // `select * from t t1 where t1.a not in (select a from t t2 where t1.b = t2.b)` does not imply `t1.b is not null`, + // `select * from t t1 where not exists (select * from t t2 where t2.a = t1.a)` does not imply `t1.a is not null`. + leftCond = leftPushCond + rightCond = append(p.RightConditions, rightPushCond...) + p.RightConditions = nil + } leftRet, lCh := p.children[0].PredicatePushDown(leftCond) rightRet, rCh := p.children[1].PredicatePushDown(rightCond)