From e6d7e7b89e359e791e38e4c87b7839f0a9ea44e5 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Mon, 23 Nov 2020 17:34:53 +0800 Subject: [PATCH] planner: make index-hash-join and index-merge-join consider collation (#21201) Signed-off-by: wjhuang2016 --- expression/integration_test.go | 18 ++++++++++ planner/core/exhaust_physical_plans.go | 40 +++++++++++++-------- planner/core/exhaust_physical_plans_test.go | 2 +- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/expression/integration_test.go b/expression/integration_test.go index 2a86d240b9da6..f4af2a0cafd4b 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -7737,3 +7737,21 @@ func (s *testIntegrationSerialSuite) TestIssue20608(c *C) { tk := testkit.NewTestKit(c, s.store) tk.MustQuery("select '䇇Հ' collate utf8mb4_bin like '___Հ';").Check(testkit.Rows("0")) } + +func (s *testIntegrationSerialSuite) TestCollationIndexJoin(c *C) { + collate.SetNewCollationEnabledForTest(true) + defer collate.SetNewCollationEnabledForTest(false) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int, b char(10), key(b)) collate utf8mb4_general_ci") + tk.MustExec("create table t2(a int, b char(10), key(b)) collate ascii_bin") + tk.MustExec("insert into t1 values (1, 'a')") + tk.MustExec("insert into t2 values (1, 'A')") + + tk.MustQuery("select /*+ inl_join(t1) */ t1.b, t2.b from t1 join t2 where t1.b=t2.b").Check(testkit.Rows("a A")) + tk.MustQuery("select /*+ hash_join(t1) */ t1.b, t2.b from t1 join t2 where t1.b=t2.b").Check(testkit.Rows("a A")) + tk.MustQuery("select /*+ merge_join(t1) */ t1.b, t2.b from t1 join t2 where t1.b=t2.b").Check(testkit.Rows("a A")) + tk.MustQuery("select /*+ inl_hash_join(t1) */ t1.b, t2.b from t1 join t2 where t1.b=t2.b").Check(testkit.Rows("a A")) + tk.MustQuery("select /*+ inl_merge_join(t1) */ t1.b, t2.b from t1 join t2 where t1.b=t2.b").Check(testkit.Rows("a A")) 
+} diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 5af99616a8aec..2043603529984 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/collate" "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/plancodec" "github.com/pingcap/tidb/util/ranger" @@ -479,6 +480,16 @@ func (p *LogicalJoin) constructIndexJoin( IsNullEQ: newIsNullEQ, DefaultValues: p.DefaultValues, } + // Correct the collation used by hash. + for i := range outerHashKeys { + // Make compiler happy. + if len(innerHashKeys) == 0 { + return nil + } + chs, coll := expression.DeriveCollationFromExprs(nil, outerHashKeys[i], innerHashKeys[i]) + outerHashKeys[i].GetType().Charset, outerHashKeys[i].GetType().Collate = chs, coll + innerHashKeys[i].GetType().Charset, innerHashKeys[i].GetType().Collate = chs, coll + } join := PhysicalIndexJoin{ basePhysicalJoin: baseJoin, innerTask: innerTask, @@ -657,15 +668,14 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou return p.buildIndexJoinInner2IndexScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt) } -func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*expression.Column, - checkPathValid func(path *util.AccessPath) bool) (*indexJoinBuildHelper, []int) { +func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*expression.Column, checkPathValid func(path *util.AccessPath) bool, outerJoinKeys []*expression.Column) (*indexJoinBuildHelper, []int) { helper := &indexJoinBuildHelper{ join: p, innerPlan: ds, } for _, path := range ds.possibleAccessPaths { if checkPathValid(path) { - emptyRange, err := helper.analyzeLookUpFilters(path, ds, innerJoinKeys) + emptyRange, err := 
helper.analyzeLookUpFilters(path, ds, innerJoinKeys, outerJoinKeys) if emptyRange { return nil, nil } @@ -713,7 +723,7 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan( var innerTask, innerTask2 task var helper *indexJoinBuildHelper if ds.tableInfo.IsCommonHandle { - helper, keyOff2IdxOff = p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return path.IsCommonHandlePath }) + helper, keyOff2IdxOff = p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return path.IsCommonHandlePath }, outerJoinKeys) if helper == nil { return nil } @@ -774,7 +784,7 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan( func (p *LogicalJoin) buildIndexJoinInner2IndexScan( prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column, outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) { - helper, keyOff2IdxOff := p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return !path.IsTablePath() }) + helper, keyOff2IdxOff := p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return !path.IsTablePath() }, outerJoinKeys) if helper == nil { return nil } @@ -1250,16 +1260,18 @@ loopOtherConds: // It's clearly that the column c cannot be used to access data. So we need to remove it and reset the IdxOff2KeyOff to // [0 -1 -1]. // So that we can use t1.a=t2.a and t1.b > t2.b-10 and t1.b < t2.b+10 to build ranges then access data. 
-func (ijHelper *indexJoinBuildHelper) removeUselessEqAndInFunc(
-	idxCols []*expression.Column,
-	notKeyEqAndIn []expression.Expression) (
-	usefulEqAndIn, uselessOnes []expression.Expression,
-) {
+func (ijHelper *indexJoinBuildHelper) removeUselessEqAndInFunc(idxCols []*expression.Column, notKeyEqAndIn []expression.Expression, outerJoinKeys []*expression.Column) (usefulEqAndIn, uselessOnes []expression.Expression) {
 	ijHelper.curPossibleUsedKeys = make([]*expression.Column, 0, len(idxCols))
 	for idxColPos, notKeyColPos := 0, 0; idxColPos < len(idxCols); idxColPos++ {
 		if ijHelper.curIdxOff2KeyOff[idxColPos] != -1 {
-			ijHelper.curPossibleUsedKeys = append(ijHelper.curPossibleUsedKeys, idxCols[idxColPos])
-			continue
+			// If the new collation is enabled, the index column is only usable as a lookup key when its collation is compatible with the one derived from it and the matching outer join key.
+			idxCol := idxCols[idxColPos]
+			outerKey := outerJoinKeys[ijHelper.curIdxOff2KeyOff[idxColPos]]
+			_, coll := expression.DeriveCollationFromExprs(nil, idxCol, outerKey)
+			if !collate.NewCollationEnabled() || collate.CompatibleCollate(idxCol.GetType().Collate, coll) {
+				ijHelper.curPossibleUsedKeys = append(ijHelper.curPossibleUsedKeys, idxCol)
+				continue
+			}
 		}
 		if notKeyColPos < len(notKeyEqAndIn) && ijHelper.curNotUsedIndexCols[notKeyColPos].Equal(nil, idxCols[idxColPos]) {
 			notKeyColPos++
@@ -1276,7 +1288,7 @@ func (ijHelper *indexJoinBuildHelper) removeUselessEqAndInFunc(
 	return notKeyEqAndIn, nil
 }
 
-func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *util.AccessPath, innerPlan *DataSource, innerJoinKeys []*expression.Column) (emptyRange bool, err error) {
+func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *util.AccessPath, innerPlan *DataSource, innerJoinKeys []*expression.Column, outerJoinKeys []*expression.Column) (emptyRange bool, err error) {
 	if len(path.IdxCols) == 0 {
 		return false, nil
 	}
@@ -1284,7 +1296,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *util.AccessPath
 	ijHelper.resetContextForIndex(innerJoinKeys, path.IdxCols, path.IdxColLens)
 	notKeyEqAndIn, remained, rangeFilterCandidates := ijHelper.findUsefulEqAndInFilters(innerPlan)
 	var remainedEqAndIn []expression.Expression
-	notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(path.IdxCols, notKeyEqAndIn)
+	notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(path.IdxCols, notKeyEqAndIn, outerJoinKeys)
 	matchedKeyCnt := len(ijHelper.curPossibleUsedKeys)
 	// If no join key is matched while join keys actually are not empty. We don't choose index join for now.
if matchedKeyCnt <= 0 && len(innerJoinKeys) > 0 { diff --git a/planner/core/exhaust_physical_plans_test.go b/planner/core/exhaust_physical_plans_test.go index 889e0f657392f..ed7a47b87cece 100644 --- a/planner/core/exhaust_physical_plans_test.go +++ b/planner/core/exhaust_physical_plans_test.go @@ -248,7 +248,7 @@ func (s *testUnitTestSuit) TestIndexJoinAnalyzeLookUpFilters(c *C) { c.Assert(err, IsNil) joinNode.OtherConditions = others helper := &indexJoinBuildHelper{join: joinNode, lastColManager: nil, innerPlan: dataSourceNode} - _, err = helper.analyzeLookUpFilters(path, dataSourceNode, tt.innerKeys) + _, err = helper.analyzeLookUpFilters(path, dataSourceNode, tt.innerKeys, tt.innerKeys) c.Assert(err, IsNil) c.Assert(fmt.Sprintf("%v", helper.chosenAccess), Equals, tt.accesses) c.Assert(fmt.Sprintf("%v", helper.chosenRanges), Equals, tt.ranges, Commentf("test case: #%v", i))