From d9d8ce23a8c9ea0ae9099916010fe9d5c42f2005 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 14 Jul 2021 16:32:20 +0800 Subject: [PATCH 01/21] refine index back factor of skyline prunning --- planner/core/find_best_task.go | 31 +++++++++++++++++++++++-------- planner/core/logical_plan_test.go | 8 ++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 251670ed14a9a..a261621fe1a76 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -415,10 +415,11 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { // candidatePath is used to maintain required info for skyline pruning. type candidatePath struct { - path *util.AccessPath - columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. - isSingleScan bool - isMatchProp bool + path *util.AccessPath + accessCondsColSet *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions. + indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters. + isSingleScan bool + isMatchProp bool } // compareColumnSet will compares the two set. The last return value is used to indicate @@ -451,6 +452,16 @@ func compareBool(l, r bool) int { return 1 } +func compareIndexBack(lhs, rhs *candidatePath) (int, bool) { + result := compareBool(lhs.isSingleScan, rhs.isSingleScan) + if result == 0 && !lhs.isSingleScan { + // if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters + // to compare how many table rows will be accessed. + return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet) + } + return result, true +} + // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions: // (1): the set of columns that occurred in the access condition, // (2): whether or not it matches the physical property @@ -458,11 +469,14 @@ func compareBool(l, r bool) int { // If `x` is not worse than `y` at all factors, // and there exists one factor that `x` is better than `y`, then `x` is better than `y`. func compareCandidates(lhs, rhs *candidatePath) int { - setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet) + setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet) + if !comparable { + return 0 + } + scanResult, comparable := compareIndexBack(lhs, rhs) if !comparable { return 0 } - scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan) matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp) sum := setsResult + scanResult + matchResult if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 { @@ -499,7 +513,7 @@ func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.Ph } } } - candidate.columnSet = expression.ExtractColumnSet(path.AccessConds) + candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) candidate.isSingleScan = true return candidate } @@ -519,7 +533,8 @@ func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.Ph } } } - candidate.columnSet = expression.ExtractColumnSet(path.AccessConds) + candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) + candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters) candidate.isSingleScan = isSingleScan return candidate } diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index bb6786a7958b3..2f6fc2743ae77 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1700,6 +1700,14 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) { sql: "select count(1) from t", result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix", }, + { + sql: "select * from t where e_str = 'hi' order by c", + result: "PRIMARY_KEY,c_d_e_str,c_d_e_str_prefix", + }, + { + sql: "select * from t where f > 3 and g = 5", + result: "PRIMARY_KEY,g,f_g", + }, } ctx := context.TODO() for i, tt := range tests { From b45895a8a598a6930b6114ba39369f36dd9b4d42 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 14 Jul 2021 16:47:49 +0800 Subject: [PATCH 02/21] fix test case --- planner/core/logical_plan_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 2f6fc2743ae77..ae2ad51c9f88f 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1694,7 +1694,7 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) { }, { sql: "select * from t where f > 1 and g > 1", - result: "PRIMARY_KEY,f,g,f_g", + result: "PRIMARY_KEY,g,f_g", }, { sql: "select count(1) from t", From 25b39f53c3e3952676f1375021ea0eac24125329 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 14 Jul 2021 17:49:17 +0800 Subject: [PATCH 03/21] enhance isMatchProp --- planner/core/exhaust_physical_plans.go | 2 +- planner/core/find_best_task.go | 57 +++++++++++++------------- planner/core/logical_plan_test.go | 8 ++++ planner/core/logical_plans.go | 2 + planner/util/path.go | 2 + util/ranger/detacher.go | 26 +++++++++--- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index fb06f570fdce7..cfa114c47599b 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -1251,7 +1251,7 @@ func (ijHelper *indexJoinBuildHelper) findUsefulEqAndInFilters(innerPlan *DataSo var remainedEqOrIn []expression.Expression // Extract the eq/in functions of possible join key. // you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value. - usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _ = ranger.ExtractEqAndInCondition( + usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _, _ = ranger.ExtractEqAndInCondition( innerPlan.ctx, innerPlan.pushedDownConds, ijHelper.curNotUsedIndexCols, ijHelper.curNotUsedColLens, diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index a261621fe1a76..307a7c4c3ed30 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -488,31 +488,42 @@ func compareCandidates(lhs, rhs *candidatePath) int { return 0 } -func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { - candidate := &candidatePath{path: path} +func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.PhysicalProperty) bool { + var isMatchProp bool if path.IsIntHandlePath { pkCol := ds.getPKIsHandleCol() if len(prop.SortItems) == 1 && pkCol != nil { - candidate.isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol) + isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol) if path.StoreType == kv.TiFlash { - candidate.isMatchProp = candidate.isMatchProp && !prop.SortItems[0].Desc + isMatchProp = isMatchProp && !prop.SortItems[0].Desc } } - } else { - all, _ := prop.AllSameOrder() - // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because - // it needs not to keep order for index scan. - if !prop.IsEmpty() && all { - for i, col := range path.IdxCols { - if col.Equal(nil, prop.SortItems[0].Col) { - candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems) - break - } else if i >= path.EqCondCount { - break - } + return isMatchProp + } + // TODO: do we need to consider TiFlash here? + all, _ := prop.AllSameOrder() + // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because + // it needs not to keep order for index scan. + if !prop.IsEmpty() && all { + for _, sortItem := range prop.SortItems { + var i, j int + if i < len(path.EqualCols) && sortItem.Col.Equal(nil, path.EqualCols[i]) { + i++ + j++ + } else if j < len(path.IdxCols) && path.IdxColLens[j] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[j]) { + j++ + } else { + isMatchProp = false + break } } } + return isMatchProp +} + +func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { + candidate := &candidatePath{path: path} + candidate.isMatchProp = ds.isMatchProp(path, prop) candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) candidate.isSingleScan = true return candidate @@ -520,19 +531,7 @@ func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.Ph func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath { candidate := &candidatePath{path: path} - all, _ := prop.AllSameOrder() - // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because - // it needs not to keep order for index scan. - if !prop.IsEmpty() && all { - for i, col := range path.IdxCols { - if col.Equal(nil, prop.SortItems[0].Col) { - candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems) - break - } else if i >= path.EqCondCount { - break - } - } - } + candidate.isMatchProp = ds.isMatchProp(path, prop) candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters) candidate.isSingleScan = isSingleScan diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index ae2ad51c9f88f..b3bdbbbf3b1f8 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1708,6 +1708,14 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) { sql: "select * from t where f > 3 and g = 5", result: "PRIMARY_KEY,g,f_g", }, + { + sql: "select * from t where d = 3 order by c, e", + result: "PRIMARY_KEY,c_d_e", + }, + { + sql: "select * from t where c > 1 and d = 1 and e > 1 and e_str = 'hi' order by c, e", + result: "PRIMARY_KEY,c_d_e,c_d_e_str,c_d_e_str_prefix", + }, } ctx := context.TODO() for i, tt := range tests { diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 9113539116e8d..e6351c762d141 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -675,6 +675,7 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond + path.EqualCols = res.EqualCols path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { return false, err @@ -854,6 +855,7 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond + path.EqualCols = res.EqualCols path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { return err diff --git a/planner/util/path.go b/planner/util/path.go index f6fa0b47e0f51..79f04374703c6 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -43,6 +43,8 @@ type AccessPath struct { EqOrInCondCount int IndexFilters []expression.Expression TableFilters []expression.Expression + // EqualCols is the columns evaluated as constant under the given conditions. + EqualCols []*expression.Column // PartialIndexPaths store all index access paths. // If there are extra filters, store them in TableFilters. PartialIndexPaths []*AccessPath diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index f26e96c42d7f8..02aa33d6698ea 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -254,7 +254,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi ) res := &DetachRangeResult{} - accessConds, filterConds, newConditions, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths) + accessConds, filterConds, newConditions, equalCols, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths) if emptyRange { return res, nil } @@ -286,6 +286,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi res.Ranges = ranges res.AccessConds = accessConds res.RemainedConds = filterConds + res.EqualCols = equalCols if eqOrInCount == len(d.cols) || len(newConditions) == 0 { res.RemainedConds = append(res.RemainedConds, newConditions...) return res, nil @@ -465,15 +466,17 @@ func allEqOrIn(expr expression.Expression) bool { // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column. // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column. // e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions. +// equalCols: equalCols indicates which columns are evaluated as constant under the given conditions. // bool: indicate whether there's nil range when merging eq and in conditions. -func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, - cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) { +func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, + lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*expression.Column, bool) { var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} accesses := make([]expression.Expression, len(cols)) points := make([][]*point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) newConditions := make([]expression.Expression, 0, len(conditions)) + equalCols := make([]*expression.Column, 0, len(cols)) offsets := make([]int, len(conditions)) for i, cond := range conditions { offset := getPotentialEqOrInColOffset(cond, cols) @@ -494,7 +497,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex points[offset] = rb.intersection(points[offset], rb.build(cond)) // Early termination if false expression found if len(points[offset]) == 0 { - return nil, nil, nil, true + return nil, nil, nil, nil, true } } for i, ma := range mergedAccesses { @@ -514,7 +517,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex accesses[i] = nil } else if len(points[i]) == 0 { // Early termination if false expression found - return nil, nil, nil, true + return nil, nil, nil, nil, true } else { // All Intervals are single points accesses[i] = points2EqOrInCond(sctx, points[i], cols[i]) @@ -527,6 +530,15 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex newConditions = append(newConditions, conditions[i]) } } + for _, cond := range accesses { + if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) { + if col, ok := f.GetArgs()[0].(*expression.Column); ok { + equalCols = append(equalCols, col) + } else if col, ok := f.GetArgs()[1].(*expression.Column); ok { + equalCols = append(equalCols, col) + } + } + } for i, cond := range accesses { if cond == nil { accesses = accesses[:i] @@ -546,7 +558,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex } // We should remove all accessConds, so that they will not be added to filter conditions. newConditions = removeAccessConditions(newConditions, accesses) - return accesses, filters, newConditions, false + return accesses, filters, newConditions, equalCols, false } // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF. @@ -619,6 +631,8 @@ type DetachRangeResult struct { AccessConds []expression.Expression // RemainedConds is the filter conditions which should be kept after access. RemainedConds []expression.Expression + // EqualCols is the columns evaluated as constant under the given conditions. + EqualCols []*expression.Column // EqCondCount is the number of equal conditions extracted. EqCondCount int // EqOrInCount is the number of equal/in conditions extracted. From 6ad7d5c22424d35dcdbefc4a33915eee78a05c4d Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 15 Jul 2021 12:41:29 +0800 Subject: [PATCH 04/21] fix ut --- planner/core/find_best_task.go | 24 ++++++++++++++++-------- planner/core/logical_plan_test.go | 10 +++++----- planner/core/mock.go | 4 ++-- planner/util/path.go | 6 +++--- util/ranger/detacher.go | 20 ++++++++++---------- 5 files changed, 36 insertions(+), 28 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 307a7c4c3ed30..6bcc4edd93c8e 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -501,18 +501,26 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical return isMatchProp } // TODO: do we need to consider TiFlash here? + // TODO: check is it ok to cache the optimization? all, _ := prop.AllSameOrder() // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because // it needs not to keep order for index scan. - if !prop.IsEmpty() && all { + if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) { + isMatchProp = true + i := 0 for _, sortItem := range prop.SortItems { - var i, j int - if i < len(path.EqualCols) && sortItem.Col.Equal(nil, path.EqualCols[i]) { - i++ - j++ - } else if j < len(path.IdxCols) && path.IdxColLens[j] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[j]) { - j++ - } else { + found := false + for ; i < len(path.IdxCols); i++ { + if path.IdxColLens[i] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[i]) { + found = true + i++ + break + } + if !path.EqualCols[i] { + break + } + } + if !found { isMatchProp = false break } diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index b3bdbbbf3b1f8..41626ef4bef86 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1701,11 +1701,11 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) { result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix", }, { - sql: "select * from t where e_str = 'hi' order by c", - result: "PRIMARY_KEY,c_d_e_str,c_d_e_str_prefix", + sql: "select * from t where f > 3 and g = 5", + result: "PRIMARY_KEY,g,f_g", }, { - sql: "select * from t where f > 3 and g = 5", + sql: "select * from t where g = 5 order by f", result: "PRIMARY_KEY,g,f_g", }, { @@ -1713,8 +1713,8 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) { result: "PRIMARY_KEY,c_d_e", }, { - sql: "select * from t where c > 1 and d = 1 and e > 1 and e_str = 'hi' order by c, e", - result: "PRIMARY_KEY,c_d_e,c_d_e_str,c_d_e_str_prefix", + sql: "select * from t where d = 1 and f > 1 and g > 1 order by c, e", + result: "PRIMARY_KEY,c_d_e,g,f_g", }, } ctx := context.TODO() diff --git a/planner/core/mock.go b/planner/core/mock.go index 42e6141980e90..eac3315fcdeac 100644 --- a/planner/core/mock.go +++ b/planner/core/mock.go @@ -43,9 +43,9 @@ func newDateType() types.FieldType { // MockSignedTable is only used for plan related tests. func MockSignedTable() *model.TableInfo { - // column: a, b, c, d, e, c_str, d_str, e_str, f, g + // column: a, b, c, d, e, c_str, d_str, e_str, f, g, h, i_date // PK: a - // indices: c_d_e, e, f, g, f_g, c_d_e_str, c_d_e_str_prefix + // indices: c_d_e, e, f, g, f_g, c_d_e_str, e_d_c_str_prefix indices := []*model.IndexInfo{ { Name: model.NewCIStr("c_d_e"), diff --git a/planner/util/path.go b/planner/util/path.go index 79f04374703c6..6dce8d43759fd 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -32,7 +32,9 @@ type AccessPath struct { FullIdxColLens []int IdxCols []*expression.Column IdxColLens []int - Ranges []*ranger.Range + // EqualCols indicates whether the column is constant under the given conditions for all index columns. + EqualCols []bool + Ranges []*ranger.Range // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. // For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters. CountAfterAccess float64 @@ -43,8 +45,6 @@ type AccessPath struct { EqOrInCondCount int IndexFilters []expression.Expression TableFilters []expression.Expression - // EqualCols is the columns evaluated as constant under the given conditions. - EqualCols []*expression.Column // PartialIndexPaths store all index access paths. // If there are extra filters, store them in TableFilters. PartialIndexPaths []*AccessPath diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 02aa33d6698ea..ab236d8c7b377 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -466,17 +466,17 @@ func allEqOrIn(expr expression.Expression) bool { // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column. // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column. // e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions. -// equalCols: equalCols indicates which columns are evaluated as constant under the given conditions. +// equalCols: equalCols indicates whether the column is constant under the given conditions for all index columns. // bool: indicate whether there's nil range when merging eq and in conditions. func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, - lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*expression.Column, bool) { + lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []bool, bool) { var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} accesses := make([]expression.Expression, len(cols)) points := make([][]*point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) newConditions := make([]expression.Expression, 0, len(conditions)) - equalCols := make([]*expression.Column, 0, len(cols)) + equalCols := make([]bool, len(cols)) offsets := make([]int, len(conditions)) for i, cond := range conditions { offset := getPotentialEqOrInColOffset(cond, cols) @@ -530,12 +530,12 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex newConditions = append(newConditions, conditions[i]) } } - for _, cond := range accesses { + for i, cond := range accesses { if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) { - if col, ok := f.GetArgs()[0].(*expression.Column); ok { - equalCols = append(equalCols, col) - } else if col, ok := f.GetArgs()[1].(*expression.Column); ok { - equalCols = append(equalCols, col) + if _, ok := f.GetArgs()[0].(*expression.Column); ok { + equalCols[i] = true + } else if _, ok := f.GetArgs()[1].(*expression.Column); ok { + equalCols[i] = true } } } @@ -631,8 +631,8 @@ type DetachRangeResult struct { AccessConds []expression.Expression // RemainedConds is the filter conditions which should be kept after access. RemainedConds []expression.Expression - // EqualCols is the columns evaluated as constant under the given conditions. - EqualCols []*expression.Column + // EqualCols indicates whether the column is constant under the given conditions for all index columns. + EqualCols []bool // EqCondCount is the number of equal conditions extracted. EqCondCount int // EqOrInCount is the number of equal/in conditions extracted. From d24648488f0b2134e3a3d637cac9706d5e68ed86 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 15 Jul 2021 15:02:54 +0800 Subject: [PATCH 05/21] add test for isMatchProp --- planner/core/find_best_task.go | 2 +- planner/core/integration_test.go | 23 +++++++++++++++++++ .../core/testdata/integration_suite_in.json | 7 ++++++ .../core/testdata/integration_suite_out.json | 21 +++++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 6bcc4edd93c8e..549caddf4c2cb 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -516,7 +516,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical i++ break } - if !path.EqualCols[i] { + if path.EqualCols == nil || !path.EqualCols[i] { break } } diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index ef078f912abf4..ad1bd91c5089d 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -3952,3 +3952,26 @@ func (s *testIntegrationSerialSuite) TestSelectIgnoreTemporaryTableInView(c *C) tk.MustQuery("select * from v5").Check(testkit.Rows("1 2", "3 4")) } + +// TestIsMatchProp is used to test https://github.com/pingcap/tidb/issues/26017. +func (s *testIntegrationSuite) TestIsMatchProp(c *C) { + tk := testkit.NewTestKit(c, s.store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a))") + + var input []string + var output []struct { + SQL string + Plan []string + } + s.testData.GetTestCases(c, &input, &output) + for i, tt := range input { + s.testData.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows()) + }) + tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...)) + } +} diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index bf2391065a86a..792c7418d32e4 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -310,5 +310,12 @@ "select sum(1) from s1", "select count(1) as cnt from s1 union select count(1) as cnt from s2" ] + }, + { + "name": "TestIsMatchProp", + "cases": [ + "select a, b, c from t where a > 3 and b = 4 order by a, c", + "select * from t where d = 1 and b = 2 order by c, a" + ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 37330e65673c9..37526f17eb6cd 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1636,5 +1636,26 @@ ] } ] + }, + { + "Name": "TestIsMatchProp", + "Cases": [ + { + "SQL": "select a, b, c from t where a > 3 and b = 4 order by a, c", + "Plan": [ + "IndexReader 3.33 root index:Selection", + "└─Selection 3.33 cop[tikv] eq(test.t.b, 4)", + " └─IndexRangeScan 3333.33 cop[tikv] table:t, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select * from t where d = 1 and b = 2 order by c, a", + "Plan": [ + "IndexReader 0.01 root index:Selection", + "└─Selection 0.01 cop[tikv] eq(test.t.b, 2)", + " └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_d_c_b_a(d, c, b, a) range:[1,1], keep order:true, stats:pseudo" + ] + } + ] } ] From 0685f1555a35400b2b0671bac7f1415bd4ea9fef Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 15 Jul 2021 15:44:21 +0800 Subject: [PATCH 06/21] fmt --- planner/util/path.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/util/path.go b/planner/util/path.go index 6dce8d43759fd..16dd3c844ac3b 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -34,7 +34,7 @@ type AccessPath struct { IdxColLens []int // EqualCols indicates whether the column is constant under the given conditions for all index columns. EqualCols []bool - Ranges []*ranger.Range + Ranges []*ranger.Range // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. // For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters. CountAfterAccess float64 From 486fdc71f32b0278dd9493a7d83abf53c9054dff Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 21 Jul 2021 10:33:58 +0800 Subject: [PATCH 07/21] add comment --- planner/core/find_best_task.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 549caddf4c2cb..cbbbcf2a03c0b 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -505,6 +505,18 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical all, _ := prop.AllSameOrder() // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because // it needs not to keep order for index scan. + + // Basically, if `prop.SortItems` is the prefix of `path.IdxCols`, then `isMatchProp` is true. However, we need to consider + // the situations when some columns of `path.IdxCols` are evaluated as constant. For example: + // ``` + // create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a)); + // select * from t where a = 1 order by b, c; + // select * from t where b = 1 order by a, c; + // select * from t where d = 1 and b = 2 order by c, a; + // select * from t where d = 1 and b = 2 order by c, b, a; + // ``` + // In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a` + // matches the sort order. Hence, we use `path.EqualCols` to deal with the above situations. if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) { isMatchProp = true i := 0 From 7decc45c8b6d2051f913a698cdb66a8b02918a2a Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Fri, 23 Jul 2021 18:01:23 +0800 Subject: [PATCH 08/21] enhance detection of constant columns --- planner/core/find_best_task.go | 4 +- planner/core/integration_test.go | 5 +- planner/core/logical_plans.go | 14 +- .../core/testdata/integration_suite_in.json | 7 +- .../core/testdata/integration_suite_out.json | 34 +++- planner/util/path.go | 6 +- util/ranger/detacher.go | 165 +++++++++++++----- 7 files changed, 176 insertions(+), 59 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index cbbbcf2a03c0b..34ce53d77a514 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -516,7 +516,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical // select * from t where d = 1 and b = 2 order by c, b, a; // ``` // In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a` - // matches the sort order. Hence, we use `path.EqualCols` to deal with the above situations. + // matches the sort order. Hence, we use `path.ConstantCols` to deal with the above situations. if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) { isMatchProp = true i := 0 @@ -528,7 +528,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical i++ break } - if path.EqualCols == nil || !path.EqualCols[i] { + if path.ConstantCols == nil || !path.ConstantCols[i] { break } } diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index ad1bd91c5089d..e55ba43e9dbca 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -3958,8 +3958,9 @@ func (s *testIntegrationSuite) TestIsMatchProp(c *C) { tk := testkit.NewTestKit(c, s.store) tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a))") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int, b int, c int, d int, index idx_a_b_c(a, b, c))") + tk.MustExec("create table t2(a int, b int, c int, d int, index idx_a_b_c_d(a, b, c, d))") var input []string var output []struct { diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index e6351c762d141..abc6250f6fdbf 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -675,7 +675,12 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond - path.EqualCols = res.EqualCols + path.ConstantCols = make([]bool, len(path.IdxCols)) + if res.ColumnValues != nil { + for i := range path.ConstantCols { + path.ConstantCols[i] = res.ColumnValues[i] != nil + } + } path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { return false, err @@ -855,7 +860,12 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond - path.EqualCols = res.EqualCols + path.ConstantCols = make([]bool, len(path.IdxCols)) + if res.ColumnValues != nil { + for i := range path.ConstantCols { + path.ConstantCols[i] = res.ColumnValues[i] != nil + } + } path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { return err diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 792c7418d32e4..e7e9bb12e6001 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -314,8 +314,11 @@ { "name": "TestIsMatchProp", "cases": [ - "select a, b, c from t where a > 3 and b = 4 order by a, c", - "select * from t where d = 1 and b = 2 order by c, a" + "select a, b, c from t1 where a > 3 and b = 4 order by a, c", + "select * from t2 where a = 1 and c = 2 order by b, d", + "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c", + "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c", + "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d" ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 37526f17eb6cd..93025e19cf817 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1641,19 +1641,41 @@ "Name": "TestIsMatchProp", "Cases": [ { - "SQL": "select a, b, c from t where a > 3 and b = 4 order by a, c", + "SQL": "select a, b, c from t1 where a > 3 and b = 4 order by a, c", "Plan": [ "IndexReader 3.33 root index:Selection", - "└─Selection 3.33 cop[tikv] eq(test.t.b, 4)", - " └─IndexRangeScan 3333.33 cop[tikv] table:t, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo" + "└─Selection 3.33 cop[tikv] eq(test.t1.b, 4)", + " └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo" ] }, { - "SQL": "select * from t where d = 1 and b = 2 order by c, a", + "SQL": "select * from t2 where a = 1 and c = 2 order by b, d", "Plan": [ "IndexReader 0.01 root index:Selection", - "└─Selection 0.01 cop[tikv] eq(test.t.b, 2)", - " └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_d_c_b_a(d, c, b, a) range:[1,1], keep order:true, stats:pseudo" + "└─Selection 0.01 cop[tikv] eq(test.t2.c, 2)", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c", + "Plan": [ + "IndexReader 0.03 root index:IndexRangeScan", + "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo", + ] + }, + { + "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c", + "Plan": [ + "IndexReader 0.67 root index:IndexRangeScan", + "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo", + ] + }, + { + "SQL": "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d", + "Plan": [ + "IndexReader 0.00 root index:Selection", + "└─Selection 0.00 cop[tikv] eq(test.t2.c, 3), or(and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), lt(test.t2.d, 3))), and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), gt(test.t2.d, 6))))", + " └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo" ] } ] diff --git a/planner/util/path.go b/planner/util/path.go index 16dd3c844ac3b..5d1af4a9f17a8 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -32,9 +32,9 @@ type AccessPath struct { FullIdxColLens []int IdxCols []*expression.Column IdxColLens []int - // EqualCols indicates whether the column is constant under the given conditions for all index columns. - EqualCols []bool - Ranges []*ranger.Range + // ConstantCols indicates whether the column is constant under the given conditions for all index columns. + ConstantCols []bool + Ranges []*ranger.Range // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. // For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters. CountAfterAccess float64 diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index ab236d8c7b377..3c3258f4ef986 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -185,43 +185,40 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression. // is totally composed of point range filters. // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. -func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, error) { +func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, []*valueInfo, error) { if len(conds) < 2 { - return nil, -1, nil + return nil, -1, nil, nil } var r *DetachRangeResult + columnValues := make([]*valueInfo, len(cols)) maxNumCols := int(0) offset := int(-1) for i, cond := range conds { tmpConds := []expression.Expression{cond} colSets := expression.ExtractColumnSet(tmpConds) - origColNum := colSets.Len() - if origColNum == 0 { + if colSets.Len() == 0 { continue } - if l := len(cols); origColNum > l { - origColNum = l - } - currCols := cols[:origColNum] - currLengths := lengths[:origColNum] - res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, currCols, currLengths) + res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, cols, lengths) if err != nil { - return nil, -1, err + return nil, -1, nil, err } if len(res.Ranges) == 0 { - return &DetachRangeResult{}, -1, nil + return &DetachRangeResult{}, -1, nil, nil } + // take the union of the two columnValues + columnValues = unionColumnValues(columnValues, res.ColumnValues, len(cols)) if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { continue } sameLens, allPoints := true, true numCols := int(0) - for i, ran := range res.Ranges { + for j, ran := range res.Ranges { if !ran.IsPoint(sctx.GetSessionVars().StmtCtx) { allPoints = false break } - if i == 0 { + if j == 0 { numCols = len(ran.LowVal) } else if numCols != len(ran.LowVal) { sameLens = false @@ -240,7 +237,21 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E if r != nil { r.IsDNFCond = false } - return r, offset, nil + return r, offset, columnValues, nil +} + +func unionColumnValues(lhs, rhs []*valueInfo, numCols int) []*valueInfo { + if lhs == nil { + lhs = make([]*valueInfo, numCols) + } + if rhs != nil { + for i, valInfo := range lhs { + if valInfo == nil && rhs[i] != nil { + lhs[i] = rhs[i] + } + } + } + return lhs } // detachCNFCondAndBuildRangeForIndex will detach the index filters from table filters. These conditions are connected with `and` @@ -254,7 +265,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi ) res := &DetachRangeResult{} - accessConds, filterConds, newConditions, equalCols, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths) + accessConds, filterConds, newConditions, columnValues, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths) if emptyRange { return res, nil } @@ -286,7 +297,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi res.Ranges = ranges res.AccessConds = accessConds res.RemainedConds = filterConds - res.EqualCols = equalCols + res.ColumnValues = columnValues if eqOrInCount == len(d.cols) || len(newConditions) == 0 { res.RemainedConds = append(res.RemainedConds, newConditions...) return res, nil @@ -297,15 +308,17 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi shouldReserve: d.lengths[eqOrInCount] != types.UnspecifiedLength, } if considerDNF { - pointRes, offset, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths) + pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths) if err != nil { return nil, err } + res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues, len(d.cols)) if pointRes != nil { if len(pointRes.Ranges) == 0 { return &DetachRangeResult{}, nil } if len(pointRes.Ranges[0].LowVal) > eqOrInCount { + pointRes.ColumnValues = res.ColumnValues res = pointRes pointRanges = pointRes.Ranges eqOrInCount = len(res.Ranges[0].LowVal) @@ -461,22 +474,42 @@ func allEqOrIn(expr expression.Expression) bool { return false } +func extractValueInfo(expr expression.Expression) *valueInfo { + if f, ok := expr.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) { + getValueInfo := func(c *expression.Constant) *valueInfo { + mutable := c.ParamMarker != nil || c.DeferredExpr != nil + var value *types.Datum + if !mutable { + value = &c.Value + } + return &valueInfo{mutable, value} + } + if c, ok := f.GetArgs()[0].(*expression.Constant); ok { + return getValueInfo(c) + } + if c, ok := f.GetArgs()[1].(*expression.Constant); ok { + return getValueInfo(c) + } + } + return nil +} + // ExtractEqAndInCondition will split the given condition into three parts by the information of index columns and their lengths. // accesses: The condition will be used to build range. // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column. // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column. // e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions. -// equalCols: equalCols indicates whether the column is constant under the given conditions for all index columns. +// columnValues: the constant column values for all index columns. columnValues[i] is nil if cols[i] is not constant. // bool: indicate whether there's nil range when merging eq and in conditions. func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, - lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []bool, bool) { + lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*valueInfo, bool) { var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} accesses := make([]expression.Expression, len(cols)) points := make([][]*point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) newConditions := make([]expression.Expression, 0, len(conditions)) - equalCols := make([]bool, len(cols)) + columnValues := make([]*valueInfo, len(cols)) offsets := make([]int, len(conditions)) for i, cond := range conditions { offset := getPotentialEqOrInColOffset(cond, cols) @@ -505,6 +538,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex if accesses[i] != nil { if allEqOrIn(accesses[i]) { newConditions = append(newConditions, accesses[i]) + columnValues[i] = extractValueInfo(accesses[i]) } else { accesses[i] = nil } @@ -522,6 +556,9 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex // All Intervals are single points accesses[i] = points2EqOrInCond(sctx, points[i], cols[i]) newConditions = append(newConditions, accesses[i]) + if f, ok := accesses[i].(*expression.ScalarFunction); ok && f.FuncName.L == ast.EQ { + columnValues[i] = &valueInfo{mutable: true} + } sctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true } } @@ -530,15 +567,6 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex newConditions = append(newConditions, conditions[i]) } } - for i, cond := range accesses { - if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) { - if _, ok := f.GetArgs()[0].(*expression.Column); ok { - equalCols[i] = true - } else if _, ok := f.GetArgs()[1].(*expression.Column); ok { - equalCols[i] = true - } - } - } for i, cond := range accesses { if cond == nil { accesses = accesses[:i] @@ -558,12 +586,12 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex } // We should remove all accessConds, so that they will not be added to filter conditions. newConditions = removeAccessConditions(newConditions, accesses) - return accesses, filters, newConditions, equalCols, false + return accesses, filters, newConditions, columnValues, false } // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF. // We will detach the conditions of every DNF items, then compose them to a DNF. -func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) ([]*Range, []expression.Expression, bool, error) { +func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) ([]*Range, []expression.Expression, []*valueInfo, bool, error) { sc := d.sctx.GetSessionVars().StmtCtx firstColumnChecker := &conditionChecker{ colUniqueID: d.cols[0].UniqueID, @@ -574,26 +602,46 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression dnfItems := expression.FlattenDNFConditions(condition) newAccessItems := make([]expression.Expression, 0, len(dnfItems)) var totalRanges []*Range + columnValues := make([]*valueInfo, len(d.cols)) hasResidual := false - for _, item := range dnfItems { + for i, item := range dnfItems { if sf, ok := item.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd { cnfItems := expression.FlattenCNFConditions(sf) var accesses, filters []expression.Expression res, err := d.detachCNFCondAndBuildRangeForIndex(cnfItems, newTpSlice, true) if err != nil { - return nil, nil, false, nil + return nil, nil, nil, false, nil } ranges := res.Ranges accesses = res.AccessConds filters = res.RemainedConds if len(accesses) == 0 { - return FullRange(), nil, true, nil + return FullRange(), nil, nil, true, nil } if len(filters) > 0 { hasResidual = true } totalRanges = append(totalRanges, ranges...) newAccessItems = append(newAccessItems, expression.ComposeCNFCondition(d.sctx, accesses...)) + if res.ColumnValues != nil { + if i == 0 { + columnValues = res.ColumnValues + } else { + // take the intersection of the two columnValues + for j, valInfo := range columnValues { + if valInfo == nil { + continue + } + sameVale, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j]) + if err != nil { + return nil, nil, nil, false, errors.Trace(err) + } + if !sameVale { + columnValues[j] = nil + } + } + } + } } else if firstColumnChecker.check(item) { if firstColumnChecker.shouldReserve { hasResidual = true @@ -602,12 +650,24 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression points := rb.build(item) ranges, err := points2Ranges(sc, points, newTpSlice[0]) if err != nil { - return nil, nil, false, errors.Trace(err) + return nil, nil, nil, false, errors.Trace(err) } totalRanges = append(totalRanges, ranges...) newAccessItems = append(newAccessItems, item) + if i == 0 { + columnValues[0] = extractValueInfo(item) + } else if columnValues[0] != nil { + valInfo := extractValueInfo(item) + sameValue, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, columnValues[0], valInfo) + if err != nil { + return nil, nil, nil, false, errors.Trace(err) + } + if !sameValue { + columnValues[0] = nil + } + } } else { - return FullRange(), nil, true, nil + return FullRange(), nil, nil, true, nil } } @@ -617,10 +677,29 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression } totalRanges, err := UnionRanges(sc, totalRanges, d.mergeConsecutive) if err != nil { - return nil, nil, false, errors.Trace(err) + return nil, nil, nil, false, errors.Trace(err) } - return totalRanges, []expression.Expression{expression.ComposeDNFCondition(d.sctx, newAccessItems...)}, hasResidual, nil + return totalRanges, []expression.Expression{expression.ComposeDNFCondition(d.sctx, newAccessItems...)}, columnValues, hasResidual, nil +} + +// valueInfo is used for recording the constant column value in DetachCondAndBuildRangeForIndex. +type valueInfo struct { + mutable bool // If true, the constant column value depends on mutable constant. + value *types.Datum // If not mutable, value is the constant column value. Otherwise value is nil. +} + +func isSameValue(sc *stmtctx.StatementContext, lhs, rhs *valueInfo) (bool, error) { + // We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable. Maybe we can improve it later. + // TODO: is `lhs.value.Kind() != rhs.value.Kind()` necessary? + if lhs == nil || rhs == nil || lhs.mutable || rhs.mutable || lhs.value.Kind() != rhs.value.Kind() { + return false, nil + } + cmp, err := lhs.value.CompareDatum(sc, rhs.value) + if err != nil { + return false, err + } + return cmp == 0, nil } // DetachRangeResult wraps up results when detaching conditions and builing ranges. @@ -631,8 +710,9 @@ type DetachRangeResult struct { AccessConds []expression.Expression // RemainedConds is the filter conditions which should be kept after access. RemainedConds []expression.Expression - // EqualCols indicates whether the column is constant under the given conditions for all index columns. - EqualCols []bool + // ColumnValues records the constant column values for all index columns. + // For the ith column, if it is evaluated as constant, ColumnValues[i] is its value. Otherwise ColumnValues[i] is nil. + ColumnValues []*valueInfo // EqCondCount is the number of equal conditions extracted. EqCondCount int // EqOrInCount is the number of equal/in conditions extracted. @@ -671,12 +751,13 @@ func (d *rangeDetacher) detachCondAndBuildRangeForCols() (*DetachRangeResult, er } if len(d.allConds) == 1 { if sf, ok := d.allConds[0].(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicOr { - ranges, accesses, hasResidual, err := d.detachDNFCondAndBuildRangeForIndex(sf, newTpSlice) + ranges, accesses, columnValues, hasResidual, err := d.detachDNFCondAndBuildRangeForIndex(sf, newTpSlice) if err != nil { return res, errors.Trace(err) } res.Ranges = ranges res.AccessConds = accesses + res.ColumnValues = columnValues res.IsDNFCond = true // If this DNF have something cannot be to calculate range, then all this DNF should be pushed as filter condition. if hasResidual { From b2d975c71995d4962e2006b0dabbbbb73f017780 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Tue, 27 Jul 2021 10:44:23 +0800 Subject: [PATCH 09/21] fix ut & add comment --- planner/core/find_best_task.go | 6 ++---- planner/core/logical_plans.go | 12 ++++++------ planner/core/testdata/integration_suite_out.json | 4 ++-- planner/util/path.go | 6 +++--- util/ranger/detacher.go | 7 ++++++- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 34ce53d77a514..4d414153ba15a 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -500,8 +500,6 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical } return isMatchProp } - // TODO: do we need to consider TiFlash here? - // TODO: check is it ok to cache the optimization? all, _ := prop.AllSameOrder() // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because // it needs not to keep order for index scan. @@ -516,7 +514,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical // select * from t where d = 1 and b = 2 order by c, b, a; // ``` // In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a` - // matches the sort order. Hence, we use `path.ConstantCols` to deal with the above situations. + // matches the sort order. Hence, we use `path.ConstCols` to deal with the above situations. if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) { isMatchProp = true i := 0 @@ -528,7 +526,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical i++ break } - if path.ConstantCols == nil || !path.ConstantCols[i] { + if path.ConstCols == nil || !path.ConstCols[i] { break } } diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index abc6250f6fdbf..63f4d2f8c6002 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -675,10 +675,10 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond - path.ConstantCols = make([]bool, len(path.IdxCols)) + path.ConstCols = make([]bool, len(path.IdxCols)) if res.ColumnValues != nil { - for i := range path.ConstantCols { - path.ConstantCols[i] = res.ColumnValues[i] != nil + for i := range path.ConstCols { + path.ConstCols[i] = res.ColumnValues[i] != nil } } path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) @@ -860,10 +860,10 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex path.EqCondCount = res.EqCondCount path.EqOrInCondCount = res.EqOrInCount path.IsDNFCond = res.IsDNFCond - path.ConstantCols = make([]bool, len(path.IdxCols)) + path.ConstCols = make([]bool, len(path.IdxCols)) if res.ColumnValues != nil { - for i := range path.ConstantCols { - path.ConstantCols[i] = res.ColumnValues[i] != nil + for i := range path.ConstCols { + path.ConstCols[i] = res.ColumnValues[i] != nil } } path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 93025e19cf817..b93d2ef382647 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1660,14 +1660,14 @@ "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c", "Plan": [ "IndexReader 0.03 root index:IndexRangeScan", - "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo", + "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo" ] }, { "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c", "Plan": [ "IndexReader 0.67 root index:IndexRangeScan", - "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo", + "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo" ] }, { diff --git a/planner/util/path.go b/planner/util/path.go index 5d1af4a9f17a8..10e994e998a22 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -32,9 +32,9 @@ type AccessPath struct { FullIdxColLens []int IdxCols []*expression.Column IdxColLens []int - // ConstantCols indicates whether the column is constant under the given conditions for all index columns. - ConstantCols []bool - Ranges []*ranger.Range + // ConstCols indicates whether the column is constant under the given conditions for all index columns. + ConstCols []bool + Ranges []*ranger.Range // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. // For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters. CountAfterAccess float64 diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 3c3258f4ef986..57d1791d7d5fd 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -557,6 +557,8 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex accesses[i] = points2EqOrInCond(sctx, points[i], cols[i]) newConditions = append(newConditions, accesses[i]) if f, ok := accesses[i].(*expression.ScalarFunction); ok && f.FuncName.L == ast.EQ { + // Actually the constant column value may not be mutable. Here we assume it is mutable to keep it simple. + // Maybe we can improve it later. columnValues[i] = &valueInfo{mutable: true} } sctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true @@ -690,7 +692,10 @@ type valueInfo struct { } func isSameValue(sc *stmtctx.StatementContext, lhs, rhs *valueInfo) (bool, error) { - // We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable. Maybe we can improve it later. + // We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable to keep it simple. If we consider + // mutable valueInfo, we need to set `sc.OptimDependOnMutableConst = true`, which makes the plan not able to be cached. + // On the other hand, the equal condition may not be used for optimization. Hence we simply regard mutable valueInfos different + // from others. Maybe we can improve it later. // TODO: is `lhs.value.Kind() != rhs.value.Kind()` necessary? if lhs == nil || rhs == nil || lhs.mutable || rhs.mutable || lhs.value.Kind() != rhs.value.Kind() { return false, nil From aadd749e0edde8833eae0800ea8345ba80826733 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Tue, 27 Jul 2021 20:52:06 +0800 Subject: [PATCH 10/21] minor fix --- planner/core/find_best_task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 34e06e46093a1..60bd526b0c3c6 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -525,7 +525,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical i++ break } - if path.ConstCols == nil || !path.ConstCols[i] { + if path.ConstCols == nil || i >= len(path.ConstCols) || !path.ConstCols[i] { break } } From 3e73ab99ff8756052be4b953cfdbb39ab76e9c8b Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 28 Jul 2021 23:01:56 +0800 Subject: [PATCH 11/21] add heuristics in DataSource.DeriveStats --- planner/core/logical_plans.go | 77 +++++---------------------- planner/core/stats.go | 99 ++++++++++++++++++++++++++++------- planner/util/path.go | 34 ++++++++++++ util/ranger/detacher.go | 6 +-- 4 files changed, 129 insertions(+), 87 deletions(-) diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 45703caa4a828..2d7ec8ffa4822 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -655,19 +655,19 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) { return gathers } -func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) (bool, error) { +func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error { path.CountAfterAccess = float64(ds.statisticTable.Count) path.Ranges = ranger.FullNotNullRange() path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index) path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index) if len(conds) == 0 { - return false, nil + return nil } sc := ds.ctx.GetSessionVars().StmtCtx if len(path.IdxCols) != 0 { res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.IdxCols, path.IdxColLens) if err != nil { - return false, err + return err } path.Ranges = res.Ranges path.AccessConds = res.AccessConds @@ -683,7 +683,7 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co } path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { - return false, err + return err } } else { path.TableFilters = conds @@ -712,33 +712,12 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co if path.CountAfterAccess < ds.stats.RowCount && !isIm { path.CountAfterAccess = math.Min(ds.stats.RowCount/SelectionFactor, float64(ds.statisticTable.Count)) } - // Check whether there's only point query. - noIntervalRanges := true - haveNullVal := false - for _, ran := range path.Ranges { - // Not point or the not full matched. - if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) { - noIntervalRanges = false - break - } - // Check whether there's null value. - for i := 0; i < len(path.Index.Columns); i++ { - if ran.HighVal[i].IsNull() { - haveNullVal = true - break - } - } - if haveNullVal { - break - } - } - return noIntervalRanges && !haveNullVal, nil + return nil } // deriveTablePathStats will fulfill the information that the AccessPath need. -// And it will check whether the primary key is covered only by point query. // isIm indicates whether this function is called to generate the partial path for IndexMerge. -func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) (bool, error) { +func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error { if path.IsCommonHandlePath { return ds.deriveCommonHandleTablePathStats(path, conds, isIm) } @@ -759,12 +738,12 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres } if pkCol == nil { path.Ranges = ranger.FullIntRange(isUnsigned) - return false, nil + return nil } path.Ranges = ranger.FullIntRange(isUnsigned) if len(conds) == 0 { - return false, nil + return nil } path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol) // If there's no access cond, we try to find that whether there's expression containing correlated column that @@ -800,11 +779,11 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres } if corColInAccessConds { path.CountAfterAccess = 1 - return true, nil + return nil } path.Ranges, err = ranger.BuildTableRange(path.AccessConds, sc, pkCol.RetType) if err != nil { - return false, err + return err } path.CountAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.Ranges) // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. @@ -812,15 +791,7 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres if path.CountAfterAccess < ds.stats.RowCount && !isIm { path.CountAfterAccess = math.Min(ds.stats.RowCount/SelectionFactor, float64(ds.statisticTable.Count)) } - // Check whether the primary key is covered by point query. - noIntervalRange := true - for _, ran := range path.Ranges { - if !ran.IsPoint(sc) { - noIntervalRange = false - break - } - } - return noIntervalRange, err + return err } func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Expression) error { @@ -877,12 +848,9 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex } // deriveIndexPathStats will fulfill the information that the AccessPath need. -// And it will check whether this index is full matched by point query. We will use this check to -// determine whether we remove other paths or not. // conds is the conditions used to generate the DetachRangeResult for path. // isIm indicates whether this function is called to generate the partial path for IndexMerge. -func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) bool { - sc := ds.ctx.GetSessionVars().StmtCtx +func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) { if path.EqOrInCondCount == len(path.AccessConds) { accesses, remained := path.SplitCorColAccessCondFromFilters(ds.ctx, path.EqOrInCondCount) path.AccessConds = append(path.AccessConds, accesses...) @@ -922,27 +890,6 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expres path.CountAfterIndex = math.Max(path.CountAfterAccess*selectivity, ds.stats.RowCount) } } - // Check whether there's only point query. - noIntervalRanges := true - haveNullVal := false - for _, ran := range path.Ranges { - // Not point or the not full matched. - if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) { - noIntervalRanges = false - break - } - // Check whether there's null value. - for i := 0; i < len(path.Index.Columns); i++ { - if ran.HighVal[i].IsNull() { - haveNullVal = true - break - } - } - if haveNullVal { - break - } - } - return noIntervalRanges && !haveNullVal } func getPKIsHandleColFromSchema(cols []*model.ColumnInfo, schema *expression.Schema, pkIsHandle bool) *expression.Column { diff --git a/planner/core/stats.go b/planner/core/stats.go index 35fc31687e749..67e6bf8c4f773 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -15,6 +15,7 @@ package core import ( "context" + "golang.org/x/tools/container/intsets" "math" "sort" @@ -280,30 +281,90 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * return nil, err } } + // TODO: Can we move ds.deriveStatsByFilter after pruning by heuristics? In this way some computation can be avoided + // when ds.possibleAccessPaths are pruned. ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths) + uniqueIdxsWithDoubleScan := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths)) + singleScanIdxs := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths)) + var selected, uniqueBest, refinedBest *util.AccessPath for _, path := range ds.possibleAccessPaths { if path.IsTablePath() { - noIntervalRanges, err := ds.deriveTablePathStats(path, ds.pushedDownConds, false) + err := ds.deriveTablePathStats(path, ds.pushedDownConds, false) if err != nil { return nil, err } - // If we have point or empty range, just remove other possible paths. - if noIntervalRanges || len(path.Ranges) == 0 { - ds.possibleAccessPaths[0] = path - ds.possibleAccessPaths = ds.possibleAccessPaths[:1] - ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true - break - } - continue + } else { + ds.deriveIndexPathStats(path, ds.pushedDownConds, false) } - noIntervalRanges := ds.deriveIndexPathStats(path, ds.pushedDownConds, false) - // If we have empty range, or point range on unique index, just remove other possible paths. - if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 { - ds.possibleAccessPaths[0] = path - ds.possibleAccessPaths = ds.possibleAccessPaths[:1] - ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true + // TODO: Should we handle TiFlash case specially? + // Try some heuristic rules to select access path. + if len(path.Ranges) == 0 { + selected = path break } + // TODO: Can we record isSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + // as a field of AccessPath? In this way ds.isCoveringIndex only needs to be called once for each path. + if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) { + if path.IsTablePath() || path.Index.Unique { + if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) { + selected = path + break + } + uniqueIdxsWithDoubleScan = append(uniqueIdxsWithDoubleScan, path) + } + } else if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) { + singleScanIdxs = append(singleScanIdxs, path) + } + } + if len(uniqueIdxsWithDoubleScan) > 0 { + // TODO: Move accessCondsColSet from candidatePath to AccessPath so that we can use it both here and skyline pruning. + uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan)) + for _, uniqueIdx := range uniqueIdxsWithDoubleScan { + uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds)) + // Find the unique index with the minimal number of ranges as `uniqueBest`. + if uniqueBest == nil || len(uniqueIdx.Ranges) < len(uniqueBest.Ranges) { + uniqueBest = uniqueIdx + } + } + // `uniqueBest` may not always be the best. + // ``` + // create table t(a int, b int, c int, unique index idx_b(b), unique index idx_b_c(b, c)); + // select b, c from t where b = 5 and c > 10; + // ``` + // In the case, `uniqueBest` is `idx_b`. However, `idx_b_c` is better than `idx_b_c`. + // Hence, for each index in `singleScanIdxs`, we check whether it is better than some index in `uniqueIdxsWithDoubleScan`. + // If yes, the index is a refined one. We find the refined index with the minimal number of ranges as `refineBest`. + for _, singleScanIdx := range singleScanIdxs { + columnSet := expression.ExtractColumnSet(singleScanIdx.AccessConds) + for _, uniqueIdxColumnSet := range uniqueIdxColumnSets { + setsResult, comparable := compareColumnSet(columnSet, uniqueIdxColumnSet) + if comparable && setsResult == 1 { + if refinedBest == nil || len(singleScanIdx.Ranges) < len(refinedBest.Ranges) { + refinedBest = singleScanIdx + } + } + } + } + // `refineBest` may not always be better than `uniqueBest`. + // ``` + // create table t(int a, int b, int c, int d, unique index idx_a(a), unique index idx_b_c(b, c), unique index idx_b_c_a_d(b, c, a, d)); + // select a, b, c from t where a = 1 and b = 2 and c in (1, 2, 3, 4, 5); + // ``` + // In the case, `refinedBest` is `idx_b_c_a_d` and `uniqueBest` is `a`. `idx_b_c_a_d` needs to access five points while `idx_a` + // only needs one point access and one table access. + // Hence we should compare `2 * len(uniqueBest.Ranges)` and `len(refinedBest.Ranges)` to select the better one. + if refinedBest != nil && (uniqueBest == nil || len(refinedBest.Ranges) < 2*len(uniqueBest.Ranges)) { + selected = refinedBest + } else { + selected = uniqueBest + } + } + // If some path matches a heuristic rule, just remove other possible paths + if selected != nil { + ds.possibleAccessPaths[0] = selected + ds.possibleAccessPaths = ds.possibleAccessPaths[:1] + // TODO: Can we make a more carefull check on whether the optimization depends on mutable constants? + ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true } // TODO: implement UnionScan + IndexMerge @@ -513,7 +574,7 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us } else { path.IsIntHandlePath = true } - noIntervalRanges, err := ds.deriveTablePathStats(path, conditions, true) + err := ds.deriveTablePathStats(path, conditions, true) if err != nil { logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err)) continue @@ -523,7 +584,7 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us continue } // If we have point or empty range, just remove other possible paths. - if noIntervalRanges || len(path.Ranges) == 0 { + if len(path.Ranges) == 0 || path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) { if len(results) == 0 { results = append(results, path) } else { @@ -543,13 +604,13 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err)) continue } - noIntervalRanges := ds.deriveIndexPathStats(path, conditions, true) + ds.deriveIndexPathStats(path, conditions, true) // If the path contains a full range, ignore it. if ranger.HasFullRange(path.Ranges) { continue } // If we have empty range, or point range on unique index, just remove other possible paths. - if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 { + if len(path.Ranges) == 0 || (path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) && path.Index.Unique) { if len(results) == 0 { results = append(results, path) } else { diff --git a/planner/util/path.go b/planner/util/path.go index 10e994e998a22..665d71ccd4d23 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -19,6 +19,7 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/collate" "github.com/pingcap/tidb/util/ranger" @@ -138,3 +139,36 @@ func isColEqCorColOrConstant(ctx sessionctx.Context, filter expression.Expressio } return false } + +// OnlyPointRange checks whether each range is a point(no interval range exists). +func (path *AccessPath) OnlyPointRange(sc *stmtctx.StatementContext) bool { + noIntervalRange := true + if path.IsIntHandlePath { + for _, ran := range path.Ranges { + if !ran.IsPoint(sc) { + noIntervalRange = false + break + } + } + return noIntervalRange + } + haveNullVal := false + for _, ran := range path.Ranges { + // Not point or the not full matched. + if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) { + noIntervalRange = false + break + } + // Check whether there's null value. + for i := 0; i < len(path.Index.Columns); i++ { + if ran.HighVal[i].IsNull() { + haveNullVal = true + break + } + } + if haveNullVal { + break + } + } + return noIntervalRange && !haveNullVal +} diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 57d1791d7d5fd..33df3016e5aeb 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -242,7 +242,7 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E func unionColumnValues(lhs, rhs []*valueInfo, numCols int) []*valueInfo { if lhs == nil { - lhs = make([]*valueInfo, numCols) + return rhs } if rhs != nil { for i, valInfo := range lhs { @@ -634,11 +634,11 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression if valInfo == nil { continue } - sameVale, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j]) + sameValue, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j]) if err != nil { return nil, nil, nil, false, errors.Trace(err) } - if !sameVale { + if !sameValue { columnValues[j] = nil } } From ff458ef67d5b6afdf9b7c5b362d5f7dc61a7832f Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 29 Jul 2021 13:07:40 +0800 Subject: [PATCH 12/21] append warning about heuristic index selection --- planner/core/stats.go | 18 +++++++++++++++++- util/ranger/detacher.go | 9 ++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/planner/core/stats.go b/planner/core/stats.go index 67e6bf8c4f773..0c2f4dba17c3b 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -363,8 +363,24 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * if selected != nil { ds.possibleAccessPaths[0] = selected ds.possibleAccessPaths = ds.possibleAccessPaths[:1] - // TODO: Can we make a more carefull check on whether the optimization depends on mutable constants? + // TODO: Can we make a more careful check on whether the optimization depends on mutable constants? ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true + if ds.ctx.GetSessionVars().StmtCtx.InExplainStmt { + var tableName, pathName string + if ds.TableAsName.O == "" { + tableName = ds.tableInfo.Name.O + } else { + tableName = ds.TableAsName.O + } + if !selected.IsTablePath() { + pathName = "primary key of " + tableName + } else { + pathName = "index " + selected.Index.Name.O + " of " + tableName + } + // TODO: Do we need to specify which heuristic rule `selected` matches? It is kind of hard to briefly describe the + // three heuristic rules. Besides, we can distinguish the three rules by checking EXPLAIN result. + ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected by heuristics")) + } } // TODO: implement UnionScan + IndexMerge diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 33df3016e5aeb..e3566a8119afa 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -207,7 +207,7 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E return &DetachRangeResult{}, -1, nil, nil } // take the union of the two columnValues - columnValues = unionColumnValues(columnValues, res.ColumnValues, len(cols)) + columnValues = unionColumnValues(columnValues, res.ColumnValues) if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { continue } @@ -240,12 +240,15 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E return r, offset, columnValues, nil } -func unionColumnValues(lhs, rhs []*valueInfo, numCols int) []*valueInfo { +func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { if lhs == nil { return rhs } if rhs != nil { for i, valInfo := range lhs { + if i >= len(rhs) { + break + } if valInfo == nil && rhs[i] != nil { lhs[i] = rhs[i] } @@ -312,7 +315,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi if err != nil { return nil, err } - res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues, len(d.cols)) + res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues) if pointRes != nil { if len(pointRes.Ranges) == 0 { return &DetachRangeResult{}, nil From c7ab8771ed4e777c49387f5bd7e2024e2c8a31f8 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 29 Jul 2021 16:24:58 +0800 Subject: [PATCH 13/21] add test for heuristics --- planner/core/integration_test.go | 29 ++++++++++++- planner/core/stats.go | 8 ++-- .../core/testdata/integration_suite_in.json | 9 ++++ .../core/testdata/integration_suite_out.json | 43 +++++++++++++++++++ 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 95b8fd391f387..39f2be2e4ae3c 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -4016,13 +4016,13 @@ func (s *testIntegrationSerialSuite) TestCTESelfJoin(c *C) { with inv as (select t1a , t3a, sum(t2c) from t1, t2, t3 - where t2a = t1a + where t2a = t1a and t2b = t3b and t3c = 1998 group by t1a, t3a) select inv1.t1a, inv2.t3a from inv inv1, inv inv2 - where inv1.t1a = inv2.t1a + where inv1.t1a = inv2.t1a and inv1.t3a = 4 and inv2.t3a = 4+1`) } @@ -4050,3 +4050,28 @@ func (s *testIntegrationSuite) TestIssue26559(c *C) { tk.MustExec("insert into t values('2020-07-29 09:07:01', '2020-07-27 16:57:36');") tk.MustQuery("select greatest(a, b) from t union select null;").Sort().Check(testkit.Rows("2020-07-29 09:07:01", "")) } + +func (s *testIntegrationSuite) TestHeuristicIndexSelection(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int, b int, c int, d int, e int, f int, g int, primary key (a), unique key c_d_e (c, d, e), unique key f (f), unique key f_g (f, g), key g (g))") + tk.MustExec("create table t2(a int, b int, c int, d int, unique index idx_a (a), unique index idx_b_c (b, c), unique index idx_b_c_a_d (b, c, a, d))") + + var input []string + var output []struct { + SQL string + Plan []string + Warnings []string + } + s.testData.GetTestCases(c, &input, &output) + for i, tt := range input { + s.testData.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows()) + output[i].Warnings = s.testData.ConvertRowsToStrings(tk.MustQuery("show warnings").Rows()) + }) + tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...)) + tk.MustQuery("show warnings").Check(testkit.Rows(output[i].Warnings...)) + } +} diff --git a/planner/core/stats.go b/planner/core/stats.go index 0c2f4dba17c3b..c8ab4ba873271 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -316,7 +316,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * singleScanIdxs = append(singleScanIdxs, path) } } - if len(uniqueIdxsWithDoubleScan) > 0 { + if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 { // TODO: Move accessCondsColSet from candidatePath to AccessPath so that we can use it both here and skyline pruning. uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan)) for _, uniqueIdx := range uniqueIdxsWithDoubleScan { @@ -347,12 +347,12 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * } // `refineBest` may not always be better than `uniqueBest`. // ``` - // create table t(int a, int b, int c, int d, unique index idx_a(a), unique index idx_b_c(b, c), unique index idx_b_c_a_d(b, c, a, d)); + // create table t(a int, b int, c int, d int, unique index idx_a(a), unique index idx_b_c(b, c), unique index idx_b_c_a_d(b, c, a, d)); // select a, b, c from t where a = 1 and b = 2 and c in (1, 2, 3, 4, 5); // ``` // In the case, `refinedBest` is `idx_b_c_a_d` and `uniqueBest` is `a`. `idx_b_c_a_d` needs to access five points while `idx_a` // only needs one point access and one table access. - // Hence we should compare `2 * len(uniqueBest.Ranges)` and `len(refinedBest.Ranges)` to select the better one. + // Hence we should compare `len(refinedBest.Ranges)` and `2*len(uniqueBest.Ranges)` to select the better one. if refinedBest != nil && (uniqueBest == nil || len(refinedBest.Ranges) < 2*len(uniqueBest.Ranges)) { selected = refinedBest } else { @@ -372,7 +372,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * } else { tableName = ds.TableAsName.O } - if !selected.IsTablePath() { + if selected.IsTablePath() { pathName = "primary key of " + tableName } else { pathName = "index " + selected.Index.Name.O + " of " + tableName diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index e7e9bb12e6001..901d509459403 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -320,5 +320,14 @@ "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c", "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d" ] + }, + { + "name": "TestHeuristicIndexSelection", + "cases": [ + "select f, g from t1 where f = 2 and g in (3, 4, 5)", + "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)", + "select f, g from t1 where f = 2 and g > 3", + "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5);" + ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index b93d2ef382647..beeedddb0ef36 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1679,5 +1679,48 @@ ] } ] + }, + { + "Name": "TestHeuristicIndexSelection", + "Cases": [ + { + "SQL": "select f, g from t1 where f = 2 and g in (3, 4, 5)", + "Plan": [ + "Batch_Point_Get_5 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false" + ], + "Warnings": [ + "Note 1105 index f_g of t1 is selected by heuristics" + ] + }, + { + "SQL": "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)", + "Plan": [ + "Batch_Point_Get_5 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false" + ], + "Warnings": [ + "Note 1105 index c_d_e of t1 is selected by heuristics" + ] + }, + { + "SQL": "select f, g from t1 where f = 2 and g > 3", + "Plan": [ + "IndexReader_6 33.33 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo" + ], + "Warnings": [ + "Note 1105 index f_g of t1 is selected by heuristics" + ] + }, + { + "SQL": "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5)", + "Plan": [ + "Selection_6 0.01 root eq(test.t2.b, 2), in(test.t2.c, 1, 2, 3, 4, 5)", + "└─Point_Get_5 1.00 root table:t2, index:idx_a(a) " + ], + "Warnings": [ + "Note 1105 index idx_a of t2 is selected by heuristics" + ] + } + ] } ] From 4f8465128422317f56d954440832b63f3b3853b2 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 29 Jul 2021 17:29:59 +0800 Subject: [PATCH 14/21] add test --- planner/core/stats.go | 9 ++++++++- planner/core/testdata/integration_suite_out.json | 12 ++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/planner/core/stats.go b/planner/core/stats.go index c8ab4ba873271..a4e19996834a6 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -306,7 +306,14 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * // as a field of AccessPath? In this way ds.isCoveringIndex only needs to be called once for each path. if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) { if path.IsTablePath() || path.Index.Unique { - if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) { + var singleScan bool + if path.IsTablePath() { + singleScan = true + } else { + singleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + } + if singleScan { + // TODO: What if multiple paths satisfy all conditions? selected = path break } diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index beeedddb0ef36..b6ce5c8d28671 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1686,7 +1686,7 @@ { "SQL": "select f, g from t1 where f = 2 and g in (3, 4, 5)", "Plan": [ - "Batch_Point_Get_5 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false" + "Batch_Point_Get 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false" ], "Warnings": [ "Note 1105 index f_g of t1 is selected by heuristics" @@ -1695,7 +1695,7 @@ { "SQL": "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)", "Plan": [ - "Batch_Point_Get_5 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false" + "Batch_Point_Get 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false" ], "Warnings": [ "Note 1105 index c_d_e of t1 is selected by heuristics" @@ -1704,8 +1704,8 @@ { "SQL": "select f, g from t1 where f = 2 and g > 3", "Plan": [ - "IndexReader_6 33.33 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo" + "IndexReader 33.33 root index:IndexRangeScan", + "└─IndexRangeScan 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo" ], "Warnings": [ "Note 1105 index f_g of t1 is selected by heuristics" @@ -1714,8 +1714,8 @@ { "SQL": "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5)", "Plan": [ - "Selection_6 0.01 root eq(test.t2.b, 2), in(test.t2.c, 1, 2, 3, 4, 5)", - "└─Point_Get_5 1.00 root table:t2, index:idx_a(a) " + "Selection 0.01 root eq(test.t2.b, 2), in(test.t2.c, 1, 2, 3, 4, 5)", + "└─Point_Get 1.00 root table:t2, index:idx_a(a) " ], "Warnings": [ "Note 1105 index idx_a of t2 is selected by heuristics" From ded7ec893ebe80ea50c04fd28ffbef72845c3560 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Thu, 29 Jul 2021 17:46:23 +0800 Subject: [PATCH 15/21] fmt --- planner/core/stats.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/stats.go b/planner/core/stats.go index a4e19996834a6..4e5cee99236e2 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -15,7 +15,6 @@ package core import ( "context" - "golang.org/x/tools/container/intsets" "math" "sort" @@ -31,6 +30,7 @@ import ( "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/ranger" "go.uber.org/zap" + "golang.org/x/tools/container/intsets" ) func (p *basePhysicalPlan) StatsCount() float64 { From a2900fc13b403ae5c5d6c159c54290f2d799a4db Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 2 Aug 2021 14:17:18 +0800 Subject: [PATCH 16/21] add orderByPKLimitN --- planner/core/logical_plan_builder.go | 18 ++++++++++++++++++ planner/core/logical_plans.go | 2 ++ 2 files changed, 20 insertions(+) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 838a97bf0359d..fecb7cf404a20 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -3407,6 +3407,15 @@ func (b *PlanBuilder) TableHints() *tableHintInfo { return &(b.tableHintInfo[len(b.tableHintInfo)-1]) } +func (b *PlanBuilder) setOrderByPKLimitNForDataSource(p LogicalPlan, sel *ast.SelectStmt) { + ds, ok := p.(*DataSource) + if !ok || sel.OrderBy == nil || sel.Limit == nil { + return + } + + ds.orderByPKLimitN = true +} + func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p LogicalPlan, err error) { b.pushSelectOffset(sel.QueryBlockOffset) b.pushTableHints(sel.TableHints, sel.QueryBlockOffset) @@ -3466,6 +3475,8 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L if err != nil { return nil, err } + // For filling DataSource.orderByPKLimitN + ds, isDataSource := p.(*DataSource) originalFields := sel.Fields.Fields sel.Fields.Fields, err = b.unfoldWildStar(p, sel.Fields.Fields) @@ -3650,6 +3661,13 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L if err != nil { return nil, err } + if logicalSort, isSort := p.(*LogicalSort); isSort && sel.Limit != nil && isDataSource { + if col, isCol := logicalSort.ByItems[0].Expr.(*expression.Column); isCol && ds.handleCols.NumCols() == 1 { + if ds.handleCols.GetCol(0).Equal(nil, col) && sel.Limit != nil { + ds.orderByPKLimitN = true + } + } + } } if sel.Limit != nil { diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 7c2a7b8b2ad8f..4e60bf82a947d 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -523,6 +523,8 @@ type DataSource struct { // 1. use `inside insert`, `update`, `delete` or `select for update` statement // 2. isolation level is RC isForUpdateRead bool + // orderByPKLimitN is true iff there exists `order by pk limit n` pattern. + orderByPKLimitN bool } // ExtractCorrelatedCols implements LogicalPlan interface. From ab63beb620d270d55b9cabe6565866c6b324a437 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 2 Aug 2021 16:52:45 +0800 Subject: [PATCH 17/21] resolve some TODOs --- planner/core/exhaust_physical_plans.go | 2 +- planner/core/find_best_task.go | 32 ++++++------ planner/core/stats.go | 50 +++++++++++-------- .../core/testdata/integration_suite_in.json | 1 + .../core/testdata/integration_suite_out.json | 17 +++++-- planner/util/path.go | 2 + 6 files changed, 61 insertions(+), 43 deletions(-) diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 6eaa43d5a192b..dbdb5194e4e36 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -1045,7 +1045,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask( Columns: ds.TblCols, ColumnNames: ds.names, } - if !ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, is.Table) { + if !path.IsSingleScan { // On this way, it's double read case. ts := PhysicalTableScan{ Columns: ds.Columns, diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 0d14a0b9c83be..d96807c86edc0 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -417,7 +417,6 @@ type candidatePath struct { path *util.AccessPath accessCondsColSet *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions. indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters. - isSingleScan bool isMatchProp bool } @@ -452,8 +451,8 @@ func compareBool(l, r bool) int { } func compareIndexBack(lhs, rhs *candidatePath) (int, bool) { - result := compareBool(lhs.isSingleScan, rhs.isSingleScan) - if result == 0 && !lhs.isSingleScan { + result := compareBool(lhs.path.IsSingleScan, rhs.path.IsSingleScan) + if result == 0 && !lhs.path.IsSingleScan { // if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters // to compare how many table rows will be accessed. return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet) @@ -542,16 +541,14 @@ func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.Ph candidate := &candidatePath{path: path} candidate.isMatchProp = ds.isMatchProp(path, prop) candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) - candidate.isSingleScan = true return candidate } -func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath { +func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { candidate := &candidatePath{path: path} candidate.isMatchProp = ds.isMatchProp(path, prop) candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters) - candidate.isSingleScan = isSingleScan return candidate } @@ -594,14 +591,13 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida continue } } else { - coveredByIdx := ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) - if len(path.AccessConds) > 0 || !prop.IsEmpty() || path.Forced || coveredByIdx { + if len(path.AccessConds) > 0 || !prop.IsEmpty() || path.Forced || path.IsSingleScan { // We will use index to generate physical plan if any of the following conditions is satisfied: // 1. This path's access cond is not nil. // 2. We have a non-empty prop to match. // 3. This index is forced to choose. // 4. The needed columns are all covered by index columns(and handleCol). - currentCandidate = ds.getIndexCandidate(path, prop, coveredByIdx) + currentCandidate = ds.getIndexCandidate(path, prop) } else { continue } @@ -1097,7 +1093,7 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) (*expression.C // convertToIndexScan converts the DataSource to index scan with idx. func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) { - if !candidate.isSingleScan { + if !candidate.path.IsSingleScan { // If it's parent requires single read task, return max cost. if prop.TaskTp == property.CopSingleReadTaskType { return invalidTask, nil @@ -1110,7 +1106,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid return invalidTask, nil } path := candidate.path - is, cost, _ := ds.getOriginalPhysicalIndexScan(prop, path, candidate.isMatchProp, candidate.isSingleScan) + is, cost, _ := ds.getOriginalPhysicalIndexScan(prop, path, candidate.isMatchProp, candidate.path.IsSingleScan) cop := &copTask{ indexPlan: is, tblColHists: ds.TblColHists, @@ -1122,7 +1118,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid Columns: ds.TblCols, ColumnNames: ds.names, } - if !candidate.isSingleScan { + if !candidate.path.IsSingleScan { // On this way, it's double read case. ts := PhysicalTableScan{ Columns: ds.Columns, @@ -1688,8 +1684,8 @@ func (ds *DataSource) convertToPointGet(prop *property.PhysicalProperty, candida if !prop.IsEmpty() && !candidate.isMatchProp { return invalidTask } - if prop.TaskTp == property.CopDoubleReadTaskType && candidate.isSingleScan || - prop.TaskTp == property.CopSingleReadTaskType && !candidate.isSingleScan { + if prop.TaskTp == property.CopDoubleReadTaskType && candidate.path.IsSingleScan || + prop.TaskTp == property.CopSingleReadTaskType && !candidate.path.IsSingleScan { return invalidTask } @@ -1745,7 +1741,7 @@ func (ds *DataSource) convertToPointGet(prop *property.PhysicalProperty, candida pointGetPlan.IdxColLens = candidate.path.IdxColLens pointGetPlan.IndexValues = candidate.path.Ranges[0].LowVal pointGetPlan.PartitionInfo = partitionInfo - if candidate.isSingleScan { + if candidate.path.IsSingleScan { cost = pointGetPlan.GetCost(candidate.path.IdxCols) } else { cost = pointGetPlan.GetCost(ds.TblCols) @@ -1771,8 +1767,8 @@ func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, ca if !prop.IsEmpty() && !candidate.isMatchProp { return invalidTask } - if prop.TaskTp == property.CopDoubleReadTaskType && candidate.isSingleScan || - prop.TaskTp == property.CopSingleReadTaskType && !candidate.isSingleScan { + if prop.TaskTp == property.CopDoubleReadTaskType && candidate.path.IsSingleScan || + prop.TaskTp == property.CopSingleReadTaskType && !candidate.path.IsSingleScan { return invalidTask } @@ -1819,7 +1815,7 @@ func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, ca batchPointGetPlan.KeepOrder = true batchPointGetPlan.Desc = prop.SortItems[0].Desc } - if candidate.isSingleScan { + if candidate.path.IsSingleScan { cost = batchPointGetPlan.GetCost(candidate.path.IdxCols) } else { cost = batchPointGetPlan.GetCost(ds.TblCols) diff --git a/planner/core/stats.go b/planner/core/stats.go index 4e5cee99236e2..2e98e3d664c64 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -15,8 +15,10 @@ package core import ( "context" + "fmt" "math" "sort" + "strings" "github.com/pingcap/errors" "github.com/pingcap/parser/ast" @@ -286,45 +288,39 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths) uniqueIdxsWithDoubleScan := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths)) singleScanIdxs := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths)) - var selected, uniqueBest, refinedBest *util.AccessPath + var ( + selected, uniqueBest, refinedBest *util.AccessPath + isRefinedPath bool + ) for _, path := range ds.possibleAccessPaths { if path.IsTablePath() { err := ds.deriveTablePathStats(path, ds.pushedDownConds, false) if err != nil { return nil, err } + path.IsSingleScan = true } else { ds.deriveIndexPathStats(path, ds.pushedDownConds, false) + path.IsSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) } - // TODO: Should we handle TiFlash case specially? // Try some heuristic rules to select access path. if len(path.Ranges) == 0 { selected = path break } - // TODO: Can we record isSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) - // as a field of AccessPath? In this way ds.isCoveringIndex only needs to be called once for each path. if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) { if path.IsTablePath() || path.Index.Unique { - var singleScan bool - if path.IsTablePath() { - singleScan = true - } else { - singleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) - } - if singleScan { - // TODO: What if multiple paths satisfy all conditions? + if path.IsSingleScan { selected = path break } uniqueIdxsWithDoubleScan = append(uniqueIdxsWithDoubleScan, path) } - } else if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) { + } else if path.IsSingleScan { singleScanIdxs = append(singleScanIdxs, path) } } if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 { - // TODO: Move accessCondsColSet from candidatePath to AccessPath so that we can use it both here and skyline pruning. uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan)) for _, uniqueIdx := range uniqueIdxsWithDoubleScan { uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds)) @@ -362,6 +358,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * // Hence we should compare `len(refinedBest.Ranges)` and `2*len(uniqueBest.Ranges)` to select the better one. if refinedBest != nil && (uniqueBest == nil || len(refinedBest.Ranges) < 2*len(uniqueBest.Ranges)) { selected = refinedBest + isRefinedPath = true } else { selected = uniqueBest } @@ -373,20 +370,33 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * // TODO: Can we make a more careful check on whether the optimization depends on mutable constants? ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true if ds.ctx.GetSessionVars().StmtCtx.InExplainStmt { - var tableName, pathName string + var tableName string if ds.TableAsName.O == "" { tableName = ds.tableInfo.Name.O } else { tableName = ds.TableAsName.O } if selected.IsTablePath() { - pathName = "primary key of " + tableName + // TODO: primary key / handle / real name? + ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(fmt.Sprintf("handle of %s is selected since the path only has point ranges", tableName))) } else { - pathName = "index " + selected.Index.Name.O + " of " + tableName + var sb strings.Builder + if selected.Index.Unique { + sb.WriteString("unique ") + } + sb.WriteString(fmt.Sprintf("index %s of %s is selected since the path", selected.Index.Name.O, tableName)) + if isRefinedPath { + sb.WriteString(" only fetches limited number of rows") + } else { + sb.WriteString(" only has point ranges") + } + if selected.IsSingleScan { + sb.WriteString(" with single scan") + } else { + sb.WriteString(" with double scan") + } + ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(sb.String())) } - // TODO: Do we need to specify which heuristic rule `selected` matches? It is kind of hard to briefly describe the - // three heuristic rules. Besides, we can distinguish the three rules by checking EXPLAIN result. - ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected by heuristics")) } } diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 901d509459403..2b661b674c535 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -324,6 +324,7 @@ { "name": "TestHeuristicIndexSelection", "cases": [ + "select * from t1 where a = 3 or a = 5", "select f, g from t1 where f = 2 and g in (3, 4, 5)", "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)", "select f, g from t1 where f = 2 and g > 3", diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index b6ce5c8d28671..4a1b76289375e 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1683,13 +1683,22 @@ { "Name": "TestHeuristicIndexSelection", "Cases": [ + { + "SQL": "select * from t1 where a = 3 or a = 5", + "Plan": [ + "Batch_Point_Get 2.00 root table:t1 handle:[3 5], keep order:false, desc:false" + ], + "Warnings": [ + "Note 1105 handle of t1 is selected since the path only has point ranges" + ] + }, { "SQL": "select f, g from t1 where f = 2 and g in (3, 4, 5)", "Plan": [ "Batch_Point_Get 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false" ], "Warnings": [ - "Note 1105 index f_g of t1 is selected by heuristics" + "Note 1105 unique index f_g of t1 is selected since the path only has point ranges with single scan" ] }, { @@ -1698,7 +1707,7 @@ "Batch_Point_Get 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false" ], "Warnings": [ - "Note 1105 index c_d_e of t1 is selected by heuristics" + "Note 1105 unique index c_d_e of t1 is selected since the path only has point ranges with double scan" ] }, { @@ -1708,7 +1717,7 @@ "└─IndexRangeScan 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo" ], "Warnings": [ - "Note 1105 index f_g of t1 is selected by heuristics" + "Note 1105 unique index f_g of t1 is selected since the path only fetches limited number of rows with single scan" ] }, { @@ -1718,7 +1727,7 @@ "└─Point_Get 1.00 root table:t2, index:idx_a(a) " ], "Warnings": [ - "Note 1105 index idx_a of t2 is selected by heuristics" + "Note 1105 unique index idx_a of t2 is selected since the path only has point ranges with double scan" ] } ] diff --git a/planner/util/path.go b/planner/util/path.go index 665d71ccd4d23..5a29d007c29db 100644 --- a/planner/util/path.go +++ b/planner/util/path.go @@ -62,6 +62,8 @@ type AccessPath struct { IsCommonHandlePath bool // Forced means this path is generated by `use/force index()`. Forced bool + // IsSingleScan indicates whether the path is a single index/table scan or table access after index scan. + IsSingleScan bool } // IsTablePath returns true if it's IntHandlePath or CommonHandlePath. From 11f3080d122ff4ffccb70aa4ed2073380df1a3cf Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 2 Aug 2021 19:39:20 +0800 Subject: [PATCH 18/21] upd --- planner/core/logical_plan_builder.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index fecb7cf404a20..c93c7d5ae0206 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -3416,6 +3416,27 @@ func (b *PlanBuilder) setOrderByPKLimitNForDataSource(p LogicalPlan, sel *ast.Se ds.orderByPKLimitN = true } +func checkOrderByPK(ds *DataSource, byItems []*util.ByItems) bool { + if ds.tableInfo.PKIsHandle && len(byItems) == 1 { + if col, isCol := byItems[0].Expr.(*expression.Column); isCol && col.Equal(nil, ds.getPKIsHandleCol()) { + return true + } + return false + } + if ds.tableInfo.IsCommonHandle && len(byItems) == len(ds.commonHandleCols) { + orderByPK := true + for i, byItem := range byItems { + if col, isCol := byItem.Expr.(*expression.Column); !isCol || ds.commonHandleLens[i] != types.UnspecifiedLength || + !col.Equal(nil, ds.commonHandleCols[i]) || (i > 0 && byItem.Desc != byItems[i-1].Desc) { + orderByPK = false + break + } + } + return orderByPK + } + return false +} + func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p LogicalPlan, err error) { b.pushSelectOffset(sel.QueryBlockOffset) b.pushTableHints(sel.TableHints, sel.QueryBlockOffset) @@ -3662,11 +3683,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L return nil, err } if logicalSort, isSort := p.(*LogicalSort); isSort && sel.Limit != nil && isDataSource { - if col, isCol := logicalSort.ByItems[0].Expr.(*expression.Column); isCol && ds.handleCols.NumCols() == 1 { - if ds.handleCols.GetCol(0).Equal(nil, col) && sel.Limit != nil { - ds.orderByPKLimitN = true - } - } + ds.orderByPKLimitN = checkOrderByPK(ds, logicalSort.ByItems) } } From 97c34172d4fe374992b5b49bcaafff758e4ab1fc Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Tue, 3 Aug 2021 17:45:22 +0800 Subject: [PATCH 19/21] upd & add testcases --- executor/set_test.go | 10 ++++++ planner/core/integration_test.go | 24 ++++++++++++++ planner/core/stats.go | 32 ++++++++++++++++++- .../core/testdata/integration_suite_in.json | 9 +++++- .../core/testdata/integration_suite_out.json | 30 +++++++++++++++++ sessionctx/variable/session.go | 4 +++ sessionctx/variable/sysvar.go | 4 +++ sessionctx/variable/tidb_vars.go | 4 +++ 8 files changed, 115 insertions(+), 2 deletions(-) diff --git a/executor/set_test.go b/executor/set_test.go index 8980efe58cad5..9e7d7a0dc3a9e 100644 --- a/executor/set_test.go +++ b/executor/set_test.go @@ -539,6 +539,16 @@ func (s *testSerialSuite1) TestSetVar(c *C) { tk.MustExec(`set tidb_opt_limit_push_down_threshold = 20`) tk.MustQuery(`select @@global.tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("100")) tk.MustQuery(`select @@tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("20")) + + tk.MustQuery("select @@tidb_enable_maybe_good_heuristics").Check(testkit.Rows("0")) + tk.MustExec("set global tidb_enable_maybe_good_heuristics = 1") + tk.MustQuery("select @@global.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("1")) + tk.MustExec("set global tidb_enable_maybe_good_heuristics = 0") + tk.MustQuery("select @@global.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("0")) + tk.MustExec("set session tidb_enable_maybe_good_heuristics = 1") + tk.MustQuery("select @@session.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("1")) + tk.MustExec("set session tidb_enable_maybe_good_heuristics = 0") + tk.MustQuery("select @@session.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("0")) } func (s *testSuite5) TestTruncateIncorrectIntSessionVar(c *C) { diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 94e0e5096c965..ae2e5edfda4d2 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -4164,3 +4164,27 @@ func (s *testIntegrationSuite) TestHeuristicIndexSelection(c *C) { tk.MustQuery("show warnings").Check(testkit.Rows(output[i].Warnings...)) } } + +func (s *testIntegrationSuite) TestMaybeGoodHeuristics(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a int primary key, b int, c int, index idx_b(b))") + + var input []string + var output []struct { + SQL string + Plan []string + Warnings []string + } + s.testData.GetTestCases(c, &input, &output) + for i, tt := range input { + s.testData.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows()) + output[i].Warnings = s.testData.ConvertRowsToStrings(tk.MustQuery("show warnings").Rows()) + }) + tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...)) + tk.MustQuery("show warnings").Check(testkit.Rows(output[i].Warnings...)) + } +} diff --git a/planner/core/stats.go b/planner/core/stats.go index 2e98e3d664c64..0c31d1902cfa9 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -308,7 +308,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * selected = path break } - if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) { + if path.OnlyPointRange(ds.ctx.GetSessionVars().StmtCtx) { if path.IsTablePath() || path.Index.Unique { if path.IsSingleScan { selected = path @@ -398,6 +398,36 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(sb.String())) } } + } else if ds.ctx.GetSessionVars().EnableMaybeGoodHeuristics && ds.orderByPKLimitN { + // maybe-good heuristics + // For query like `where index_col = ... order by pk limit n`, if the count of `index_col = ...` is small enough, we prefer the index. + for _, path := range ds.possibleAccessPaths { + // TODO: add a variable instead of using 100 + if path.OnlyPointRange(ds.ctx.GetSessionVars().StmtCtx) && path.CountAfterAccess < 100 { + selected = path + break + } + } + if selected != nil { + ds.possibleAccessPaths[0] = selected + ds.possibleAccessPaths = ds.possibleAccessPaths[:1] + // TODO: Can we make a more careful check on whether the optimization depends on mutable constants? + ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true + if ds.ctx.GetSessionVars().StmtCtx.InExplainStmt { + var tableName, pathName string + if ds.TableAsName.O == "" { + tableName = ds.tableInfo.Name.O + } else { + tableName = ds.TableAsName.O + } + if selected.IsTablePath() { + pathName = "handle of " + tableName + } else { + pathName = "index " + selected.Index.Name.O + " of " + tableName + } + ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern")) + } + } } // TODO: implement UnionScan + IndexMerge diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 2b661b674c535..48c6b6f42f0d9 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -328,7 +328,14 @@ "select f, g from t1 where f = 2 and g in (3, 4, 5)", "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)", "select f, g from t1 where f = 2 and g > 3", - "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5);" + "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5)" + ] + }, + { + "name": "TestMaybeGoodHeuristics", + "cases": [ + "select * from t where b = 3 order by a limit 10", + "select * from t where b in (2, 3, 4) order by a limit 10" ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 4a1b76289375e..5a5032cefbe73 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1731,5 +1731,35 @@ ] } ] + }, + { + "Name": "TestMaybeGoodHeuristics", + "Cases": [ + { + "SQL": "select * from t where b = 3 order by a limit 10", + "Plan": [ + "IndexLookUp 10.00 root limit embedded(offset:0, count:10)", + "├─Limit(Build) 10.00 cop[tikv] offset:0, count:10", + "│ └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_b(b) range:[3,3], keep order:true, stats:pseudo", + "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Warnings": [ + "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern" + ] + }, + { + "SQL": "select * from t where b in (2, 3, 4) order by a limit 10", + "Plan": [ + "TopN 0.00 root test.t.a, offset:0, count:10", + "└─IndexLookUp 10.00 root ", + " ├─TopN(Build) 10.00 cop[tikv] test.t.a, offset:0, count:10", + " │ └─IndexRangeScan 30.00 cop[tikv] table:t, index:idx_b(b) range:[2,2], [3,3], [4,4], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Warnings": [ + "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern" + ] + } + ] } ] diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 1baa10d590da8..7059fe593da54 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -871,6 +871,9 @@ type SessionVars struct { // TemporaryTableData stores committed kv values for temporary table for current session. TemporaryTableData kv.MemBuffer + + // EnableMaybeGoodHeuristics indicates whether to apply maybe-good heuristics when the optimizer generates plans. + EnableMaybeGoodHeuristics bool } // AllocMPPTaskID allocates task id for mpp tasks. It will reset the task id if the query's @@ -1087,6 +1090,7 @@ func NewSessionVars() *SessionVars { CTEMaxRecursionDepth: DefCTEMaxRecursionDepth, TMPTableSize: DefTMPTableSize, EnableGlobalTemporaryTable: DefTiDBEnableGlobalTemporaryTable, + EnableMaybeGoodHeuristics: DefTiDBEnableMaybeGoodHeuristics, } vars.KVVars = tikvstore.NewVariables(&vars.Killed) vars.Concurrency = Concurrency{ diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index a791d52063abf..e5fdb87e2a315 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1769,6 +1769,10 @@ var defaultSysVars = []*SysVar{ s.EnableStableResultMode = TiDBOptOn(val) return nil }}, + {Scope: ScopeGlobal | ScopeSession, Name: TiDBEnableMaybeGoodHeuristics, Value: BoolToOnOff(DefTiDBEnableMaybeGoodHeuristics), Hidden: true, Type: TypeBool, SetSession: func(s *SessionVars, val string) error { + s.EnableMaybeGoodHeuristics = TiDBOptOn(val) + return nil + }}, } // FeedbackProbability points to the FeedbackProbability in statistics package. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index c500aa4eb6727..e47dc179add04 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -580,6 +580,9 @@ const ( // TiDBEnableOrderedResultMode indicates if stabilize query results. TiDBEnableOrderedResultMode = "tidb_enable_ordered_result_mode" + + // TiDBEnableMaybeGoodHeuristics indicates whether to apply maybe-good heuristics when the optimizer generates plans. + TiDBEnableMaybeGoodHeuristics = "tidb_enable_maybe_good_heuristics" ) // TiDB vars that have only global scope @@ -739,6 +742,7 @@ const ( DefTMPTableSize = 16777216 DefTiDBEnableLocalTxn = false DefTiDBEnableOrderedResultMode = false + DefTiDBEnableMaybeGoodHeuristics = false ) // Process global variables. From fb8ed4950b6eab81102d676f148ead368129d83d Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 9 Aug 2021 12:16:36 +0800 Subject: [PATCH 20/21] fix and add subquery test --- planner/core/integration_test.go | 3 +- planner/core/stats.go | 30 ++++++++++++++ .../core/testdata/integration_suite_in.json | 3 +- .../core/testdata/integration_suite_out.json | 39 ++++++++++++++----- 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index b18dc2b1fdf85..1c83aeb6cfe3d 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -4183,7 +4183,8 @@ func (s *testIntegrationSuite) TestMaybeGoodHeuristics(c *C) { tk.MustExec("use test") tk.MustExec("drop table if exists t") tk.MustExec("create table t(a int primary key, b int, c int, index idx_b(b))") - + tk.MustExec("set tidb_enable_maybe_good_heuristics = 1") + var input []string var output []struct { SQL string diff --git a/planner/core/stats.go b/planner/core/stats.go index 3a21a6f14d904..025c23e3faad0 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -373,6 +373,36 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error { ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(sb.String())) } } + } else if ds.ctx.GetSessionVars().EnableMaybeGoodHeuristics && ds.orderByPKLimitN { + // maybe-good heuristics + // For query like `where index_col = ... order by pk limit n`, if the count of `index_col = ...` is small enough, we prefer the index. + for _, path := range ds.possibleAccessPaths { + const smallCountAfterAccess = 100 + if path.OnlyPointRange(ds.ctx.GetSessionVars().StmtCtx) && path.CountAfterAccess < smallCountAfterAccess { + selected = path + break + } + } + if selected != nil { + ds.possibleAccessPaths[0] = selected + ds.possibleAccessPaths = ds.possibleAccessPaths[:1] + // TODO: Can we make a more careful check on whether the optimization depends on mutable constants? + ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true + if ds.ctx.GetSessionVars().StmtCtx.InVerboseExplain { + var tableName, pathName string + if ds.TableAsName.O == "" { + tableName = ds.tableInfo.Name.O + } else { + tableName = ds.TableAsName.O + } + if selected.IsTablePath() { + pathName = "handle of " + tableName + } else { + pathName = "index " + selected.Index.Name.O + " of " + tableName + } + ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern")) + } + } } return nil } diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 52c863ff0faf6..fcb037db7fc79 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -335,7 +335,8 @@ "name": "TestMaybeGoodHeuristics", "cases": [ "select * from t where b = 3 order by a limit 10", - "select * from t where b in (2, 3, 4) order by a limit 10" + "select * from t where b in (2, 3, 4) order by a limit 10", + "select * from t as t1 where t1.a > any(select t2.b from t as t2 where t2.b = 2 and t2.c > t1.a order by t2.a limit 10)" ] }, { diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 3ca941a7f70f2..06d88db893ddc 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -1738,10 +1738,10 @@ { "SQL": "select * from t where b = 3 order by a limit 10", "Plan": [ - "IndexLookUp 10.00 root limit embedded(offset:0, count:10)", - "├─Limit(Build) 10.00 cop[tikv] offset:0, count:10", - "│ └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_b(b) range:[3,3], keep order:true, stats:pseudo", - "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + "IndexLookUp_20 10.00 201.28 root limit embedded(offset:0, count:10)", + "├─Limit_19(Build) 10.00 590.00 cop[tikv] offset:0, count:10", + "│ └─IndexRangeScan_17 10.00 590.00 cop[tikv] table:t, index:idx_b(b) range:[3,3], keep order:true, stats:pseudo", + "└─TableRowIDScan_18(Probe) 10.00 590.00 cop[tikv] table:t keep order:false, stats:pseudo" ], "Warnings": [ "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern" @@ -1750,15 +1750,36 @@ { "SQL": "select * from t where b in (2, 3, 4) order by a limit 10", "Plan": [ - "TopN 0.00 root test.t.a, offset:0, count:10", - "└─IndexLookUp 10.00 root ", - " ├─TopN(Build) 10.00 cop[tikv] test.t.a, offset:0, count:10", - " │ └─IndexRangeScan 30.00 cop[tikv] table:t, index:idx_b(b) range:[2,2], [3,3], [4,4], keep order:false, stats:pseudo", - " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" + "TopN_9 10.00 379.61 root test.t.a, offset:0, count:10", + "└─IndexLookUp_16 10.00 279.95 root ", + " ├─TopN_15(Build) 10.00 0.00 cop[tikv] test.t.a, offset:0, count:10", + " │ └─IndexRangeScan_13 30.00 1770.00 cop[tikv] table:t, index:idx_b(b) range:[2,2], [3,3], [4,4], keep order:false, stats:pseudo", + " └─TableRowIDScan_14(Probe) 10.00 1770.00 cop[tikv] table:t keep order:false, stats:pseudo" ], "Warnings": [ "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern" ] + }, + { + "SQL": "select * from t as t1 where t1.a > any(select t2.b from t as t2 where t2.b = t1.b order by t2.a limit 10)", + "Plan": [ + "Projection_16 10000.00 2533281.69 root test.t.a, test.t.b, test.t.c", + "└─Apply_18 10000.00 2527263.69 root CARTESIAN inner join, other cond:or(gt(test.t.a, Column#8), if(ne(Column#9, 0), NULL, 0))", + " ├─TableReader_20(Build) 10000.00 54251.33 root data:TableFullScan_19", + " │ └─TableFullScan_19 10000.00 570020.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─Selection_21(Probe) 0.80 244.90 root ne(Column#10, 0)", + " └─HashAgg_22 1.00 241.90 root funcs:min(Column#13)->Column#8, funcs:sum(Column#14)->Column#9, funcs:count(1)->Column#10", + " └─Projection_46 8.00 0.00 root test.t.b, cast(isnull(test.t.b), decimal(22,0) BINARY)->Column#14", + " └─Limit_28 8.00 194.50 root offset:0, count:10", + " └─IndexLookUp_37 8.00 194.50 root ", + " ├─IndexRangeScan_34(Build) 10.00 590.00 cop[tikv] table:t2, index:idx_b(b) range:[2,2], keep order:true, stats:pseudo", + " └─Selection_36(Probe) 8.00 0.00 cop[tikv] gt(test.t.c, test.t.a)", + " └─TableRowIDScan_35 10.00 590.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Warnings": [ + "Note 1105 index idx_b of t2 is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern", + "Note 1105 [t1] remain after pruning paths for t1 given Prop{SortItems: [], TaskTp: rootTask}" + ] } ] }, From c451d9eeba6e6c69e34980c22f3a7d0cd9b0cba2 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 9 Aug 2021 12:52:06 +0800 Subject: [PATCH 21/21] remove unused func --- planner/core/logical_plan_builder.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index f620290339f35..f534ba0154fda 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -3407,15 +3407,6 @@ func (b *PlanBuilder) TableHints() *tableHintInfo { return &(b.tableHintInfo[len(b.tableHintInfo)-1]) } -func (b *PlanBuilder) setOrderByPKLimitNForDataSource(p LogicalPlan, sel *ast.SelectStmt) { - ds, ok := p.(*DataSource) - if !ok || sel.OrderBy == nil || sel.Limit == nil { - return - } - - ds.orderByPKLimitN = true -} - func checkOrderByPK(ds *DataSource, byItems []*util.ByItems) bool { if ds.tableInfo.PKIsHandle && len(byItems) == 1 { if col, isCol := byItems[0].Expr.(*expression.Column); isCol && col.Equal(nil, ds.getPKIsHandleCol()) {