Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: consider prefix index column length in skyline pruning #27527

Merged
merged 11 commits into from
Nov 26, 2021
51 changes: 15 additions & 36 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ import (
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
"go.uber.org/zap"
"golang.org/x/tools/container/intsets"
)

const (
Expand Down Expand Up @@ -417,30 +416,10 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {

// candidatePath is used to maintain required info for skyline pruning.
type candidatePath struct {
path *util.AccessPath
accessCondsColSet *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions.
indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters.
isMatchProp bool
}

// compareColumnSet will compares the two set. The last return value is used to indicate
// if they are comparable, it is false when both two sets have columns that do not occur in the other.
// When the second return value is true, the value of first:
// (1) -1 means that `l` is a strict subset of `r`;
// (2) 0 means that `l` equals to `r`;
// (3) 1 means that `l` is a strict superset of `r`.
func compareColumnSet(l, r *intsets.Sparse) (int, bool) {
lLen, rLen := l.Len(), r.Len()
if lLen < rLen {
// -1 is meaningful only when l.SubsetOf(r) is true.
return -1, l.SubsetOf(r)
}
if lLen == rLen {
// 0 is meaningful only when l.SubsetOf(r) is true.
return 0, l.SubsetOf(r)
}
// 1 is meaningful only when r.SubsetOf(l) is true.
return 1, r.SubsetOf(l)
path *util.AccessPath
accessCondsColMap util.Col2Len // accessCondsColMap maps Column.UniqueID to column length for the columns in AccessConds.
indexCondsColMap util.Col2Len // indexCondsColMap maps Column.UniqueID to column length for the columns in AccessConds and indexFilters.
isMatchProp bool
}

func compareBool(l, r bool) int {
Expand All @@ -456,21 +435,21 @@ func compareBool(l, r bool) int {
func compareIndexBack(lhs, rhs *candidatePath) (int, bool) {
result := compareBool(lhs.path.IsSingleScan, rhs.path.IsSingleScan)
if result == 0 && !lhs.path.IsSingleScan {
// if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters
// if both lhs and rhs need to access table after IndexScan, we utilize the set of columns that occurred in AccessConds and IndexFilters
// to compare how many table rows will be accessed.
return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet)
return util.CompareCol2Len(lhs.indexCondsColMap, rhs.indexCondsColMap)
}
return result, true
}

// compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
// (1): the set of columns that occurred in the access condition,
// (2): whether or not it matches the physical property
// (3): does it require a double scan.
// (2): does it require a double scan,
// (3): whether or not it matches the physical property.
// If `x` is not worse than `y` at all factors,
// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
func compareCandidates(lhs, rhs *candidatePath) int {
setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet)
accessResult, comparable := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
if !comparable {
return 0
}
Expand All @@ -479,11 +458,11 @@ func compareCandidates(lhs, rhs *candidatePath) int {
return 0
}
matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp)
sum := setsResult + scanResult + matchResult
if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
sum := accessResult + scanResult + matchResult
if accessResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
return 1
}
if setsResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 {
if accessResult <= 0 && scanResult <= 0 && matchResult <= 0 && sum < 0 {
return -1
}
return 0
Expand Down Expand Up @@ -543,15 +522,15 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
candidate := &candidatePath{path: path}
candidate.isMatchProp = ds.isMatchProp(path, prop)
candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
candidate.accessCondsColMap = util.ExtractCol2Len(path.AccessConds, nil, nil)
return candidate
}

func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
candidate := &candidatePath{path: path}
candidate.isMatchProp = ds.isMatchProp(path, prop)
candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
candidate.accessCondsColMap = util.ExtractCol2Len(path.AccessConds, path.IdxCols, path.IdxColLens)
candidate.indexCondsColMap = util.ExtractCol2Len(append(path.AccessConds, path.IndexFilters...), path.FullIdxCols, path.FullIdxColLens)
return candidate
}

Expand Down
9 changes: 9 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4859,7 +4859,16 @@ func (s *testIntegrationSuite) TestIssues29711(c *C) {
" └─TopN(Probe) 10.00 cop[tikv] test.t29711.a, offset:0, count:10",
" └─TableRowIDScan 10000.00 cop[tikv] table:t29711 keep order:false, stats:pseudo",
))
}

// TestIssue27313 is a regression test for skyline pruning with prefix indexes:
// idx1 only covers a 2-character prefix of column a, while idx2 covers the whole column.
func (s *testIntegrationSuite) TestIssue27313(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	stmts := []string{
		"use test",
		"drop table if exists t",
		"create table t(a varchar(100), b int, c int, index idx1(a(2), b), index idx2(a))",
		"explain format = 'verbose' select * from t where a = 'abcdefghijk' and b > 4",
	}
	for _, stmt := range stmts {
		tk.MustExec(stmt)
	}
	// An empty warning list indicates that idx2 is not pruned by idx1.
	tk.MustQuery("show warnings").Check(testkit.Rows())
}

func (s *testIntegrationSuite) TestIssue30094(c *C) {
Expand Down
13 changes: 6 additions & 7 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/zap"
"golang.org/x/tools/container/intsets"
)

func (p *basePhysicalPlan) StatsCount() float64 {
Expand Down Expand Up @@ -297,9 +296,9 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
}
}
if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 {
uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan))
uniqueIdxAccessCols := make([]util.Col2Len, 0, len(uniqueIdxsWithDoubleScan))
for _, uniqueIdx := range uniqueIdxsWithDoubleScan {
uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds))
uniqueIdxAccessCols = append(uniqueIdxAccessCols, uniqueIdx.GetCol2LenFromAccessConds())
// Find the unique index with the minimal number of ranges as `uniqueBest`.
if uniqueBest == nil || len(uniqueIdx.Ranges) < len(uniqueBest.Ranges) {
uniqueBest = uniqueIdx
Expand All @@ -314,10 +313,10 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
// Hence, for each index in `singleScanIdxs`, we check whether it is better than some index in `uniqueIdxsWithDoubleScan`.
// If yes, the index is a refined one. We find the refined index with the minimal number of ranges as `refineBest`.
for _, singleScanIdx := range singleScanIdxs {
columnSet := expression.ExtractColumnSet(singleScanIdx.AccessConds)
for _, uniqueIdxColumnSet := range uniqueIdxColumnSets {
setsResult, comparable := compareColumnSet(columnSet, uniqueIdxColumnSet)
if comparable && setsResult == 1 {
col2Len := singleScanIdx.GetCol2LenFromAccessConds()
for _, uniqueIdxCol2Len := range uniqueIdxAccessCols {
accessResult, comparable := util.CompareCol2Len(col2Len, uniqueIdxCol2Len)
if comparable && accessResult == 1 {
if refinedBest == nil || len(singleScanIdx.Ranges) < len(refinedBest.Ranges) {
refinedBest = singleScanIdx
}
Expand Down
27 changes: 27 additions & 0 deletions planner/util/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
"testing"

"github.com/pingcap/tidb/util/testbridge"
"go.uber.org/goleak"
)

// TestMain is the test entry point of this package. It first calls
// testbridge.WorkaroundGoCheckFlags and then hands control to goleak.VerifyTestMain,
// which (per goleak's documentation) runs the tests and reports goroutines that are
// still alive afterwards.
func TestMain(m *testing.M) {
testbridge.WorkaroundGoCheckFlags()
goleak.VerifyTestMain(m)
}
104 changes: 104 additions & 0 deletions planner/util/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,107 @@ func (path *AccessPath) OnlyPointRange(sctx sessionctx.Context) bool {
}
return noIntervalRange && !haveNullVal
}

// Col2Len maps expression.Column.UniqueID to the column length.
// The length is types.UnspecifiedLength when the full column is used (e.g. table columns),
// and may be a prefix length when the column comes from an index.
type Col2Len map[int64]int

// ExtractCol2Len gathers the columns referenced by exprs together with their lengths.
// When idxCols is non-nil, only columns matching an entry of idxCols are recorded, each with
// the corresponding entry of idxColLens (which may be a prefix length).
// When idxCols is nil, every referenced column is recorded with types.UnspecifiedLength,
// i.e. as a table column with its full length.
func ExtractCol2Len(exprs []expression.Expression, idxCols []*expression.Column, idxColLens []int) Col2Len {
	result := make(Col2Len, len(idxCols))
	for i := range exprs {
		extractCol2LenFromExpr(exprs[i], idxCols, idxColLens, result)
	}
	return result
}

// extractCol2LenFromExpr walks expr recursively and records the columns it references into col2Len.
// Scalar functions are descended into through their arguments; any other non-column expression
// contributes nothing.
// For a column, the recorded length is types.UnspecifiedLength when idxCols is nil; otherwise it is
// the idxColLens entry of the first non-nil idxCols element equal to the column, and the column is
// skipped when no such element exists.
func extractCol2LenFromExpr(expr expression.Expression, idxCols []*expression.Column, idxColLens []int, col2Len Col2Len) {
	switch e := expr.(type) {
	case *expression.ScalarFunction:
		for _, child := range e.GetArgs() {
			extractCol2LenFromExpr(child, idxCols, idxColLens, col2Len)
		}
	case *expression.Column:
		if idxCols == nil {
			col2Len[e.UniqueID] = types.UnspecifiedLength
			return
		}
		for pos, idxCol := range idxCols {
			if idxCol == nil || !e.EqualByExprAndID(nil, idxCol) {
				continue
			}
			// Only the first matching index column decides the length.
			col2Len[e.UniqueID] = idxColLens[pos]
			return
		}
	}
}

// compareLength compares two column lengths, treating types.UnspecifiedLength
// as longer than any other length. The return value:
// (1) -1 means that l is shorter than r;
// (2) 0 means that l equals r;
// (3) 1 means that l is longer than r.
func compareLength(l, r int) int {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add some comments to explain the return value.

if l == r {
return 0
}
if l == types.UnspecifiedLength {
return 1
}
if r == types.UnspecifiedLength {
return -1
}
if l > r {
return 1
}
return -1
}

// dominate returns true if every column of c2 also exists in c1 and, for each such column,
// c2's length is no longer than c1's length (as decided by compareLength).
func (c1 Col2Len) dominate(c2 Col2Len) bool {
	// c1 cannot contain all columns of c2 if it has fewer entries.
	if len(c1) < len(c2) {
		return false
	}
	for id, lenInC2 := range c2 {
		if lenInC1, found := c1[id]; !found || compareLength(lenInC2, lenInC1) > 0 {
			return false
		}
	}
	return true
}

// CompareCol2Len compares two Col2Len maps. The second return value reports whether the maps are
// comparable at all; the first return value is only meaningful when it is true:
// (1) -1 means that c1 is worse than c2 (c2 has more columns and dominates c1);
// (2) 0 means that c1 equals c2 (same columns with the same lengths);
// (3) 1 means that c1 is better than c2 (c1 has more columns and dominates c2).
func CompareCol2Len(c1, c2 Col2Len) (int, bool) {
	switch n1, n2 := len(c1), len(c2); {
	case n1 > n2:
		if !c1.dominate(c2) {
			return 0, false
		}
		return 1, true
	case n1 < n2:
		if !c2.dominate(c1) {
			return 0, false
		}
		return -1, true
	}
	// Same number of columns: the maps are comparable only when they are identical.
	// Having the same columns but a different length on some column counts as incomparable.
	for id, want := range c2 {
		if got, found := c1[id]; !found || got != want {
			return 0, false
		}
	}
	return 0, true
}

// GetCol2LenFromAccessConds returns the columns referenced by path.AccessConds with their lengths.
// For a table path no index columns are supplied, so ExtractCol2Len records full-length table
// columns; otherwise the lengths are taken from path.IdxCols / path.IdxColLens.
func (path *AccessPath) GetCol2LenFromAccessConds() Col2Len {
	var (
		idxCols    []*expression.Column
		idxColLens []int
	)
	if !path.IsTablePath() {
		idxCols, idxColLens = path.IdxCols, path.IdxColLens
	}
	return ExtractCol2Len(path.AccessConds, idxCols, idxColLens)
}
72 changes: 72 additions & 0 deletions planner/util/path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
"testing"
winoros marked this conversation as resolved.
Show resolved Hide resolved

"github.com/stretchr/testify/require"
)

// TestCompareCol2Len checks CompareCol2Len against a table of column-length maps.
// NOTE(review): the literal -1 presumably stands for types.UnspecifiedLength (full column) — confirm.
func TestCompareCol2Len(t *testing.T) {
	type testCase struct {
		lhs, rhs Col2Len
		wantRes  int
		wantOK   bool
	}
	cases := []testCase{
		// lhs has more columns and covers every column of rhs.
		{Col2Len{1: -1, 2: -1, 3: -1}, Col2Len{1: -1, 2: 10}, 1, true},
		// rhs has more columns and covers lhs with a longer length on column 1.
		{Col2Len{1: 5}, Col2Len{1: 10, 2: -1}, -1, true},
		// rhs has more columns, but its column 2 has a different length than in lhs.
		{Col2Len{1: -1, 2: -1}, Col2Len{1: -1, 2: 5, 3: -1}, 0, false},
		// rhs has more columns, but its column 2 is shorter than in lhs.
		{Col2Len{1: -1, 2: 10}, Col2Len{1: -1, 2: 5, 3: -1}, 0, false},
		// Identical maps.
		{Col2Len{1: -1, 2: 10}, Col2Len{1: -1, 2: 10}, 0, true},
		// Same columns but a different length on column 2.
		{Col2Len{1: -1, 2: -1}, Col2Len{1: -1, 2: 10}, 0, false},
	}
	for _, tc := range cases {
		gotRes, gotOK := CompareCol2Len(tc.lhs, tc.rhs)
		require.Equal(t, tc.wantRes, gotRes)
		require.Equal(t, tc.wantOK, gotOK)
	}
}