Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizer: Fix range extraction for CNF(conjunctive normal form) | tidb-test=pr/2341 #53908

Merged
merged 3 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions pkg/bindinfo/global_handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -550,11 +550,10 @@ func TestSetVarFixControlWithBinding(t *testing.T) {
tk.MustExec(`create table t(id int, a varchar(100), b int, c int, index idx_ab(a, b))`)
tk.MustQuery(`explain select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`).Check(
testkit.Rows(
`IndexLookUp_12 0.01 root `,
`├─Selection_10(Build) 0.02 cop[tikv] or(eq(test.t.a, "xx"), and(eq(test.t.a, "kk"), eq(test.t.b, 1)))`,
`│ └─IndexRangeScan_8 20.00 cop[tikv] table:t, index:idx_ab(a, b) range:["kk","kk"], ["xx","xx"], keep order:false, stats:pseudo`,
`└─Selection_11(Probe) 0.01 cop[tikv] eq(test.t.c, 10)`,
` └─TableRowIDScan_9 0.02 cop[tikv] table:t keep order:false, stats:pseudo`))
`IndexLookUp_11 0.01 root `,
`├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:["kk" 1,"kk" 1], ["xx","xx"], keep order:false, stats:pseudo`,
`└─Selection_10(Probe) 0.01 cop[tikv] eq(test.t.c, 10)`,
` └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo`))

tk.MustExec(`create global binding using select /*+ set_var(tidb_opt_fix_control='44389:ON') */ * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`)
tk.MustQuery(`show warnings`).Check(testkit.Rows()) // no warning
Expand Down
7 changes: 6 additions & 1 deletion pkg/expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,12 @@ func containOuterNot(expr Expression, not bool) bool {
// Contains tests if `exprs` contains `e`.
func Contains(exprs []Expression, e Expression) bool {
for _, expr := range exprs {
if e == expr {
// Check string equivalence if one of the expressions is a clone.
sameString := false
if e != nil && expr != nil {
sameString = (e.String() == expr.String())
}
if e == expr || sameString {
return true
}
}
Expand Down
1 change: 1 addition & 0 deletions pkg/planner/core/casetest/index/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 3,
deps = [
"//pkg/testkit",
"//pkg/testkit/testdata",
Expand Down
28 changes: 28 additions & 0 deletions pkg/planner/core/casetest/index/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,31 @@ func TestInvisibleIndex(t *testing.T) {
`IndexReader_7 10000.00 root index:IndexFullScan_6`,
`└─IndexFullScan_6 10000.00 cop[tikv] table:t1, index:a(a) keep order:false, stats:pseudo`))
}

func TestRangeDerivation(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t1 (a1 int, b1 int, c1 int, primary key pkx (a1,b1));")
tk.MustExec("create table t1char (a1 char(5), b1 char(5), c1 int, primary key pkx (a1,b1));")
tk.MustExec("create table t(a int, b int, c int, primary key(a,b));")
tk.MustExec("create table tuk (a int, b int, c int, unique key (a, b, c));")
tk.MustExec("set @@session.tidb_regard_null_as_point=false;")

var input []string
var output []struct {
SQL string
Plan []string
}
indexRangeSuiteData := GetIndexRangeSuiteData()
indexRangeSuiteData.LoadTestCases(t, &input, &output)
indexRangeSuiteData.LoadTestCases(t, &input, &output)
for i, sql := range input {
plan := tk.MustQuery("explain format = 'brief' " + sql)
testdata.OnRecord(func() {
output[i].SQL = sql
output[i].Plan = testdata.ConvertRowsToStrings(plan.Rows())
})
plan.Check(testkit.Rows(output[i].Plan...))
}
}
5 changes: 5 additions & 0 deletions pkg/planner/core/casetest/index/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func TestMain(m *testing.M) {

flag.Parse()
testDataMap.LoadTestSuiteData("testdata", "integration_suite")
testDataMap.LoadTestSuiteData("testdata", "index_range")

opts := []goleak.Option{
goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"),
Expand All @@ -50,6 +51,10 @@ func TestMain(m *testing.M) {
goleak.VerifyTestMain(testmain.WrapTestingM(m, callback), opts...)
}

func GetIndexRangeSuiteData() testdata.TestData {
return testDataMap["index_range"]
}

func GetIntegrationSuiteData() testdata.TestData {
return testDataMap["integration_suite"]
}
23 changes: 23 additions & 0 deletions pkg/planner/core/casetest/index/testdata/index_range_in.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[
{
"name": "TestRangeDerivation",
"cases": [
"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 >= 10) -- simple DNF on (a1,b1) ",
"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 >= 10)) and (c1 > 10) -- -- same as previous example with another conjunct",
"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 > 10) -- simple DNF on (a1,b1) with open interval",
"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and (c1 > 10) -- same as previous example with another conjunct",
"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1<10) or (a1=10 and b1 < 20)) -- upper bound on (a1,b1)",
"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and ((a1<10) or (a1=10 and b1 < 20)) -- upper and lower bound on (a1,b1)",
"select * from t where (a,b) in ((1,1),(2,2)) and c = 3 -- IN list",
"select * from tuk where a<=>null and b>0 and b<2;",
"select a,b,c from tuk where a>3 and b=4 order by a,c;",
// Same test cases with char type
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 >= '10') -- simple DNF on (a1,b1) ",
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 >= '10')) and (c1 > '10') -- -- same as previous example with another conjunct",
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 > '10') -- simple DNF on (a1,b1) with open interval",
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and (c1 > '10') -- same as previous example with another conjunct",
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1<'10') or (a1='10' and b1 < '20')) -- upper bound on (a1,b1)",
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and ((a1<'10') or (a1='10' and b1 < '20')) -- upper and lower bound on (a1,b1)"
]
}
]
144 changes: 144 additions & 0 deletions pkg/planner/core/casetest/index/testdata/index_range_out.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
[
{
"Name": "TestRangeDerivation",
"Cases": [
{
"SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 >= 10) -- simple DNF on (a1,b1) ",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─TableRangeScan 3366.67 cop[tikv] table:t1 range:[1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 >= 10)) and (c1 > 10) -- -- same as previous example with another conjunct",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─Selection 1118.52 cop[tikv] gt(test.t1.c1, 10)",
" └─TableRangeScan 3366.67 cop[tikv] table:t1 range:[1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 > 10) -- simple DNF on (a1,b1) with open interval",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─TableRangeScan 3366.67 cop[tikv] table:t1 range:(1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and (c1 > 10) -- same as previous example with another conjunct",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─Selection 1118.52 cop[tikv] gt(test.t1.c1, 10)",
" └─TableRangeScan 3366.67 cop[tikv] table:t1 range:(1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1<10) or (a1=10 and b1 < 20)) -- upper bound on (a1,b1)",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─TableRangeScan 3356.57 cop[tikv] table:t1 range:[-inf,10), [10 -inf,10 20), keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and ((a1<10) or (a1=10 and b1 < 20)) -- upper and lower bound on (a1,b1)",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─Selection 1122.61 cop[tikv] or(gt(test.t1.a1, 1), and(eq(test.t1.a1, 1), gt(test.t1.b1, 10))), or(lt(test.t1.a1, 10), and(eq(test.t1.a1, 10), lt(test.t1.b1, 20)))",
" └─TableRangeScan 1403.26 cop[tikv] table:t1 range:[1,1], (1,10), [10,10], keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t where (a,b) in ((1,1),(2,2)) and c = 3 -- IN list",
"Plan": [
"Selection 0.00 root eq(test.t.c, 3)",
"└─Batch_Point_Get 2.00 root table:t, clustered index:PRIMARY(a, b) keep order:false, desc:false"
]
},
{
"SQL": "select * from tuk where a<=>null and b>0 and b<2;",
"Plan": [
"IndexReader 0.25 root index:Selection",
"└─Selection 0.25 cop[tikv] eq(test.tuk.b, 1)",
" └─IndexRangeScan 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
]
},
{
"SQL": "select a,b,c from tuk where a>3 and b=4 order by a,c;",
"Plan": [
"IndexReader 3.33 root index:Selection",
"└─Selection 3.33 cop[tikv] eq(test.tuk.b, 4)",
" └─IndexRangeScan 3333.33 cop[tikv] table:tuk, index:a(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 >= '10') -- simple DNF on (a1,b1) ",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─TableRangeScan 3366.67 cop[tikv] table:t1char range:[\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 >= '10')) and (c1 > '10') -- -- same as previous example with another conjunct",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─Selection 1118.52 cop[tikv] gt(test.t1char.c1, 10)",
" └─TableRangeScan 3366.67 cop[tikv] table:t1char range:[\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 > '10') -- simple DNF on (a1,b1) with open interval",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─TableRangeScan 3366.67 cop[tikv] table:t1char range:(\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and (c1 > '10') -- same as previous example with another conjunct",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─Selection 1118.52 cop[tikv] gt(test.t1char.c1, 10)",
" └─TableRangeScan 3366.67 cop[tikv] table:t1char range:(\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1<'10') or (a1='10' and b1 < '20')) -- upper bound on (a1,b1)",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─TableRangeScan 3356.57 cop[tikv] table:t1char range:[-inf,\"10\"), [\"10\" -inf,\"10\" \"20\"), keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and ((a1<'10') or (a1='10' and b1 < '20')) -- upper and lower bound on (a1,b1)",
"Plan": [
"HashAgg 1.00 root funcs:count(Column#5)->Column#4",
"└─TableReader 1.00 root data:HashAgg",
" └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5",
" └─Selection 1122.61 cop[tikv] or(gt(test.t1char.a1, \"1\"), and(eq(test.t1char.a1, \"1\"), gt(test.t1char.b1, \"10\"))), or(lt(test.t1char.a1, \"10\"), and(eq(test.t1char.a1, \"10\"), lt(test.t1char.b1, \"20\")))",
" └─TableRangeScan 1403.26 cop[tikv] table:t1char range:[\"1\",\"1\"], (\"1\",\"10\"), [\"10\",\"10\"], keep order:false, stats:pseudo"
]
}
]
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,8 @@
"IndexPlan": [
"HashJoin 0.03 root CARTESIAN inner join",
"├─IndexReader(Build) 0.01 root partition:p0 index:Selection",
"│ └─Selection 0.01 cop[tikv] eq(test_partition_1.t1.id, 7), or(eq(test_partition_1.t1.a, 1), and(eq(test_partition_1.t1.a, 3), in(test_partition_1.t1.b, 3, 5)))",
"│ └─IndexRangeScan 20.00 cop[tikv] table:t1, index:a(a, b, id) range:[1,1], [3,3], keep order:false, stats:pseudo",
"│ └─Selection 0.01 cop[tikv] eq(test_partition_1.t1.id, 7)",
"│ └─IndexRangeScan 10.20 cop[tikv] table:t1, index:a(a, b, id) range:[1,1], [3 3,3 3], [3 5,3 5], keep order:false, stats:pseudo",
"└─IndexReader(Probe) 3.00 root partition:p1 index:IndexRangeScan",
" └─IndexRangeScan 3.00 cop[tikv] table:t2, index:a(a, b, id) range:[6 7 7,6 7 7], [7 7 7,7 7 7], [8 7 7,8 7 7], keep order:false, stats:pseudo"
]
Expand Down
16 changes: 16 additions & 0 deletions pkg/util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ func extractBestCNFItemRanges(sctx *rangerctx.RangerContext, conds []expression.
bestRes = curRes
}
}

if bestRes != nil && bestRes.rangeResult != nil {
bestRes.rangeResult.IsDNFCond = false
}
Expand Down Expand Up @@ -462,6 +463,21 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
// TODO: we will optimize it later.
res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds)
res.Ranges = ranges
// Choosing between point ranges and bestCNF is needed since bestCNF does not cover the intersection
// of all conjuncts. Even when we add support for intersection, it could be turned off by a flag or it could be
// incomplete due to a long list of conjuncts.
if bestCNFItemRes != nil && res != nil && len(res.Ranges) != 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please comment this case " (heuristics applied for long lists or we turn off the intersection)" in front of here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges)
pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges)
// Pick bestCNFIsSubset if it is more selective than point ranges(res).
// Apply optimization if bestCNFItemRes is a proper subset of point ranges.
if bestCNFIsSubset && !pointRangeIsSubset {
// Update final result and just update: Ranges, AccessConds and RemainedConds
res.RemainedConds = removeConditions(res.RemainedConds, bestCNFItemRes.rangeResult.AccessConds)
res.Ranges = bestCNFItemRes.rangeResult.Ranges
res.AccessConds = bestCNFItemRes.rangeResult.AccessConds
}
}
return res, nil
}
for _, cond := range newConditions {
Expand Down
79 changes: 79 additions & 0 deletions pkg/util/ranger/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,82 @@ func formatDatum(d types.Datum, isLeftSide bool) string {
}
return fmt.Sprintf("%v", d.GetValue())
}

// Check if a list of Datum is a prefix of another list of Datum. This is useful for checking if
// lower/upper bound of a range is a subset of another.
func prefix(tc types.Context, superValue []types.Datum, supValue []types.Datum, length int, collators []collate.Collator) bool {
for i := 0; i < length; i++ {
cmp, err := superValue[i].Compare(tc, &supValue[i], collators[i])
if (err != nil) || (cmp != 0) {
return false
}
}
return true
}

// Subset checks if a list of ranges(rs) is a subset of another list of ranges(superRanges).
// This is true if every range in the first list is a subset of any
// range in the second list. Also, we check if all elements of superRanges are covered.
func (rs Ranges) Subset(tc types.Context, superRanges Ranges) bool {
var subset bool
superRangesCovered := make([]bool, len(superRanges))
if len(rs) == 0 {
return len(superRanges) == 0
} else if len(superRanges) == 0 {
// unrestricted superRanges and restricted rs
return true
}

for _, subRange := range rs {
subset = false
for i, superRange := range superRanges {
if subRange.Subset(tc, superRange) {
subset = true
superRangesCovered[i] = true
break
}
}
if !subset {
return false
}
}
for i := 0; i < len(superRangesCovered); i++ {
if !superRangesCovered[i] {
return false
}
}

return true
}

// Subset for Range type, check if range(ran) is a subset of another range(superRange).
// This is done by:
// - Both ran and superRange have the same collators. This is not needed for the current code path.
// But, it is used here for future use of the function.
// - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of ran.
// Thus include checking open/closed inetrvals.
func (ran *Range) Subset(tc types.Context, superRange *Range) bool {
if len(ran.LowVal) < len(superRange.LowVal) {
return false
}

// Make sure both ran and superRange have the same collations.
// The current code path for this function always will have same collation
// for ran and superRange. It is added here for future
// use of the function.
for i := 0; i < len(superRange.LowVal); i++ {
if ran.Collators[i] != superRange.Collators[i] {
return false
}
}

// Either superRange is closed or both ranges have the same open/close setting.
lowExcludeOK := !superRange.LowExclude || ran.LowExclude == superRange.LowExclude
highExcludeOK := !superRange.HighExclude || ran.HighExclude == superRange.HighExclude
if !lowExcludeOK || !highExcludeOK {
return false
}

return prefix(tc, superRange.LowVal, ran.LowVal, len(superRange.LowVal), ran.Collators) &&
prefix(tc, superRange.HighVal, ran.HighVal, len(superRange.LowVal), ran.Collators)
}
Loading