From 3a98da6d6c854c0c5c9bb75f67f40468e373e0d9 Mon Sep 17 00:00:00 2001 From: ghazalfamilyusa Date: Mon, 10 Jun 2024 15:28:42 -0700 Subject: [PATCH 1/3] Optimizer:Fix range extraction for CNF(conjunctive normal form) predicates --- pkg/expression/util.go | 6 +- pkg/planner/core/casetest/index/index_test.go | 33 +++- pkg/planner/core/casetest/index/main_test.go | 5 + .../index/testdata/index_range_in.json | 23 +++ .../index/testdata/index_range_out.json | 144 ++++++++++++++++++ pkg/util/ranger/detacher.go | 13 ++ pkg/util/ranger/types.go | 86 +++++++++++ tests/integrationtest/r/util/ranger.result | 13 +- 8 files changed, 313 insertions(+), 10 deletions(-) create mode 100644 pkg/planner/core/casetest/index/testdata/index_range_in.json create mode 100644 pkg/planner/core/casetest/index/testdata/index_range_out.json diff --git a/pkg/expression/util.go b/pkg/expression/util.go index f5f81c0ef82b8..308805f62f653 100644 --- a/pkg/expression/util.go +++ b/pkg/expression/util.go @@ -17,6 +17,7 @@ package expression import ( "bytes" "context" + "fmt" "math" "strconv" "strings" @@ -992,7 +993,10 @@ func containOuterNot(expr Expression, not bool) bool { // Contains tests if `exprs` contains `e`. func Contains(exprs []Expression, e Expression) bool { for _, expr := range exprs { - if e == expr { + // Check string equivalence if one of the expressions is a clone. 
+ str1 := fmt.Sprintf("", e) + str2 := fmt.Sprintf("", expr) + if e == expr || (str1 == str2) { return true } } diff --git a/pkg/planner/core/casetest/index/index_test.go b/pkg/planner/core/casetest/index/index_test.go index 8e50923a16824..a72e670d77262 100644 --- a/pkg/planner/core/casetest/index/index_test.go +++ b/pkg/planner/core/casetest/index/index_test.go @@ -85,13 +85,42 @@ func TestInvisibleIndex(t *testing.T) { tk.MustExec("use test") tk.MustExec("CREATE TABLE t1 ( a INT, KEY( a ) INVISIBLE );") tk.MustExec("INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);") - tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check( + tk.MustQuery(`select a FROM t1;`).Check( testkit.Rows( `TableReader_5 10000.00 root data:TableFullScan_4`, `└─TableFullScan_4 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo`)) tk.MustExec("set session tidb_opt_use_invisible_indexes=on;") - tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check( + tk.MustQuery(`select a FROM t1;`).Check( testkit.Rows( `IndexReader_7 10000.00 root index:IndexFullScan_6`, `└─IndexFullScan_6 10000.00 cop[tikv] table:t1, index:a(a) keep order:false, stats:pseudo`)) } + +func TestRangeDerivation(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a1 int, b1 int, c1 int, primary key pkx (a1,b1));") + tk.MustExec("create table t1char (a1 char(5), b1 char(5), c1 int, primary key pkx (a1,b1));") + tk.MustExec("create table t(a int, b int, c int, primary key(a,b));") + tk.MustExec("create table tuk (a int, b int, c int, unique key (a, b, c));") + tk.MustExec("set @@session.tidb_regard_null_as_point=false;") + + var input []string + var output []struct { + SQL string + Plan []string + } + indexRangeSuiteData := GetIndexRangeSuiteData() + indexRangeSuiteData.LoadTestCases(t, &input, &output) + indexRangeSuiteData.LoadTestCases(t, &input, &output) + for i, sql := range input { + plan := 
tk.MustQuery("explain format = 'brief' " + sql) + testdata.OnRecord(func() { + output[i].SQL = sql + output[i].Plan = testdata.ConvertRowsToStrings(plan.Rows()) + }) + plan.Check(testkit.Rows(output[i].Plan...)) + } + +} diff --git a/pkg/planner/core/casetest/index/main_test.go b/pkg/planner/core/casetest/index/main_test.go index cad74fcb20ddb..e8ce32fdefc7e 100644 --- a/pkg/planner/core/casetest/index/main_test.go +++ b/pkg/planner/core/casetest/index/main_test.go @@ -31,6 +31,7 @@ func TestMain(m *testing.M) { flag.Parse() testDataMap.LoadTestSuiteData("testdata", "integration_suite") + testDataMap.LoadTestSuiteData("testdata", "index_range") opts := []goleak.Option{ goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"), @@ -50,6 +51,10 @@ func TestMain(m *testing.M) { goleak.VerifyTestMain(testmain.WrapTestingM(m, callback), opts...) } +func GetIndexRangeSuiteData() testdata.TestData { + return testDataMap["index_range"] +} + func GetIntegrationSuiteData() testdata.TestData { return testDataMap["integration_suite"] } diff --git a/pkg/planner/core/casetest/index/testdata/index_range_in.json b/pkg/planner/core/casetest/index/testdata/index_range_in.json new file mode 100644 index 0000000000000..166d811251e2a --- /dev/null +++ b/pkg/planner/core/casetest/index/testdata/index_range_in.json @@ -0,0 +1,23 @@ +[ + { + "name": "TestRangeDerivation", + "cases": [ + "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 >= 10) -- simple DNF on (a1,b1) ", + "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 >= 10)) and (c1 > 10) -- -- same as previous example with another conjunct", + "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 > 10) -- simple DNF on (a1,b1) with open interval", + "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and (c1 > 10) -- same as previous example with another conjunct", + "select /*+ USE_INDEX(t1,PKX) */ 
count(*) from t1 where ((a1<10) or (a1=10 and b1 < 20)) -- upper bound on (a1,b1)", + "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and ((a1<10) or (a1=10 and b1 < 20)) -- upper and lower bound on (a1,b1)", + "select * from t where (a,b) in ((1,1),(2,2)) and c = 3 -- IN list", + "select * from tuk where a<=>null and b>0 and b<2;", + "select a,b,c from tuk where a>3 and b=4 order by a,c;", + // Same test cases with char type + "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 >= '10') -- simple DNF on (a1,b1) ", + "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 >= '10')) and (c1 > '10') -- -- same as previous example with another conjunct", + "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 > '10') -- simple DNF on (a1,b1) with open interval", + "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and (c1 > '10') -- same as previous example with another conjunct", + "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1<'10') or (a1='10' and b1 < '20')) -- upper bound on (a1,b1)", + "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and ((a1<'10') or (a1='10' and b1 < '20')) -- upper and lower bound on (a1,b1)" + ] + } +] diff --git a/pkg/planner/core/casetest/index/testdata/index_range_out.json b/pkg/planner/core/casetest/index/testdata/index_range_out.json new file mode 100644 index 0000000000000..f641f39df3f5c --- /dev/null +++ b/pkg/planner/core/casetest/index/testdata/index_range_out.json @@ -0,0 +1,144 @@ +[ + { + "Name": "TestRangeDerivation", + "Cases": [ + { + "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 >= 10) -- simple DNF on (a1,b1) ", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root 
data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─TableRangeScan 3366.67 cop[tikv] table:t1 range:[1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 >= 10)) and (c1 > 10) -- -- same as previous example with another conjunct", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─Selection 1118.52 cop[tikv] gt(test.t1.c1, 10)", + " └─TableRangeScan 3366.67 cop[tikv] table:t1 range:[1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where (a1>1) or (a1=1 and b1 > 10) -- simple DNF on (a1,b1) with open interval", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─TableRangeScan 3366.67 cop[tikv] table:t1 range:(1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and (c1 > 10) -- same as previous example with another conjunct", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─Selection 1118.52 cop[tikv] gt(test.t1.c1, 10)", + " └─TableRangeScan 3366.67 cop[tikv] table:t1 range:(1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1<10) or (a1=10 and b1 < 20)) -- upper bound on (a1,b1)", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─TableRangeScan 3356.57 cop[tikv] table:t1 range:[-inf,10), [10 
-inf,10 20), keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and ((a1<10) or (a1=10 and b1 < 20)) -- upper and lower bound on (a1,b1)", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─Selection 1122.61 cop[tikv] or(gt(test.t1.a1, 1), and(eq(test.t1.a1, 1), gt(test.t1.b1, 10))), or(lt(test.t1.a1, 10), and(eq(test.t1.a1, 10), lt(test.t1.b1, 20)))", + " └─TableRangeScan 1403.26 cop[tikv] table:t1 range:[1,1], (1,10), [10,10], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t where (a,b) in ((1,1),(2,2)) and c = 3 -- IN list", + "Plan": [ + "Selection 0.00 root eq(test.t.c, 3)", + "└─Batch_Point_Get 2.00 root table:t, clustered index:PRIMARY(a, b) keep order:false, desc:false" + ] + }, + { + "SQL": "select * from tuk where a<=>null and b>0 and b<2;", + "Plan": [ + "IndexReader 0.25 root index:Selection", + "└─Selection 0.25 cop[tikv] eq(test.tuk.b, 1)", + " └─IndexRangeScan 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select a,b,c from tuk where a>3 and b=4 order by a,c;", + "Plan": [ + "IndexReader 3.33 root index:Selection", + "└─Selection 3.33 cop[tikv] eq(test.tuk.b, 4)", + " └─IndexRangeScan 3333.33 cop[tikv] table:tuk, index:a(a, b, c) range:(3,+inf], keep order:true, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 >= '10') -- simple DNF on (a1,b1) ", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─TableRangeScan 3366.67 cop[tikv] table:t1char range:[\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ 
USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 >= '10')) and (c1 > '10') -- -- same as previous example with another conjunct", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─Selection 1118.52 cop[tikv] gt(test.t1char.c1, 10)", + " └─TableRangeScan 3366.67 cop[tikv] table:t1char range:[\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where (a1>'1') or (a1='1' and b1 > '10') -- simple DNF on (a1,b1) with open interval", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─TableRangeScan 3366.67 cop[tikv] table:t1char range:(\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and (c1 > '10') -- same as previous example with another conjunct", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─Selection 1118.52 cop[tikv] gt(test.t1char.c1, 10)", + " └─TableRangeScan 3366.67 cop[tikv] table:t1char range:(\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1<'10') or (a1='10' and b1 < '20')) -- upper bound on (a1,b1)", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─TableRangeScan 3356.57 cop[tikv] table:t1char range:[-inf,\"10\"), [\"10\" -inf,\"10\" \"20\"), keep order:false, stats:pseudo" + ] + }, + { + "SQL": 
"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and ((a1<'10') or (a1='10' and b1 < '20')) -- upper and lower bound on (a1,b1)", + "Plan": [ + "HashAgg 1.00 root funcs:count(Column#5)->Column#4", + "└─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#5", + " └─Selection 1122.61 cop[tikv] or(gt(test.t1char.a1, \"1\"), and(eq(test.t1char.a1, \"1\"), gt(test.t1char.b1, \"10\"))), or(lt(test.t1char.a1, \"10\"), and(eq(test.t1char.a1, \"10\"), lt(test.t1char.b1, \"20\")))", + " └─TableRangeScan 1403.26 cop[tikv] table:t1char range:[\"1\",\"1\"], (\"1\",\"10\"), [\"10\",\"10\"], keep order:false, stats:pseudo" + ] + } + ] + } +] diff --git a/pkg/util/ranger/detacher.go b/pkg/util/ranger/detacher.go index 79e3996be3ee5..40211287a9c20 100644 --- a/pkg/util/ranger/detacher.go +++ b/pkg/util/ranger/detacher.go @@ -277,6 +277,7 @@ func extractBestCNFItemRanges(sctx *rangerctx.RangerContext, conds []expression. bestRes = curRes } } + if bestRes != nil && bestRes.rangeResult != nil { bestRes.rangeResult.IsDNFCond = false } @@ -462,6 +463,18 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi // TODO: we will optimize it later. res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds) res.Ranges = ranges + if bestCNFItemRes != nil { + bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges) + pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges) + // Pick bestCNFIsSubset if it is more selective than point ranges(res). + // Only optimization if it is a proper subset bestCNFIsSubset and !pointRangeIsSubset. 
+ if bestCNFIsSubset && !pointRangeIsSubset { + // Update final result and just update: Ranges, AccessConds and RemainedConds + res.RemainedConds = removeConditions(res.RemainedConds, bestCNFItemRes.rangeResult.AccessConds) + res.Ranges = bestCNFItemRes.rangeResult.Ranges + res.AccessConds = bestCNFItemRes.rangeResult.AccessConds + } + } return res, nil } for _, cond := range newConditions { diff --git a/pkg/util/ranger/types.go b/pkg/util/ranger/types.go index e319173537d28..405ea8959c8d6 100644 --- a/pkg/util/ranger/types.go +++ b/pkg/util/ranger/types.go @@ -281,3 +281,89 @@ func formatDatum(d types.Datum, isLeftSide bool) string { } return fmt.Sprintf("%v", d.GetValue()) } + +// Check if a list of Datum is a prefix of another list of Datum. This is useful for checking if +// lower/upper bound of a range is a subset of another. +func prefix(tc types.Context, superValue []types.Datum, supValue []types.Datum, length int, collators []collate.Collator) bool { + for i := 0; i < length; i++ { + cmp, err := superValue[i].Compare(tc, &supValue[i], collators[i]) + if (err != nil) || (cmp != 0) { + return false + } + } + return true +} + +// Check if a list of ranges(subRanges) is a subset of another list of ranges(superRanges). +// This is true if every range in the first list is a subset of any +// range in the second list. Also, we check if all elements of superRanges are covered. 
+func (subRanges Ranges) Subset(tc types.Context, superRanges Ranges) bool { + var subset bool + superRangesCovered := make([]bool, len(superRanges)) + if len(subRanges) == 0 { + // Both lists are unrestricted + if len(superRanges) == 0 { + return true + } else { + // unrestricted subRanges and restricted superRanges + return false + } + } else if len(superRanges) == 0 { + // unrestricted superRanges and restricted subRanges + return true + } + + for _, subRange := range subRanges { + subset = false + for i, superRange := range superRanges { + if subRange.Subset(tc, superRange) { + subset = true + superRangesCovered[i] = true + break + } + } + if !subset { + return false + } + } + for i := 0; i < len(superRangesCovered); i++ { + if !superRangesCovered[i] { + return false + } + } + + return true +} + +// check if range(subRange) is a subset of another range(superRange). +// This is done by: +// - Both subRange and superRange have the same collators. This is not needed for the current code path. +// But, it is used here for future use of the function. +// - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of subRange. +// Thus include checking open/closed inetrvals. +func (subRange *Range) Subset(tc types.Context, superRange *Range) bool { + + if len(subRange.LowVal) < len(superRange.LowVal) { + return false + } + + // Make sure both subRange and superRange have the same collations. + // The current code path for this function always will have same collation + // for subRange and superRange. It is added here for future + // use of the function. + for i := 0; i < len(superRange.LowVal); i++ { + if subRange.Collators[i] != superRange.Collators[i] { + return false + } + } + + // Either superRange is closed or both ranges have the same open/close setting. 
+ lowExcludeOK := !superRange.LowExclude || subRange.LowExclude == superRange.LowExclude + highExcludeOK := !superRange.HighExclude || subRange.HighExclude == superRange.HighExclude + if !lowExcludeOK || !highExcludeOK { + return false + } + + return prefix(tc, superRange.LowVal, subRange.LowVal, len(superRange.LowVal), subRange.Collators) && + prefix(tc, superRange.HighVal, subRange.HighVal, len(superRange.LowVal), subRange.Collators) +} diff --git a/tests/integrationtest/r/util/ranger.result b/tests/integrationtest/r/util/ranger.result index 7148284aadad8..69f1ae71fb27f 100644 --- a/tests/integrationtest/r/util/ranger.result +++ b/tests/integrationtest/r/util/ranger.result @@ -368,9 +368,9 @@ a b c 2 2 3 explain format='brief' select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3; id estRows task access object operator info -IndexReader 0.75 root index:Selection -└─Selection 0.75 cop[tikv] eq(util__ranger.t.c, 3), or(eq(util__ranger.t.a, 1), and(eq(util__ranger.t.a, 2), eq(util__ranger.t.b, 2))) - └─IndexRangeScan 2.00 cop[tikv] table:t, index:PRIMARY(a, b, c) range:[1,1], [2,2], keep order:false +IndexReader 1.00 root index:Selection +└─Selection 1.00 cop[tikv] eq(util__ranger.t.c, 3) + └─IndexRangeScan 2.00 cop[tikv] table:t, index:PRIMARY(a, b, c) range:[1,1], [2 2,2 2], keep order:false select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3; a b c 2 2 3 @@ -415,10 +415,9 @@ a b c 2 2 3 explain format='brief' select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3; id estRows task access object operator info -IndexLookUp 0.75 root -├─Selection(Build) 2.00 cop[tikv] or(eq(util__ranger.t.a, 1), and(eq(util__ranger.t.a, 2), eq(util__ranger.t.b, 2))) -│ └─IndexRangeScan 2.00 cop[tikv] table:t, index:PRIMARY(a, b) range:[1,1], [2,2], keep order:false -└─Selection(Probe) 0.75 cop[tikv] eq(util__ranger.t.c, 3) +IndexLookUp 1.00 root +├─IndexRangeScan(Build) 2.00 cop[tikv] table:t, 
index:PRIMARY(a, b) range:[1,1], [2 2,2 2], keep order:false +└─Selection(Probe) 1.00 cop[tikv] eq(util__ranger.t.c, 3) └─TableRowIDScan 2.00 cop[tikv] table:t keep order:false select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3; a b c From aa038315f630511d80f4abc8a0d719811a8f001c Mon Sep 17 00:00:00 2001 From: ghazalfamilyusa Date: Mon, 10 Jun 2024 15:52:10 -0700 Subject: [PATCH 2/3] end --- pkg/bindinfo/global_handle_test.go | 9 ++-- pkg/expression/util.go | 9 ++-- pkg/planner/core/casetest/index/BUILD.bazel | 1 + pkg/planner/core/casetest/index/index_test.go | 5 +-- .../testdata/partition_pruner_out.json | 4 +- pkg/util/ranger/detacher.go | 4 +- pkg/util/ranger/types.go | 43 ++++++++----------- 7 files changed, 34 insertions(+), 41 deletions(-) diff --git a/pkg/bindinfo/global_handle_test.go b/pkg/bindinfo/global_handle_test.go index 8eb4113ba0b5e..3325e9be37898 100644 --- a/pkg/bindinfo/global_handle_test.go +++ b/pkg/bindinfo/global_handle_test.go @@ -550,11 +550,10 @@ func TestSetVarFixControlWithBinding(t *testing.T) { tk.MustExec(`create table t(id int, a varchar(100), b int, c int, index idx_ab(a, b))`) tk.MustQuery(`explain select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`).Check( testkit.Rows( - `IndexLookUp_12 0.01 root `, - `├─Selection_10(Build) 0.02 cop[tikv] or(eq(test.t.a, "xx"), and(eq(test.t.a, "kk"), eq(test.t.b, 1)))`, - `│ └─IndexRangeScan_8 20.00 cop[tikv] table:t, index:idx_ab(a, b) range:["kk","kk"], ["xx","xx"], keep order:false, stats:pseudo`, - `└─Selection_11(Probe) 0.01 cop[tikv] eq(test.t.c, 10)`, - ` └─TableRowIDScan_9 0.02 cop[tikv] table:t keep order:false, stats:pseudo`)) + `IndexLookUp_11 0.01 root `, + `├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:["kk" 1,"kk" 1], ["xx","xx"], keep order:false, stats:pseudo`, + `└─Selection_10(Probe) 0.01 cop[tikv] eq(test.t.c, 10)`, + ` └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo`)) 
tk.MustExec(`create global binding using select /*+ set_var(tidb_opt_fix_control='44389:ON') */ * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`) tk.MustQuery(`show warnings`).Check(testkit.Rows()) // no warning diff --git a/pkg/expression/util.go b/pkg/expression/util.go index 308805f62f653..43cb1a009553f 100644 --- a/pkg/expression/util.go +++ b/pkg/expression/util.go @@ -17,7 +17,6 @@ package expression import ( "bytes" "context" - "fmt" "math" "strconv" "strings" @@ -994,9 +993,11 @@ func containOuterNot(expr Expression, not bool) bool { func Contains(exprs []Expression, e Expression) bool { for _, expr := range exprs { // Check string equivalence if one of the expressions is a clone. - str1 := fmt.Sprintf("", e) - str2 := fmt.Sprintf("", expr) - if e == expr || (str1 == str2) { + sameString := false + if e != nil && expr != nil { + sameString = (e.String() == expr.String()) + } + if e == expr || sameString { return true } } diff --git a/pkg/planner/core/casetest/index/BUILD.bazel b/pkg/planner/core/casetest/index/BUILD.bazel index 96df3221e09ff..3f264708225fa 100644 --- a/pkg/planner/core/casetest/index/BUILD.bazel +++ b/pkg/planner/core/casetest/index/BUILD.bazel @@ -9,6 +9,7 @@ go_test( ], data = glob(["testdata/**"]), flaky = True, + shard_count = 3, deps = [ "//pkg/testkit", "//pkg/testkit/testdata", diff --git a/pkg/planner/core/casetest/index/index_test.go b/pkg/planner/core/casetest/index/index_test.go index a72e670d77262..3909b655667a4 100644 --- a/pkg/planner/core/casetest/index/index_test.go +++ b/pkg/planner/core/casetest/index/index_test.go @@ -85,12 +85,12 @@ func TestInvisibleIndex(t *testing.T) { tk.MustExec("use test") tk.MustExec("CREATE TABLE t1 ( a INT, KEY( a ) INVISIBLE );") tk.MustExec("INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);") - tk.MustQuery(`select a FROM t1;`).Check( + tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check( testkit.Rows( `TableReader_5 10000.00 root data:TableFullScan_4`, 
`└─TableFullScan_4 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo`)) tk.MustExec("set session tidb_opt_use_invisible_indexes=on;") - tk.MustQuery(`select a FROM t1;`).Check( + tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check( testkit.Rows( `IndexReader_7 10000.00 root index:IndexFullScan_6`, `└─IndexFullScan_6 10000.00 cop[tikv] table:t1, index:a(a) keep order:false, stats:pseudo`)) @@ -122,5 +122,4 @@ func TestRangeDerivation(t *testing.T) { }) plan.Check(testkit.Rows(output[i].Plan...)) } - } diff --git a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json index ee3b7bb9d2e0d..5721832e2b05e 100644 --- a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json +++ b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json @@ -480,8 +480,8 @@ "IndexPlan": [ "HashJoin 0.03 root CARTESIAN inner join", "├─IndexReader(Build) 0.01 root partition:p0 index:Selection", - "│ └─Selection 0.01 cop[tikv] eq(test_partition_1.t1.id, 7), or(eq(test_partition_1.t1.a, 1), and(eq(test_partition_1.t1.a, 3), in(test_partition_1.t1.b, 3, 5)))", - "│ └─IndexRangeScan 20.00 cop[tikv] table:t1, index:a(a, b, id) range:[1,1], [3,3], keep order:false, stats:pseudo", + "│ └─Selection 0.01 cop[tikv] eq(test_partition_1.t1.id, 7)", + "│ └─IndexRangeScan 10.20 cop[tikv] table:t1, index:a(a, b, id) range:[1,1], [3 3,3 3], [3 5,3 5], keep order:false, stats:pseudo", "└─IndexReader(Probe) 3.00 root partition:p1 index:IndexRangeScan", " └─IndexRangeScan 3.00 cop[tikv] table:t2, index:a(a, b, id) range:[6 7 7,6 7 7], [7 7 7,7 7 7], [8 7 7,8 7 7], keep order:false, stats:pseudo" ] diff --git a/pkg/util/ranger/detacher.go b/pkg/util/ranger/detacher.go index 40211287a9c20..32a9d3b72c4d4 100644 --- a/pkg/util/ranger/detacher.go +++ b/pkg/util/ranger/detacher.go @@ -463,11 +463,11 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi // TODO: we 
will optimize it later. res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds) res.Ranges = ranges - if bestCNFItemRes != nil { + if bestCNFItemRes != nil && res != nil && len(res.Ranges) != 0 { bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges) pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges) // Pick bestCNFIsSubset if it is more selective than point ranges(res). - // Only optimization if it is a proper subset bestCNFIsSubset and !pointRangeIsSubset. + // Apply optimization if bestCNFItemRes is a proper subset of point ranges. if bestCNFIsSubset && !pointRangeIsSubset { // Update final result and just update: Ranges, AccessConds and RemainedConds res.RemainedConds = removeConditions(res.RemainedConds, bestCNFItemRes.rangeResult.AccessConds) diff --git a/pkg/util/ranger/types.go b/pkg/util/ranger/types.go index 405ea8959c8d6..ef8977baa080a 100644 --- a/pkg/util/ranger/types.go +++ b/pkg/util/ranger/types.go @@ -294,26 +294,20 @@ func prefix(tc types.Context, superValue []types.Datum, supValue []types.Datum, return true } -// Check if a list of ranges(subRanges) is a subset of another list of ranges(superRanges). +// Subset checks if a list of ranges(rs) is a subset of another list of ranges(superRanges). // This is true if every range in the first list is a subset of any // range in the second list. Also, we check if all elements of superRanges are covered. 
-func (subRanges Ranges) Subset(tc types.Context, superRanges Ranges) bool { +func (rs Ranges) Subset(tc types.Context, superRanges Ranges) bool { var subset bool superRangesCovered := make([]bool, len(superRanges)) - if len(subRanges) == 0 { - // Both lists are unrestricted - if len(superRanges) == 0 { - return true - } else { - // unrestricted subRanges and restricted superRanges - return false - } + if len(rs) == 0 { + return len(superRanges) == 0 } else if len(superRanges) == 0 { - // unrestricted superRanges and restricted subRanges + // unrestricted superRanges and restricted rs return true } - for _, subRange := range subRanges { + for _, subRange := range rs { subset = false for i, superRange := range superRanges { if subRange.Subset(tc, superRange) { @@ -335,35 +329,34 @@ func (subRanges Ranges) Subset(tc types.Context, superRanges Ranges) bool { return true } -// check if range(subRange) is a subset of another range(superRange). +// Subset for Range type, check if range(ran) is a subset of another range(superRange). // This is done by: -// - Both subRange and superRange have the same collators. This is not needed for the current code path. +// - Both ran and superRange have the same collators. This is not needed for the current code path. // But, it is used here for future use of the function. -// - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of subRange. +// - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of ran. // Thus include checking open/closed inetrvals. -func (subRange *Range) Subset(tc types.Context, superRange *Range) bool { - - if len(subRange.LowVal) < len(superRange.LowVal) { +func (ran *Range) Subset(tc types.Context, superRange *Range) bool { + if len(ran.LowVal) < len(superRange.LowVal) { return false } - // Make sure both subRange and superRange have the same collations. + // Make sure both ran and superRange have the same collations. 
// The current code path for this function always will have same collation - // for subRange and superRange. It is added here for future + // for ran and superRange. It is added here for future // use of the function. for i := 0; i < len(superRange.LowVal); i++ { - if subRange.Collators[i] != superRange.Collators[i] { + if ran.Collators[i] != superRange.Collators[i] { return false } } // Either superRange is closed or both ranges have the same open/close setting. - lowExcludeOK := !superRange.LowExclude || subRange.LowExclude == superRange.LowExclude - highExcludeOK := !superRange.HighExclude || subRange.HighExclude == superRange.HighExclude + lowExcludeOK := !superRange.LowExclude || ran.LowExclude == superRange.LowExclude + highExcludeOK := !superRange.HighExclude || ran.HighExclude == superRange.HighExclude if !lowExcludeOK || !highExcludeOK { return false } - return prefix(tc, superRange.LowVal, subRange.LowVal, len(superRange.LowVal), subRange.Collators) && - prefix(tc, superRange.HighVal, subRange.HighVal, len(superRange.LowVal), subRange.Collators) + return prefix(tc, superRange.LowVal, ran.LowVal, len(superRange.LowVal), ran.Collators) && + prefix(tc, superRange.HighVal, ran.HighVal, len(superRange.LowVal), ran.Collators) } From 9c7f0c49a729a7fe80ab74409d8329b8030b7445 Mon Sep 17 00:00:00 2001 From: Ahmad Ghazal <113858565+ghazalfamilyusa@users.noreply.github.com> Date: Thu, 13 Jun 2024 20:42:56 -0700 Subject: [PATCH 3/3] Update detacher.go --- pkg/util/ranger/detacher.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/util/ranger/detacher.go b/pkg/util/ranger/detacher.go index 32a9d3b72c4d4..2272612badf79 100644 --- a/pkg/util/ranger/detacher.go +++ b/pkg/util/ranger/detacher.go @@ -463,6 +463,9 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi // TODO: we will optimize it later. 
res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds) res.Ranges = ranges + // We must choose between the point ranges (res.Ranges) and the ranges of bestCNFItemRes because the latter + // do not cover the intersection of all conjuncts. Even once intersection is supported, that support could be + // turned off by a flag, or the intersection could be incomplete for a long list of conjuncts. if bestCNFItemRes != nil && res != nil && len(res.Ranges) != 0 { bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges) pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges)