From 3a98da6d6c854c0c5c9bb75f67f40468e373e0d9 Mon Sep 17 00:00:00 2001
From: ghazalfamilyusa <ghazalfamilyusa@gmail.com>
Date: Mon, 10 Jun 2024 15:28:42 -0700
Subject: [PATCH 1/3] Optimizer: Fix range extraction for CNF (conjunctive
 normal form) predicates

---
 pkg/expression/util.go                        |   6 +-
 pkg/planner/core/casetest/index/index_test.go |  33 +++-
 pkg/planner/core/casetest/index/main_test.go  |   5 +
 .../index/testdata/index_range_in.json        |  23 +++
 .../index/testdata/index_range_out.json       | 144 ++++++++++++++++++
 pkg/util/ranger/detacher.go                   |  13 ++
 pkg/util/ranger/types.go                      |  86 +++++++++++
 tests/integrationtest/r/util/ranger.result    |  13 +-
 8 files changed, 313 insertions(+), 10 deletions(-)
 create mode 100644 pkg/planner/core/casetest/index/testdata/index_range_in.json
 create mode 100644 pkg/planner/core/casetest/index/testdata/index_range_out.json

diff --git a/pkg/expression/util.go b/pkg/expression/util.go
index f5f81c0ef82b8..308805f62f653 100644
--- a/pkg/expression/util.go
+++ b/pkg/expression/util.go
@@ -17,6 +17,7 @@ package expression
 import (
 	"bytes"
 	"context"
+	"fmt"
 	"math"
 	"strconv"
 	"strings"
@@ -992,7 +993,10 @@ func containOuterNot(expr Expression, not bool) bool {
 // Contains tests if `exprs` contains `e`.
 func Contains(exprs []Expression, e Expression) bool {
 	for _, expr := range exprs {
-		if e == expr {
+		// Check string equivalence if one of the expressions is a clone.
+		str1 := fmt.Sprintf("", e)
+		str2 := fmt.Sprintf("", expr)
+		if e == expr || (str1 == str2) {
 			return true
 		}
 	}
diff --git a/pkg/planner/core/casetest/index/index_test.go b/pkg/planner/core/casetest/index/index_test.go
index 8e50923a16824..a72e670d77262 100644
--- a/pkg/planner/core/casetest/index/index_test.go
+++ b/pkg/planner/core/casetest/index/index_test.go
@@ -85,13 +85,42 @@ func TestInvisibleIndex(t *testing.T) {
 	tk.MustExec("use test")
 	tk.MustExec("CREATE TABLE t1 ( a INT, KEY( a ) INVISIBLE );")
 	tk.MustExec("INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);")
-	tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check(
+	tk.MustQuery(`select a FROM t1;`).Check(
 		testkit.Rows(
 			`TableReader_5 10000.00 root  data:TableFullScan_4`,
 			`└─TableFullScan_4 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo`))
 	tk.MustExec("set session tidb_opt_use_invisible_indexes=on;")
-	tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check(
+	tk.MustQuery(`select a FROM t1;`).Check(
 		testkit.Rows(
 			`IndexReader_7 10000.00 root  index:IndexFullScan_6`,
 			`└─IndexFullScan_6 10000.00 cop[tikv] table:t1, index:a(a) keep order:false, stats:pseudo`))
 }
+
+func TestRangeDerivation(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t1 (a1 int, b1 int, c1 int, primary key pkx (a1,b1));")
+	tk.MustExec("create table t1char (a1 char(5), b1 char(5), c1 int, primary key pkx (a1,b1));")
+	tk.MustExec("create table t(a int, b int, c int, primary key(a,b));")
+	tk.MustExec("create table tuk (a int, b int, c int, unique key (a, b, c));")
+	tk.MustExec("set @@session.tidb_regard_null_as_point=false;")
+
+	var input []string
+	var output []struct {
+		SQL  string
+		Plan []string
+	}
+	indexRangeSuiteData := GetIndexRangeSuiteData()
+	indexRangeSuiteData.LoadTestCases(t, &input, &output)
+	indexRangeSuiteData.LoadTestCases(t, &input, &output)
+	for i, sql := range input {
+		plan := tk.MustQuery("explain format = 'brief' " + sql)
+		testdata.OnRecord(func() {
+			output[i].SQL = sql
+			output[i].Plan = testdata.ConvertRowsToStrings(plan.Rows())
+		})
+		plan.Check(testkit.Rows(output[i].Plan...))
+	}
+
+}
diff --git a/pkg/planner/core/casetest/index/main_test.go b/pkg/planner/core/casetest/index/main_test.go
index cad74fcb20ddb..e8ce32fdefc7e 100644
--- a/pkg/planner/core/casetest/index/main_test.go
+++ b/pkg/planner/core/casetest/index/main_test.go
@@ -31,6 +31,7 @@ func TestMain(m *testing.M) {
 
 	flag.Parse()
 	testDataMap.LoadTestSuiteData("testdata", "integration_suite")
+	testDataMap.LoadTestSuiteData("testdata", "index_range")
 
 	opts := []goleak.Option{
 		goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"),
@@ -50,6 +51,10 @@ func TestMain(m *testing.M) {
 	goleak.VerifyTestMain(testmain.WrapTestingM(m, callback), opts...)
 }
 
+func GetIndexRangeSuiteData() testdata.TestData {
+	return testDataMap["index_range"]
+}
+
 func GetIntegrationSuiteData() testdata.TestData {
 	return testDataMap["integration_suite"]
 }
diff --git a/pkg/planner/core/casetest/index/testdata/index_range_in.json b/pkg/planner/core/casetest/index/testdata/index_range_in.json
new file mode 100644
index 0000000000000..166d811251e2a
--- /dev/null
+++ b/pkg/planner/core/casetest/index/testdata/index_range_in.json
@@ -0,0 +1,23 @@
+[
+  {
+    "name": "TestRangeDerivation",
+    "cases": [
+	"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  (a1>1) or (a1=1 and b1 >= 10) -- simple DNF on (a1,b1) ",
+	"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  ((a1>1) or (a1=1 and b1 >= 10)) and (c1 > 10) -- -- same as previous example with another conjunct",
+	"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  (a1>1) or (a1=1 and b1 > 10) -- simple DNF on (a1,b1) with open interval",
+	"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  ((a1>1) or (a1=1 and b1 > 10)) and (c1 > 10) -- same as previous example with another conjunct",
+	"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  ((a1<10) or (a1=10 and b1 < 20)) -- upper bound on (a1,b1)",
+	"select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and ((a1<10) or (a1=10 and b1 < 20)) -- upper and lower bound on (a1,b1)",
+	"select * from t where (a,b) in ((1,1),(2,2)) and c = 3 -- IN list",
+	"select * from tuk where a<=>null and b>0 and b<2;",
+	"select a,b,c  from tuk where a>3 and b=4 order by a,c;",
+	// Same test cases with char type
+	"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  (a1>'1') or (a1='1' and b1 >= '10') -- simple DNF on (a1,b1) ",
+	"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  ((a1>'1') or (a1='1' and b1 >= '10')) and (c1 > '10') -- -- same as previous example with another conjunct",
+	"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  (a1>'1') or (a1='1' and b1 > '10') -- simple DNF on (a1,b1) with open interval",
+	"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  ((a1>'1') or (a1='1' and b1 > '10')) and (c1 > '10') -- same as previous example with another conjunct",
+	"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  ((a1<'10') or (a1='10' and b1 < '20')) -- upper bound on (a1,b1)",
+	"select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and ((a1<'10') or (a1='10' and b1 < '20')) -- upper and lower bound on (a1,b1)"
+    ]
+  }
+]
diff --git a/pkg/planner/core/casetest/index/testdata/index_range_out.json b/pkg/planner/core/casetest/index/testdata/index_range_out.json
new file mode 100644
index 0000000000000..f641f39df3f5c
--- /dev/null
+++ b/pkg/planner/core/casetest/index/testdata/index_range_out.json
@@ -0,0 +1,144 @@
+[
+  {
+    "Name": "TestRangeDerivation",
+    "Cases": [
+      {
+        "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  (a1>1) or (a1=1 and b1 >= 10) -- simple DNF on (a1,b1) ",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─TableRangeScan 3366.67 cop[tikv] table:t1 range:[1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  ((a1>1) or (a1=1 and b1 >= 10)) and (c1 > 10) -- -- same as previous example with another conjunct",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─Selection 1118.52 cop[tikv]  gt(test.t1.c1, 10)",
+          "      └─TableRangeScan 3366.67 cop[tikv] table:t1 range:[1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  (a1>1) or (a1=1 and b1 > 10) -- simple DNF on (a1,b1) with open interval",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─TableRangeScan 3366.67 cop[tikv] table:t1 range:(1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  ((a1>1) or (a1=1 and b1 > 10)) and (c1 > 10) -- same as previous example with another conjunct",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─Selection 1118.52 cop[tikv]  gt(test.t1.c1, 10)",
+          "      └─TableRangeScan 3366.67 cop[tikv] table:t1 range:(1 10,1 +inf], (1,+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where  ((a1<10) or (a1=10 and b1 < 20)) -- upper bound on (a1,b1)",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─TableRangeScan 3356.57 cop[tikv] table:t1 range:[-inf,10), [10 -inf,10 20), keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1,PKX) */ count(*) from t1 where ((a1>1) or (a1=1 and b1 > 10)) and ((a1<10) or (a1=10 and b1 < 20)) -- upper and lower bound on (a1,b1)",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─Selection 1122.61 cop[tikv]  or(gt(test.t1.a1, 1), and(eq(test.t1.a1, 1), gt(test.t1.b1, 10))), or(lt(test.t1.a1, 10), and(eq(test.t1.a1, 10), lt(test.t1.b1, 20)))",
+          "      └─TableRangeScan 1403.26 cop[tikv] table:t1 range:[1,1], (1,10), [10,10], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select * from t where (a,b) in ((1,1),(2,2)) and c = 3 -- IN list",
+        "Plan": [
+          "Selection 0.00 root  eq(test.t.c, 3)",
+          "└─Batch_Point_Get 2.00 root table:t, clustered index:PRIMARY(a, b) keep order:false, desc:false"
+        ]
+      },
+      {
+        "SQL": "select * from tuk where a<=>null and b>0 and b<2;",
+        "Plan": [
+          "IndexReader 0.25 root  index:Selection",
+          "└─Selection 0.25 cop[tikv]  eq(test.tuk.b, 1)",
+          "  └─IndexRangeScan 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select a,b,c  from tuk where a>3 and b=4 order by a,c;",
+        "Plan": [
+          "IndexReader 3.33 root  index:Selection",
+          "└─Selection 3.33 cop[tikv]  eq(test.tuk.b, 4)",
+          "  └─IndexRangeScan 3333.33 cop[tikv] table:tuk, index:a(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  (a1>'1') or (a1='1' and b1 >= '10') -- simple DNF on (a1,b1) ",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─TableRangeScan 3366.67 cop[tikv] table:t1char range:[\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  ((a1>'1') or (a1='1' and b1 >= '10')) and (c1 > '10') -- -- same as previous example with another conjunct",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─Selection 1118.52 cop[tikv]  gt(test.t1char.c1, 10)",
+          "      └─TableRangeScan 3366.67 cop[tikv] table:t1char range:[\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  (a1>'1') or (a1='1' and b1 > '10') -- simple DNF on (a1,b1) with open interval",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─TableRangeScan 3366.67 cop[tikv] table:t1char range:(\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  ((a1>'1') or (a1='1' and b1 > '10')) and (c1 > '10') -- same as previous example with another conjunct",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─Selection 1118.52 cop[tikv]  gt(test.t1char.c1, 10)",
+          "      └─TableRangeScan 3366.67 cop[tikv] table:t1char range:(\"1\" \"10\",\"1\" +inf], (\"1\",+inf], keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where  ((a1<'10') or (a1='10' and b1 < '20')) -- upper bound on (a1,b1)",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─TableRangeScan 3356.57 cop[tikv] table:t1char range:[-inf,\"10\"), [\"10\" -inf,\"10\" \"20\"), keep order:false, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select /*+ USE_INDEX(t1char,PKX) */ count(*) from t1char where ((a1>'1') or (a1='1' and b1 > '10')) and ((a1<'10') or (a1='10' and b1 < '20')) -- upper and lower bound on (a1,b1)",
+        "Plan": [
+          "HashAgg 1.00 root  funcs:count(Column#5)->Column#4",
+          "└─TableReader 1.00 root  data:HashAgg",
+          "  └─HashAgg 1.00 cop[tikv]  funcs:count(1)->Column#5",
+          "    └─Selection 1122.61 cop[tikv]  or(gt(test.t1char.a1, \"1\"), and(eq(test.t1char.a1, \"1\"), gt(test.t1char.b1, \"10\"))), or(lt(test.t1char.a1, \"10\"), and(eq(test.t1char.a1, \"10\"), lt(test.t1char.b1, \"20\")))",
+          "      └─TableRangeScan 1403.26 cop[tikv] table:t1char range:[\"1\",\"1\"], (\"1\",\"10\"), [\"10\",\"10\"], keep order:false, stats:pseudo"
+        ]
+      }
+    ]
+  }
+]
diff --git a/pkg/util/ranger/detacher.go b/pkg/util/ranger/detacher.go
index 79e3996be3ee5..40211287a9c20 100644
--- a/pkg/util/ranger/detacher.go
+++ b/pkg/util/ranger/detacher.go
@@ -277,6 +277,7 @@ func extractBestCNFItemRanges(sctx *rangerctx.RangerContext, conds []expression.
 			bestRes = curRes
 		}
 	}
+
 	if bestRes != nil && bestRes.rangeResult != nil {
 		bestRes.rangeResult.IsDNFCond = false
 	}
@@ -462,6 +463,18 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 		// TODO: we will optimize it later.
 		res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds)
 		res.Ranges = ranges
+		if bestCNFItemRes != nil {
+			bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges)
+			pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges)
+			// Pick bestCNFIsSubset if it is more selective than point ranges(res).
+			// Only optimization if it is a proper subset bestCNFIsSubset and !pointRangeIsSubset.
+			if bestCNFIsSubset && !pointRangeIsSubset {
+				// Update final result and just update: Ranges, AccessConds and RemainedConds
+				res.RemainedConds = removeConditions(res.RemainedConds, bestCNFItemRes.rangeResult.AccessConds)
+				res.Ranges = bestCNFItemRes.rangeResult.Ranges
+				res.AccessConds = bestCNFItemRes.rangeResult.AccessConds
+			}
+		}
 		return res, nil
 	}
 	for _, cond := range newConditions {
diff --git a/pkg/util/ranger/types.go b/pkg/util/ranger/types.go
index e319173537d28..405ea8959c8d6 100644
--- a/pkg/util/ranger/types.go
+++ b/pkg/util/ranger/types.go
@@ -281,3 +281,89 @@ func formatDatum(d types.Datum, isLeftSide bool) string {
 	}
 	return fmt.Sprintf("%v", d.GetValue())
 }
+
+// prefix checks whether the first `length` Datums of superValue and supValue compare equal, i.e. whether
+// superValue[:length] is a prefix of supValue. It is used to compare the lower/upper bounds of two ranges.
+func prefix(tc types.Context, superValue []types.Datum, supValue []types.Datum, length int, collators []collate.Collator) bool {
+	for i := 0; i < length; i++ {
+		cmp, err := superValue[i].Compare(tc, &supValue[i], collators[i])
+		if (err != nil) || (cmp != 0) {
+			return false
+		}
+	}
+	return true
+}
+
+// Check if a list of ranges(subRanges) is a subset of another list of ranges(superRanges).
+// This is true if every range in the first list is a subset of any
+// range in the second list. Also, we check if all elements of superRanges are covered.
+func (subRanges Ranges) Subset(tc types.Context, superRanges Ranges) bool {
+	var subset bool
+	superRangesCovered := make([]bool, len(superRanges))
+	if len(subRanges) == 0 {
+		// Both lists are unrestricted
+		if len(superRanges) == 0 {
+			return true
+		} else {
+			// unrestricted subRanges and restricted superRanges
+			return false
+		}
+	} else if len(superRanges) == 0 {
+		// unrestricted superRanges and restricted subRanges
+		return true
+	}
+
+	for _, subRange := range subRanges {
+		subset = false
+		for i, superRange := range superRanges {
+			if subRange.Subset(tc, superRange) {
+				subset = true
+				superRangesCovered[i] = true
+				break
+			}
+		}
+		if !subset {
+			return false
+		}
+	}
+	for i := 0; i < len(superRangesCovered); i++ {
+		if !superRangesCovered[i] {
+			return false
+		}
+	}
+
+	return true
+}
+
+// check if range(subRange)  is a subset of another range(superRange).
+// This is done by:
+//   - Both subRange and superRange have the same collators. This is not needed for the current code path.
+//     But, it is used here for future use of the function.
+//   - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of subRange.
+//     Thus include checking open/closed inetrvals.
+func (subRange *Range) Subset(tc types.Context, superRange *Range) bool {
+
+	if len(subRange.LowVal) < len(superRange.LowVal) {
+		return false
+	}
+
+	// Make sure both subRange and superRange have the same collations.
+	// The current code path for this function always will have same collation
+	// for subRange and superRange. It is added here for future
+	// use of the function.
+	for i := 0; i < len(superRange.LowVal); i++ {
+		if subRange.Collators[i] != superRange.Collators[i] {
+			return false
+		}
+	}
+
+	// Either superRange is closed or both ranges have the same open/close setting.
+	lowExcludeOK := !superRange.LowExclude || subRange.LowExclude == superRange.LowExclude
+	highExcludeOK := !superRange.HighExclude || subRange.HighExclude == superRange.HighExclude
+	if !lowExcludeOK || !highExcludeOK {
+		return false
+	}
+
+	return prefix(tc, superRange.LowVal, subRange.LowVal, len(superRange.LowVal), subRange.Collators) &&
+		prefix(tc, superRange.HighVal, subRange.HighVal, len(superRange.LowVal), subRange.Collators)
+}
diff --git a/tests/integrationtest/r/util/ranger.result b/tests/integrationtest/r/util/ranger.result
index 7148284aadad8..69f1ae71fb27f 100644
--- a/tests/integrationtest/r/util/ranger.result
+++ b/tests/integrationtest/r/util/ranger.result
@@ -368,9 +368,9 @@ a	b	c
 2	2	3
 explain format='brief' select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3;
 id	estRows	task	access object	operator info
-IndexReader	0.75	root		index:Selection
-└─Selection	0.75	cop[tikv]		eq(util__ranger.t.c, 3), or(eq(util__ranger.t.a, 1), and(eq(util__ranger.t.a, 2), eq(util__ranger.t.b, 2)))
-  └─IndexRangeScan	2.00	cop[tikv]	table:t, index:PRIMARY(a, b, c)	range:[1,1], [2,2], keep order:false
+IndexReader	1.00	root		index:Selection
+└─Selection	1.00	cop[tikv]		eq(util__ranger.t.c, 3)
+  └─IndexRangeScan	2.00	cop[tikv]	table:t, index:PRIMARY(a, b, c)	range:[1,1], [2 2,2 2], keep order:false
 select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3;
 a	b	c
 2	2	3
@@ -415,10 +415,9 @@ a	b	c
 2	2	3
 explain format='brief' select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3;
 id	estRows	task	access object	operator info
-IndexLookUp	0.75	root		
-├─Selection(Build)	2.00	cop[tikv]		or(eq(util__ranger.t.a, 1), and(eq(util__ranger.t.a, 2), eq(util__ranger.t.b, 2)))
-│ └─IndexRangeScan	2.00	cop[tikv]	table:t, index:PRIMARY(a, b)	range:[1,1], [2,2], keep order:false
-└─Selection(Probe)	0.75	cop[tikv]		eq(util__ranger.t.c, 3)
+IndexLookUp	1.00	root		
+├─IndexRangeScan(Build)	2.00	cop[tikv]	table:t, index:PRIMARY(a, b)	range:[1,1], [2 2,2 2], keep order:false
+└─Selection(Probe)	1.00	cop[tikv]		eq(util__ranger.t.c, 3)
   └─TableRowIDScan	2.00	cop[tikv]	table:t	keep order:false
 select * from t use index(primary) where ((a = 1) or (a = 2 and b = 2)) and c = 3;
 a	b	c

From aa038315f630511d80f4abc8a0d719811a8f001c Mon Sep 17 00:00:00 2001
From: ghazalfamilyusa <ghazalfamilyusa@gmail.com>
Date: Mon, 10 Jun 2024 15:52:10 -0700
Subject: [PATCH 2/3] ranger, expression: clean up Subset/Contains and update expected plans

---
 pkg/bindinfo/global_handle_test.go            |  9 ++--
 pkg/expression/util.go                        |  9 ++--
 pkg/planner/core/casetest/index/BUILD.bazel   |  1 +
 pkg/planner/core/casetest/index/index_test.go |  5 +--
 .../testdata/partition_pruner_out.json        |  4 +-
 pkg/util/ranger/detacher.go                   |  4 +-
 pkg/util/ranger/types.go                      | 43 ++++++++-----------
 7 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/pkg/bindinfo/global_handle_test.go b/pkg/bindinfo/global_handle_test.go
index 8eb4113ba0b5e..3325e9be37898 100644
--- a/pkg/bindinfo/global_handle_test.go
+++ b/pkg/bindinfo/global_handle_test.go
@@ -550,11 +550,10 @@ func TestSetVarFixControlWithBinding(t *testing.T) {
 	tk.MustExec(`create table t(id int, a varchar(100), b int, c int, index idx_ab(a, b))`)
 	tk.MustQuery(`explain select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`).Check(
 		testkit.Rows(
-			`IndexLookUp_12 0.01 root  `,
-			`├─Selection_10(Build) 0.02 cop[tikv]  or(eq(test.t.a, "xx"), and(eq(test.t.a, "kk"), eq(test.t.b, 1)))`,
-			`│ └─IndexRangeScan_8 20.00 cop[tikv] table:t, index:idx_ab(a, b) range:["kk","kk"], ["xx","xx"], keep order:false, stats:pseudo`,
-			`└─Selection_11(Probe) 0.01 cop[tikv]  eq(test.t.c, 10)`,
-			`  └─TableRowIDScan_9 0.02 cop[tikv] table:t keep order:false, stats:pseudo`))
+			`IndexLookUp_11 0.01 root  `,
+			`├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:["kk" 1,"kk" 1], ["xx","xx"], keep order:false, stats:pseudo`,
+			`└─Selection_10(Probe) 0.01 cop[tikv]  eq(test.t.c, 10)`,
+			`  └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo`))
 
 	tk.MustExec(`create global binding using select /*+ set_var(tidb_opt_fix_control='44389:ON') */ * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`)
 	tk.MustQuery(`show warnings`).Check(testkit.Rows()) // no warning
diff --git a/pkg/expression/util.go b/pkg/expression/util.go
index 308805f62f653..43cb1a009553f 100644
--- a/pkg/expression/util.go
+++ b/pkg/expression/util.go
@@ -17,7 +17,6 @@ package expression
 import (
 	"bytes"
 	"context"
-	"fmt"
 	"math"
 	"strconv"
 	"strings"
@@ -994,9 +993,11 @@ func containOuterNot(expr Expression, not bool) bool {
 func Contains(exprs []Expression, e Expression) bool {
 	for _, expr := range exprs {
 		// Check string equivalence if one of the expressions is a clone.
-		str1 := fmt.Sprintf("", e)
-		str2 := fmt.Sprintf("", expr)
-		if e == expr || (str1 == str2) {
+		sameString := false
+		if e != nil && expr != nil {
+			sameString = (e.String() == expr.String())
+		}
+		if e == expr || sameString {
 			return true
 		}
 	}
diff --git a/pkg/planner/core/casetest/index/BUILD.bazel b/pkg/planner/core/casetest/index/BUILD.bazel
index 96df3221e09ff..3f264708225fa 100644
--- a/pkg/planner/core/casetest/index/BUILD.bazel
+++ b/pkg/planner/core/casetest/index/BUILD.bazel
@@ -9,6 +9,7 @@ go_test(
     ],
     data = glob(["testdata/**"]),
     flaky = True,
+    shard_count = 3,
     deps = [
         "//pkg/testkit",
         "//pkg/testkit/testdata",
diff --git a/pkg/planner/core/casetest/index/index_test.go b/pkg/planner/core/casetest/index/index_test.go
index a72e670d77262..3909b655667a4 100644
--- a/pkg/planner/core/casetest/index/index_test.go
+++ b/pkg/planner/core/casetest/index/index_test.go
@@ -85,12 +85,12 @@ func TestInvisibleIndex(t *testing.T) {
 	tk.MustExec("use test")
 	tk.MustExec("CREATE TABLE t1 ( a INT, KEY( a ) INVISIBLE );")
 	tk.MustExec("INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);")
-	tk.MustQuery(`select a FROM t1;`).Check(
+	tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check(
 		testkit.Rows(
 			`TableReader_5 10000.00 root  data:TableFullScan_4`,
 			`└─TableFullScan_4 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo`))
 	tk.MustExec("set session tidb_opt_use_invisible_indexes=on;")
-	tk.MustQuery(`select a FROM t1;`).Check(
+	tk.MustQuery(`EXPLAIN SELECT a FROM t1;`).Check(
 		testkit.Rows(
 			`IndexReader_7 10000.00 root  index:IndexFullScan_6`,
 			`└─IndexFullScan_6 10000.00 cop[tikv] table:t1, index:a(a) keep order:false, stats:pseudo`))
@@ -122,5 +122,4 @@ func TestRangeDerivation(t *testing.T) {
 		})
 		plan.Check(testkit.Rows(output[i].Plan...))
 	}
-
 }
diff --git a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
index ee3b7bb9d2e0d..5721832e2b05e 100644
--- a/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
+++ b/pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json
@@ -480,8 +480,8 @@
         "IndexPlan": [
           "HashJoin 0.03 root  CARTESIAN inner join",
           "├─IndexReader(Build) 0.01 root partition:p0 index:Selection",
-          "│ └─Selection 0.01 cop[tikv]  eq(test_partition_1.t1.id, 7), or(eq(test_partition_1.t1.a, 1), and(eq(test_partition_1.t1.a, 3), in(test_partition_1.t1.b, 3, 5)))",
-          "│   └─IndexRangeScan 20.00 cop[tikv] table:t1, index:a(a, b, id) range:[1,1], [3,3], keep order:false, stats:pseudo",
+          "│ └─Selection 0.01 cop[tikv]  eq(test_partition_1.t1.id, 7)",
+          "│   └─IndexRangeScan 10.20 cop[tikv] table:t1, index:a(a, b, id) range:[1,1], [3 3,3 3], [3 5,3 5], keep order:false, stats:pseudo",
           "└─IndexReader(Probe) 3.00 root partition:p1 index:IndexRangeScan",
           "  └─IndexRangeScan 3.00 cop[tikv] table:t2, index:a(a, b, id) range:[6 7 7,6 7 7], [7 7 7,7 7 7], [8 7 7,8 7 7], keep order:false, stats:pseudo"
         ]
diff --git a/pkg/util/ranger/detacher.go b/pkg/util/ranger/detacher.go
index 40211287a9c20..32a9d3b72c4d4 100644
--- a/pkg/util/ranger/detacher.go
+++ b/pkg/util/ranger/detacher.go
@@ -463,11 +463,11 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 		// TODO: we will optimize it later.
 		res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds)
 		res.Ranges = ranges
-		if bestCNFItemRes != nil {
+		if bestCNFItemRes != nil && res != nil && len(res.Ranges) != 0 {
 			bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges)
 			pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges)
 			// Pick bestCNFIsSubset if it is more selective than point ranges(res).
-			// Only optimization if it is a proper subset bestCNFIsSubset and !pointRangeIsSubset.
+			// Apply optimization if bestCNFItemRes is a proper subset of point ranges.
 			if bestCNFIsSubset && !pointRangeIsSubset {
 				// Update final result and just update: Ranges, AccessConds and RemainedConds
 				res.RemainedConds = removeConditions(res.RemainedConds, bestCNFItemRes.rangeResult.AccessConds)
diff --git a/pkg/util/ranger/types.go b/pkg/util/ranger/types.go
index 405ea8959c8d6..ef8977baa080a 100644
--- a/pkg/util/ranger/types.go
+++ b/pkg/util/ranger/types.go
@@ -294,26 +294,20 @@ func prefix(tc types.Context, superValue []types.Datum, supValue []types.Datum,
 	return true
 }
 
-// Check if a list of ranges(subRanges) is a subset of another list of ranges(superRanges).
+// Subset checks if a list of ranges(rs) is a subset of another list of ranges(superRanges).
 // This is true if every range in the first list is a subset of any
 // range in the second list. Also, we check if all elements of superRanges are covered.
-func (subRanges Ranges) Subset(tc types.Context, superRanges Ranges) bool {
+func (rs Ranges) Subset(tc types.Context, superRanges Ranges) bool {
 	var subset bool
 	superRangesCovered := make([]bool, len(superRanges))
-	if len(subRanges) == 0 {
-		// Both lists are unrestricted
-		if len(superRanges) == 0 {
-			return true
-		} else {
-			// unrestricted subRanges and restricted superRanges
-			return false
-		}
+	if len(rs) == 0 {
+		return len(superRanges) == 0
 	} else if len(superRanges) == 0 {
-		// unrestricted superRanges and restricted subRanges
+		// unrestricted superRanges and restricted rs
 		return true
 	}
 
-	for _, subRange := range subRanges {
+	for _, subRange := range rs {
 		subset = false
 		for i, superRange := range superRanges {
 			if subRange.Subset(tc, superRange) {
@@ -335,35 +329,34 @@ func (subRanges Ranges) Subset(tc types.Context, superRanges Ranges) bool {
 	return true
 }
 
-// check if range(subRange)  is a subset of another range(superRange).
+// Subset checks if range(ran) is a subset of another range(superRange).
 // This is done by:
-//   - Both subRange and superRange have the same collators. This is not needed for the current code path.
+//   - Both ran and superRange have the same collators. This is not needed for the current code path.
 //     But, it is used here for future use of the function.
-//   - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of subRange.
+//   - Checking if the lower/upper bound of superRange covers the corresponding lower/upper bound of ran.
 //     Thus include checking open/closed inetrvals.
-func (subRange *Range) Subset(tc types.Context, superRange *Range) bool {
-
-	if len(subRange.LowVal) < len(superRange.LowVal) {
+func (ran *Range) Subset(tc types.Context, superRange *Range) bool {
+	if len(ran.LowVal) < len(superRange.LowVal) {
 		return false
 	}
 
-	// Make sure both subRange and superRange have the same collations.
+	// Make sure both ran and superRange have the same collations.
 	// The current code path for this function always will have same collation
-	// for subRange and superRange. It is added here for future
+	// for ran and superRange. It is added here for future
 	// use of the function.
 	for i := 0; i < len(superRange.LowVal); i++ {
-		if subRange.Collators[i] != superRange.Collators[i] {
+		if ran.Collators[i] != superRange.Collators[i] {
 			return false
 		}
 	}
 
 	// Either superRange is closed or both ranges have the same open/close setting.
-	lowExcludeOK := !superRange.LowExclude || subRange.LowExclude == superRange.LowExclude
-	highExcludeOK := !superRange.HighExclude || subRange.HighExclude == superRange.HighExclude
+	lowExcludeOK := !superRange.LowExclude || ran.LowExclude == superRange.LowExclude
+	highExcludeOK := !superRange.HighExclude || ran.HighExclude == superRange.HighExclude
 	if !lowExcludeOK || !highExcludeOK {
 		return false
 	}
 
-	return prefix(tc, superRange.LowVal, subRange.LowVal, len(superRange.LowVal), subRange.Collators) &&
-		prefix(tc, superRange.HighVal, subRange.HighVal, len(superRange.LowVal), subRange.Collators)
+	return prefix(tc, superRange.LowVal, ran.LowVal, len(superRange.LowVal), ran.Collators) &&
+		prefix(tc, superRange.HighVal, ran.HighVal, len(superRange.LowVal), ran.Collators)
 }

From 9c7f0c49a729a7fe80ab74409d8329b8030b7445 Mon Sep 17 00:00:00 2001
From: Ahmad Ghazal <113858565+ghazalfamilyusa@users.noreply.github.com>
Date: Thu, 13 Jun 2024 20:42:56 -0700
Subject: [PATCH 3/3] ranger: explain why point ranges are compared with bestCNF ranges

---
 pkg/util/ranger/detacher.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/util/ranger/detacher.go b/pkg/util/ranger/detacher.go
index 32a9d3b72c4d4..2272612badf79 100644
--- a/pkg/util/ranger/detacher.go
+++ b/pkg/util/ranger/detacher.go
@@ -463,6 +463,9 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 		// TODO: we will optimize it later.
 		res.RemainedConds = AppendConditionsIfNotExist(res.RemainedConds, remainedConds)
 		res.Ranges = ranges
+		// Choosing between point ranges and bestCNF is needed since bestCNF does not cover the intersection
+		// of all conjuncts. Even when we add support for intersection, it could be turned off by a flag or it could be
+		// incomplete due to a long list of conjuncts.
 		if bestCNFItemRes != nil && res != nil && len(res.Ranges) != 0 {
 			bestCNFIsSubset := bestCNFItemRes.rangeResult.Ranges.Subset(d.sctx.TypeCtx, res.Ranges)
 			pointRangeIsSubset := res.Ranges.Subset(d.sctx.TypeCtx, bestCNFItemRes.rangeResult.Ranges)