From d9d8ce23a8c9ea0ae9099916010fe9d5c42f2005 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Wed, 14 Jul 2021 16:32:20 +0800
Subject: [PATCH 01/21] refine index back factor of skyline pruning

---
 planner/core/find_best_task.go    | 31 +++++++++++++++++++++++--------
 planner/core/logical_plan_test.go |  8 ++++++++
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 251670ed14a9a..a261621fe1a76 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -415,10 +415,11 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {
 
 // candidatePath is used to maintain required info for skyline pruning.
 type candidatePath struct {
-	path         *util.AccessPath
-	columnSet    *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions.
-	isSingleScan bool
-	isMatchProp  bool
+	path               *util.AccessPath
+	accessCondsColSet  *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions.
+	indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters.
+	isSingleScan       bool
+	isMatchProp        bool
 }
 
 // compareColumnSet will compares the two set. The last return value is used to indicate
@@ -451,6 +452,16 @@ func compareBool(l, r bool) int {
 	return 1
 }
 
+func compareIndexBack(lhs, rhs *candidatePath) (int, bool) {
+	result := compareBool(lhs.isSingleScan, rhs.isSingleScan)
+	if result == 0 && !lhs.isSingleScan {
+		// if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters
+		// to compare how many table rows will be accessed.
+		return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet)
+	}
+	return result, true
+}
+
 // compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
 // (1): the set of columns that occurred in the access condition,
 // (2): whether or not it matches the physical property
@@ -458,11 +469,14 @@ func compareBool(l, r bool) int {
 // If `x` is not worse than `y` at all factors,
 // and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
 func compareCandidates(lhs, rhs *candidatePath) int {
-	setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet)
+	setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet)
+	if !comparable {
+		return 0
+	}
+	scanResult, comparable := compareIndexBack(lhs, rhs)
 	if !comparable {
 		return 0
 	}
-	scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan)
 	matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp)
 	sum := setsResult + scanResult + matchResult
 	if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
@@ -499,7 +513,7 @@ func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.Ph
 			}
 		}
 	}
-	candidate.columnSet = expression.ExtractColumnSet(path.AccessConds)
+	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
 	candidate.isSingleScan = true
 	return candidate
 }
@@ -519,7 +533,8 @@ func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.Ph
 			}
 		}
 	}
-	candidate.columnSet = expression.ExtractColumnSet(path.AccessConds)
+	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
+	candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
 	candidate.isSingleScan = isSingleScan
 	return candidate
 }
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index bb6786a7958b3..2f6fc2743ae77 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -1700,6 +1700,14 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
 			sql:    "select count(1) from t",
 			result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix",
 		},
+		{
+			sql:    "select * from t where e_str = 'hi' order by c",
+			result: "PRIMARY_KEY,c_d_e_str,c_d_e_str_prefix",
+		},
+		{
+			sql:    "select * from t where f > 3 and g = 5",
+			result: "PRIMARY_KEY,g,f_g",
+		},
 	}
 	ctx := context.TODO()
 	for i, tt := range tests {

From b45895a8a598a6930b6114ba39369f36dd9b4d42 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Wed, 14 Jul 2021 16:47:49 +0800
Subject: [PATCH 02/21] fix test case

---
 planner/core/logical_plan_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index 2f6fc2743ae77..ae2ad51c9f88f 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -1694,7 +1694,7 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
 		},
 		{
 			sql:    "select * from t where f > 1 and g > 1",
-			result: "PRIMARY_KEY,f,g,f_g",
+			result: "PRIMARY_KEY,g,f_g",
 		},
 		{
 			sql:    "select count(1) from t",

From 25b39f53c3e3952676f1375021ea0eac24125329 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Wed, 14 Jul 2021 17:49:17 +0800
Subject: [PATCH 03/21] enhance isMatchProp

---
 planner/core/exhaust_physical_plans.go |  2 +-
 planner/core/find_best_task.go         | 57 +++++++++++++-------------
 planner/core/logical_plan_test.go      |  8 ++++
 planner/core/logical_plans.go          |  2 +
 planner/util/path.go                   |  2 +
 util/ranger/detacher.go                | 26 +++++++++---
 6 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go
index fb06f570fdce7..cfa114c47599b 100644
--- a/planner/core/exhaust_physical_plans.go
+++ b/planner/core/exhaust_physical_plans.go
@@ -1251,7 +1251,7 @@ func (ijHelper *indexJoinBuildHelper) findUsefulEqAndInFilters(innerPlan *DataSo
 	var remainedEqOrIn []expression.Expression
 	// Extract the eq/in functions of possible join key.
 	// you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value.
-	usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _ = ranger.ExtractEqAndInCondition(
+	usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _, _ = ranger.ExtractEqAndInCondition(
 		innerPlan.ctx, innerPlan.pushedDownConds,
 		ijHelper.curNotUsedIndexCols,
 		ijHelper.curNotUsedColLens,
diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index a261621fe1a76..307a7c4c3ed30 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -488,31 +488,42 @@ func compareCandidates(lhs, rhs *candidatePath) int {
 	return 0
 }
 
-func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
-	candidate := &candidatePath{path: path}
+func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.PhysicalProperty) bool {
+	var isMatchProp bool
 	if path.IsIntHandlePath {
 		pkCol := ds.getPKIsHandleCol()
 		if len(prop.SortItems) == 1 && pkCol != nil {
-			candidate.isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol)
+			isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol)
 			if path.StoreType == kv.TiFlash {
-				candidate.isMatchProp = candidate.isMatchProp && !prop.SortItems[0].Desc
+				isMatchProp = isMatchProp && !prop.SortItems[0].Desc
 			}
 		}
-	} else {
-		all, _ := prop.AllSameOrder()
-		// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
-		// it needs not to keep order for index scan.
-		if !prop.IsEmpty() && all {
-			for i, col := range path.IdxCols {
-				if col.Equal(nil, prop.SortItems[0].Col) {
-					candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems)
-					break
-				} else if i >= path.EqCondCount {
-					break
-				}
+		return isMatchProp
+	}
+	// TODO: do we need to consider TiFlash here?
+	all, _ := prop.AllSameOrder()
+	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
+	// it needs not to keep order for index scan.
+	if !prop.IsEmpty() && all {
+		for _, sortItem := range prop.SortItems {
+			var i, j int
+			if i < len(path.EqualCols) && sortItem.Col.Equal(nil, path.EqualCols[i]) {
+				i++
+				j++
+			} else if j < len(path.IdxCols) && path.IdxColLens[j] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[j]) {
+				j++
+			} else {
+				isMatchProp = false
+				break
 			}
 		}
 	}
+	return isMatchProp
+}
+
+func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
+	candidate := &candidatePath{path: path}
+	candidate.isMatchProp = ds.isMatchProp(path, prop)
 	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
 	candidate.isSingleScan = true
 	return candidate
@@ -520,19 +531,7 @@ func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.Ph
 
 func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath {
 	candidate := &candidatePath{path: path}
-	all, _ := prop.AllSameOrder()
-	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
-	// it needs not to keep order for index scan.
-	if !prop.IsEmpty() && all {
-		for i, col := range path.IdxCols {
-			if col.Equal(nil, prop.SortItems[0].Col) {
-				candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems)
-				break
-			} else if i >= path.EqCondCount {
-				break
-			}
-		}
-	}
+	candidate.isMatchProp = ds.isMatchProp(path, prop)
 	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
 	candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
 	candidate.isSingleScan = isSingleScan
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index ae2ad51c9f88f..b3bdbbbf3b1f8 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -1708,6 +1708,14 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
 			sql:    "select * from t where f > 3 and g = 5",
 			result: "PRIMARY_KEY,g,f_g",
 		},
+		{
+			sql:    "select * from t where d = 3 order by c, e",
+			result: "PRIMARY_KEY,c_d_e",
+		},
+		{
+			sql:    "select * from t where c > 1 and d = 1 and e > 1 and e_str = 'hi' order by c, e",
+			result: "PRIMARY_KEY,c_d_e,c_d_e_str,c_d_e_str_prefix",
+		},
 	}
 	ctx := context.TODO()
 	for i, tt := range tests {
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index 9113539116e8d..e6351c762d141 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -675,6 +675,7 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
+		path.EqualCols = res.EqualCols
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
 			return false, err
@@ -854,6 +855,7 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
+		path.EqualCols = res.EqualCols
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
 			return err
diff --git a/planner/util/path.go b/planner/util/path.go
index f6fa0b47e0f51..79f04374703c6 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -43,6 +43,8 @@ type AccessPath struct {
 	EqOrInCondCount int
 	IndexFilters    []expression.Expression
 	TableFilters    []expression.Expression
+	// EqualCols is the columns evaluated as constant under the given conditions.
+	EqualCols []*expression.Column
 	// PartialIndexPaths store all index access paths.
 	// If there are extra filters, store them in TableFilters.
 	PartialIndexPaths []*AccessPath
diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
index f26e96c42d7f8..02aa33d6698ea 100644
--- a/util/ranger/detacher.go
+++ b/util/ranger/detacher.go
@@ -254,7 +254,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 	)
 	res := &DetachRangeResult{}
 
-	accessConds, filterConds, newConditions, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths)
+	accessConds, filterConds, newConditions, equalCols, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths)
 	if emptyRange {
 		return res, nil
 	}
@@ -286,6 +286,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 	res.Ranges = ranges
 	res.AccessConds = accessConds
 	res.RemainedConds = filterConds
+	res.EqualCols = equalCols
 	if eqOrInCount == len(d.cols) || len(newConditions) == 0 {
 		res.RemainedConds = append(res.RemainedConds, newConditions...)
 		return res, nil
@@ -465,15 +466,17 @@ func allEqOrIn(expr expression.Expression) bool {
 // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column.
 // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column.
 //   e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions.
+// equalCols: equalCols indicates which columns are evaluated as constant under the given conditions.
 // bool: indicate whether there's nil range when merging eq and in conditions.
-func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression,
-	cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) {
+func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
+	lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*expression.Column, bool) {
 	var filters []expression.Expression
 	rb := builder{sc: sctx.GetSessionVars().StmtCtx}
 	accesses := make([]expression.Expression, len(cols))
 	points := make([][]*point, len(cols))
 	mergedAccesses := make([]expression.Expression, len(cols))
 	newConditions := make([]expression.Expression, 0, len(conditions))
+	equalCols := make([]*expression.Column, 0, len(cols))
 	offsets := make([]int, len(conditions))
 	for i, cond := range conditions {
 		offset := getPotentialEqOrInColOffset(cond, cols)
@@ -494,7 +497,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 		points[offset] = rb.intersection(points[offset], rb.build(cond))
 		// Early termination if false expression found
 		if len(points[offset]) == 0 {
-			return nil, nil, nil, true
+			return nil, nil, nil, nil, true
 		}
 	}
 	for i, ma := range mergedAccesses {
@@ -514,7 +517,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			accesses[i] = nil
 		} else if len(points[i]) == 0 {
 			// Early termination if false expression found
-			return nil, nil, nil, true
+			return nil, nil, nil, nil, true
 		} else {
 			// All Intervals are single points
 			accesses[i] = points2EqOrInCond(sctx, points[i], cols[i])
@@ -527,6 +530,15 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			newConditions = append(newConditions, conditions[i])
 		}
 	}
+	for _, cond := range accesses {
+		if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) {
+			if col, ok := f.GetArgs()[0].(*expression.Column); ok {
+				equalCols = append(equalCols, col)
+			} else if col, ok := f.GetArgs()[1].(*expression.Column); ok {
+				equalCols = append(equalCols, col)
+			}
+		}
+	}
 	for i, cond := range accesses {
 		if cond == nil {
 			accesses = accesses[:i]
@@ -546,7 +558,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 	}
 	// We should remove all accessConds, so that they will not be added to filter conditions.
 	newConditions = removeAccessConditions(newConditions, accesses)
-	return accesses, filters, newConditions, false
+	return accesses, filters, newConditions, equalCols, false
 }
 
 // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF.
@@ -619,6 +631,8 @@ type DetachRangeResult struct {
 	AccessConds []expression.Expression
 	// RemainedConds is the filter conditions which should be kept after access.
 	RemainedConds []expression.Expression
+	// EqualCols is the columns evaluated as constant under the given conditions.
+	EqualCols []*expression.Column
 	// EqCondCount is the number of equal conditions extracted.
 	EqCondCount int
 	// EqOrInCount is the number of equal/in conditions extracted.

From 6ad7d5c22424d35dcdbefc4a33915eee78a05c4d Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 15 Jul 2021 12:41:29 +0800
Subject: [PATCH 04/21] fix unit tests

---
 planner/core/find_best_task.go    | 24 ++++++++++++++++--------
 planner/core/logical_plan_test.go | 10 +++++-----
 planner/core/mock.go              |  4 ++--
 planner/util/path.go              |  6 +++---
 util/ranger/detacher.go           | 20 ++++++++++----------
 5 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 307a7c4c3ed30..6bcc4edd93c8e 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -501,18 +501,26 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 		return isMatchProp
 	}
 	// TODO: do we need to consider TiFlash here?
+	// TODO: check is it ok to cache the optimization?
 	all, _ := prop.AllSameOrder()
 	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
 	// it needs not to keep order for index scan.
-	if !prop.IsEmpty() && all {
+	if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) {
+		isMatchProp = true
+		i := 0
 		for _, sortItem := range prop.SortItems {
-			var i, j int
-			if i < len(path.EqualCols) && sortItem.Col.Equal(nil, path.EqualCols[i]) {
-				i++
-				j++
-			} else if j < len(path.IdxCols) && path.IdxColLens[j] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[j]) {
-				j++
-			} else {
+			found := false
+			for ; i < len(path.IdxCols); i++ {
+				if path.IdxColLens[i] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[i]) {
+					found = true
+					i++
+					break
+				}
+				if !path.EqualCols[i] {
+					break
+				}
+			}
+			if !found {
 				isMatchProp = false
 				break
 			}
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index b3bdbbbf3b1f8..41626ef4bef86 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -1701,11 +1701,11 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
 			result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix",
 		},
 		{
-			sql:    "select * from t where e_str = 'hi' order by c",
-			result: "PRIMARY_KEY,c_d_e_str,c_d_e_str_prefix",
+			sql:    "select * from t where f > 3 and g = 5",
+			result: "PRIMARY_KEY,g,f_g",
 		},
 		{
-			sql:    "select * from t where f > 3 and g = 5",
+			sql:    "select * from t where g = 5 order by f",
 			result: "PRIMARY_KEY,g,f_g",
 		},
 		{
@@ -1713,8 +1713,8 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
 			result: "PRIMARY_KEY,c_d_e",
 		},
 		{
-			sql:    "select * from t where c > 1 and d = 1 and e > 1 and e_str = 'hi' order by c, e",
-			result: "PRIMARY_KEY,c_d_e,c_d_e_str,c_d_e_str_prefix",
+			sql:    "select * from t where d = 1 and f > 1 and g > 1 order by c, e",
+			result: "PRIMARY_KEY,c_d_e,g,f_g",
 		},
 	}
 	ctx := context.TODO()
diff --git a/planner/core/mock.go b/planner/core/mock.go
index 42e6141980e90..eac3315fcdeac 100644
--- a/planner/core/mock.go
+++ b/planner/core/mock.go
@@ -43,9 +43,9 @@ func newDateType() types.FieldType {
 
 // MockSignedTable is only used for plan related tests.
 func MockSignedTable() *model.TableInfo {
-	// column: a, b, c, d, e, c_str, d_str, e_str, f, g
+	// column: a, b, c, d, e, c_str, d_str, e_str, f, g, h, i_date
 	// PK: a
-	// indices: c_d_e, e, f, g, f_g, c_d_e_str, c_d_e_str_prefix
+	// indices: c_d_e, e, f, g, f_g, c_d_e_str, e_d_c_str_prefix
 	indices := []*model.IndexInfo{
 		{
 			Name: model.NewCIStr("c_d_e"),
diff --git a/planner/util/path.go b/planner/util/path.go
index 79f04374703c6..6dce8d43759fd 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -32,7 +32,9 @@ type AccessPath struct {
 	FullIdxColLens []int
 	IdxCols        []*expression.Column
 	IdxColLens     []int
-	Ranges         []*ranger.Range
+	// EqualCols indicates whether the column is constant under the given conditions for all index columns.
+	EqualCols []bool
+	Ranges []*ranger.Range
 	// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
 	// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
 	CountAfterAccess float64
@@ -43,8 +45,6 @@ type AccessPath struct {
 	EqOrInCondCount int
 	IndexFilters    []expression.Expression
 	TableFilters    []expression.Expression
-	// EqualCols is the columns evaluated as constant under the given conditions.
-	EqualCols []*expression.Column
 	// PartialIndexPaths store all index access paths.
 	// If there are extra filters, store them in TableFilters.
 	PartialIndexPaths []*AccessPath
diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
index 02aa33d6698ea..ab236d8c7b377 100644
--- a/util/ranger/detacher.go
+++ b/util/ranger/detacher.go
@@ -466,17 +466,17 @@ func allEqOrIn(expr expression.Expression) bool {
 // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column.
 // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column.
 //   e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions.
-// equalCols: equalCols indicates which columns are evaluated as constant under the given conditions.
+// equalCols: equalCols indicates whether the column is constant under the given conditions for all index columns.
 // bool: indicate whether there's nil range when merging eq and in conditions.
 func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
-	lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*expression.Column, bool) {
+	lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []bool, bool) {
 	var filters []expression.Expression
 	rb := builder{sc: sctx.GetSessionVars().StmtCtx}
 	accesses := make([]expression.Expression, len(cols))
 	points := make([][]*point, len(cols))
 	mergedAccesses := make([]expression.Expression, len(cols))
 	newConditions := make([]expression.Expression, 0, len(conditions))
-	equalCols := make([]*expression.Column, 0, len(cols))
+	equalCols := make([]bool, len(cols))
 	offsets := make([]int, len(conditions))
 	for i, cond := range conditions {
 		offset := getPotentialEqOrInColOffset(cond, cols)
@@ -530,12 +530,12 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			newConditions = append(newConditions, conditions[i])
 		}
 	}
-	for _, cond := range accesses {
+	for i, cond := range accesses {
 		if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) {
-			if col, ok := f.GetArgs()[0].(*expression.Column); ok {
-				equalCols = append(equalCols, col)
-			} else if col, ok := f.GetArgs()[1].(*expression.Column); ok {
-				equalCols = append(equalCols, col)
+			if _, ok := f.GetArgs()[0].(*expression.Column); ok {
+				equalCols[i] = true
+			} else if _, ok := f.GetArgs()[1].(*expression.Column); ok {
+				equalCols[i] = true
 			}
 		}
 	}
@@ -631,8 +631,8 @@ type DetachRangeResult struct {
 	AccessConds []expression.Expression
 	// RemainedConds is the filter conditions which should be kept after access.
 	RemainedConds []expression.Expression
-	// EqualCols is the columns evaluated as constant under the given conditions.
-	EqualCols []*expression.Column
+	// EqualCols indicates whether the column is constant under the given conditions for all index columns.
+	EqualCols []bool
 	// EqCondCount is the number of equal conditions extracted.
 	EqCondCount int
 	// EqOrInCount is the number of equal/in conditions extracted.

From d24648488f0b2134e3a3d637cac9706d5e68ed86 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 15 Jul 2021 15:02:54 +0800
Subject: [PATCH 05/21] add test for isMatchProp

---
 planner/core/find_best_task.go                |  2 +-
 planner/core/integration_test.go              | 23 +++++++++++++++++++
 .../core/testdata/integration_suite_in.json   |  7 ++++++
 .../core/testdata/integration_suite_out.json  | 21 +++++++++++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 6bcc4edd93c8e..549caddf4c2cb 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -516,7 +516,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 					i++
 					break
 				}
-				if !path.EqualCols[i] {
+				if path.EqualCols == nil || !path.EqualCols[i] {
 					break
 				}
 			}
diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index ef078f912abf4..ad1bd91c5089d 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -3952,3 +3952,26 @@ func (s *testIntegrationSerialSuite) TestSelectIgnoreTemporaryTableInView(c *C)
 	tk.MustQuery("select * from v5").Check(testkit.Rows("1 2", "3 4"))
 
 }
+
+// TestIsMatchProp is used to test https://github.com/pingcap/tidb/issues/26017.
+func (s *testIntegrationSuite) TestIsMatchProp(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a))")
+
+	var input []string
+	var output []struct {
+		SQL  string
+		Plan []string
+	}
+	s.testData.GetTestCases(c, &input, &output)
+	for i, tt := range input {
+		s.testData.OnRecord(func() {
+			output[i].SQL = tt
+			output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows())
+		})
+		tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...))
+	}
+}
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index bf2391065a86a..792c7418d32e4 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -310,5 +310,12 @@
       "select sum(1) from s1",
       "select count(1) as cnt from s1 union select count(1) as cnt from s2"
     ]
+  },
+  {
+    "name": "TestIsMatchProp",
+    "cases": [
+      "select a, b, c from t where a > 3 and b = 4 order by a, c",
+      "select * from t where d = 1 and b = 2 order by c, a"
+    ]
   }
 ]
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 37330e65673c9..37526f17eb6cd 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1636,5 +1636,26 @@
         ]
       }
     ]
+  },
+  {
+    "Name": "TestIsMatchProp",
+    "Cases": [
+      {
+        "SQL": "select a, b, c from t where a > 3 and b = 4 order by a, c",
+        "Plan": [
+          "IndexReader 3.33 root  index:Selection",
+          "└─Selection 3.33 cop[tikv]  eq(test.t.b, 4)",
+          "  └─IndexRangeScan 3333.33 cop[tikv] table:t, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select * from t where d = 1 and b = 2 order by c, a",
+        "Plan": [
+          "IndexReader 0.01 root  index:Selection",
+          "└─Selection 0.01 cop[tikv]  eq(test.t.b, 2)",
+          "  └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_d_c_b_a(d, c, b, a) range:[1,1], keep order:true, stats:pseudo"
+        ]
+      }
+    ]
   }
 ]

From 0685f1555a35400b2b0671bac7f1415bd4ea9fef Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 15 Jul 2021 15:44:21 +0800
Subject: [PATCH 06/21] fmt

---
 planner/util/path.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/planner/util/path.go b/planner/util/path.go
index 6dce8d43759fd..16dd3c844ac3b 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -34,7 +34,7 @@ type AccessPath struct {
 	IdxColLens     []int
 	// EqualCols indicates whether the column is constant under the given conditions for all index columns.
 	EqualCols []bool
-	Ranges []*ranger.Range
+	Ranges    []*ranger.Range
 	// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
 	// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
 	CountAfterAccess float64

From 486fdc71f32b0278dd9493a7d83abf53c9054dff Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Wed, 21 Jul 2021 10:33:58 +0800
Subject: [PATCH 07/21] add comment

---
 planner/core/find_best_task.go | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 549caddf4c2cb..cbbbcf2a03c0b 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -505,6 +505,18 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 	all, _ := prop.AllSameOrder()
 	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
 	// it needs not to keep order for index scan.
+
+	// Basically, if `prop.SortItems` is the prefix of `path.IdxCols`, then `isMatchProp` is true. However, we need to consider
+	// the situations when some columns of `path.IdxCols` are evaluated as constant. For example:
+	// ```
+	// create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a));
+	// select * from t where a = 1 order by b, c;
+	// select * from t where b = 1 order by a, c;
+	// select * from t where d = 1 and b = 2 order by c, a;
+	// select * from t where d = 1 and b = 2 order by c, b, a;
+	// ```
+	// In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a`
+	// matches the sort order. Hence, we use `path.EqualCols` to deal with the above situations.
 	if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) {
 		isMatchProp = true
 		i := 0

From 7decc45c8b6d2051f913a698cdb66a8b02918a2a Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Fri, 23 Jul 2021 18:01:23 +0800
Subject: [PATCH 08/21] enhance detection of constant columns

---
 planner/core/find_best_task.go                |   4 +-
 planner/core/integration_test.go              |   5 +-
 planner/core/logical_plans.go                 |  14 +-
 .../core/testdata/integration_suite_in.json   |   7 +-
 .../core/testdata/integration_suite_out.json  |  34 +++-
 planner/util/path.go                          |   6 +-
 util/ranger/detacher.go                       | 165 +++++++++++++-----
 7 files changed, 176 insertions(+), 59 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index cbbbcf2a03c0b..34ce53d77a514 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -516,7 +516,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 	// select * from t where d = 1 and b = 2 order by c, b, a;
 	// ```
 	// In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a`
-	// matches the sort order. Hence, we use `path.EqualCols` to deal with the above situations.
+	// matches the sort order. Hence, we use `path.ConstantCols` to deal with the above situations.
 	if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) {
 		isMatchProp = true
 		i := 0
@@ -528,7 +528,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 					i++
 					break
 				}
-				if path.EqualCols == nil || !path.EqualCols[i] {
+				if path.ConstantCols == nil || !path.ConstantCols[i] {
 					break
 				}
 			}
diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index ad1bd91c5089d..e55ba43e9dbca 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -3958,8 +3958,9 @@ func (s *testIntegrationSuite) TestIsMatchProp(c *C) {
 	tk := testkit.NewTestKit(c, s.store)
 
 	tk.MustExec("use test")
-	tk.MustExec("drop table if exists t")
-	tk.MustExec("create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a))")
+	tk.MustExec("drop table if exists t1, t2")
+	tk.MustExec("create table t1(a int, b int, c int, d int, index idx_a_b_c(a, b, c))")
+	tk.MustExec("create table t2(a int, b int, c int, d int, index idx_a_b_c_d(a, b, c, d))")
 
 	var input []string
 	var output []struct {
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index e6351c762d141..abc6250f6fdbf 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -675,7 +675,12 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
-		path.EqualCols = res.EqualCols
+		path.ConstantCols = make([]bool, len(path.IdxCols))
+		if res.ColumnValues != nil {
+			for i := range path.ConstantCols {
+				path.ConstantCols[i] = res.ColumnValues[i] != nil
+			}
+		}
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
 			return false, err
@@ -855,7 +860,12 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
-		path.EqualCols = res.EqualCols
+		path.ConstantCols = make([]bool, len(path.IdxCols))
+		if res.ColumnValues != nil {
+			for i := range path.ConstantCols {
+				path.ConstantCols[i] = res.ColumnValues[i] != nil
+			}
+		}
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
 			return err
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index 792c7418d32e4..e7e9bb12e6001 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -314,8 +314,11 @@
   {
     "name": "TestIsMatchProp",
     "cases": [
-      "select a, b, c from t where a > 3 and b = 4 order by a, c",
-      "select * from t where d = 1 and b = 2 order by c, a"
+      "select a, b, c from t1 where a > 3 and b = 4 order by a, c",
+      "select * from t2 where a = 1 and c = 2 order by b, d",
+      "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c",
+      "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c",
+      "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d"
     ]
   }
 ]
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 37526f17eb6cd..93025e19cf817 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1641,19 +1641,41 @@
     "Name": "TestIsMatchProp",
     "Cases": [
       {
-        "SQL": "select a, b, c from t where a > 3 and b = 4 order by a, c",
+        "SQL": "select a, b, c from t1 where a > 3 and b = 4 order by a, c",
         "Plan": [
           "IndexReader 3.33 root  index:Selection",
-          "└─Selection 3.33 cop[tikv]  eq(test.t.b, 4)",
-          "  └─IndexRangeScan 3333.33 cop[tikv] table:t, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
+          "└─Selection 3.33 cop[tikv]  eq(test.t1.b, 4)",
+          "  └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
         ]
       },
       {
-        "SQL": "select * from t where d = 1 and b = 2 order by c, a",
+        "SQL": "select * from t2 where a = 1 and c = 2 order by b, d",
         "Plan": [
           "IndexReader 0.01 root  index:Selection",
-          "└─Selection 0.01 cop[tikv]  eq(test.t.b, 2)",
-          "  └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_d_c_b_a(d, c, b, a) range:[1,1], keep order:true, stats:pseudo"
+          "└─Selection 0.01 cop[tikv]  eq(test.t2.c, 2)",
+          "  └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo"
+        ]
+      },
+      {
+        "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c",
+        "Plan": [
+          "IndexReader 0.03 root  index:IndexRangeScan",
+          "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo",
+        ]
+      },
+      {
+        "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c",
+        "Plan": [
+          "IndexReader 0.67 root  index:IndexRangeScan",
+          "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo",
+        ]
+      },
+      {
+        "SQL": "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d",
+        "Plan": [
+          "IndexReader 0.00 root  index:Selection",
+          "└─Selection 0.00 cop[tikv]  eq(test.t2.c, 3), or(and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), lt(test.t2.d, 3))), and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), gt(test.t2.d, 6))))",
+          "  └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo"
         ]
       }
     ]
diff --git a/planner/util/path.go b/planner/util/path.go
index 16dd3c844ac3b..5d1af4a9f17a8 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -32,9 +32,9 @@ type AccessPath struct {
 	FullIdxColLens []int
 	IdxCols        []*expression.Column
 	IdxColLens     []int
-	// EqualCols indicates whether the column is constant under the given conditions for all index columns.
-	EqualCols []bool
-	Ranges    []*ranger.Range
+	// ConstantCols indicates whether the column is constant under the given conditions for all index columns.
+	ConstantCols []bool
+	Ranges       []*ranger.Range
 	// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
 	// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
 	CountAfterAccess float64
diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
index ab236d8c7b377..3c3258f4ef986 100644
--- a/util/ranger/detacher.go
+++ b/util/ranger/detacher.go
@@ -185,43 +185,40 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
 // is totally composed of point range filters.
 // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2))
 // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted.
-func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, error) {
+func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, []*valueInfo, error) {
 	if len(conds) < 2 {
-		return nil, -1, nil
+		return nil, -1, nil, nil
 	}
 	var r *DetachRangeResult
+	columnValues := make([]*valueInfo, len(cols))
 	maxNumCols := int(0)
 	offset := int(-1)
 	for i, cond := range conds {
 		tmpConds := []expression.Expression{cond}
 		colSets := expression.ExtractColumnSet(tmpConds)
-		origColNum := colSets.Len()
-		if origColNum == 0 {
+		if colSets.Len() == 0 {
 			continue
 		}
-		if l := len(cols); origColNum > l {
-			origColNum = l
-		}
-		currCols := cols[:origColNum]
-		currLengths := lengths[:origColNum]
-		res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, currCols, currLengths)
+		res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, cols, lengths)
 		if err != nil {
-			return nil, -1, err
+			return nil, -1, nil, err
 		}
 		if len(res.Ranges) == 0 {
-			return &DetachRangeResult{}, -1, nil
+			return &DetachRangeResult{}, -1, nil, nil
 		}
+		// take the union of the two columnValues
+		columnValues = unionColumnValues(columnValues, res.ColumnValues, len(cols))
 		if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 {
 			continue
 		}
 		sameLens, allPoints := true, true
 		numCols := int(0)
-		for i, ran := range res.Ranges {
+		for j, ran := range res.Ranges {
 			if !ran.IsPoint(sctx.GetSessionVars().StmtCtx) {
 				allPoints = false
 				break
 			}
-			if i == 0 {
+			if j == 0 {
 				numCols = len(ran.LowVal)
 			} else if numCols != len(ran.LowVal) {
 				sameLens = false
@@ -240,7 +237,21 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E
 	if r != nil {
 		r.IsDNFCond = false
 	}
-	return r, offset, nil
+	return r, offset, columnValues, nil
+}
+
+func unionColumnValues(lhs, rhs []*valueInfo, numCols int) []*valueInfo {
+	if lhs == nil {
+		lhs = make([]*valueInfo, numCols)
+	}
+	if rhs != nil {
+		for i, valInfo := range lhs {
+			if valInfo == nil && rhs[i] != nil {
+				lhs[i] = rhs[i]
+			}
+		}
+	}
+	return lhs
 }
 
 // detachCNFCondAndBuildRangeForIndex will detach the index filters from table filters. These conditions are connected with `and`
@@ -254,7 +265,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 	)
 	res := &DetachRangeResult{}
 
-	accessConds, filterConds, newConditions, equalCols, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths)
+	accessConds, filterConds, newConditions, columnValues, emptyRange := ExtractEqAndInCondition(d.sctx, conditions, d.cols, d.lengths)
 	if emptyRange {
 		return res, nil
 	}
@@ -286,7 +297,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 	res.Ranges = ranges
 	res.AccessConds = accessConds
 	res.RemainedConds = filterConds
-	res.EqualCols = equalCols
+	res.ColumnValues = columnValues
 	if eqOrInCount == len(d.cols) || len(newConditions) == 0 {
 		res.RemainedConds = append(res.RemainedConds, newConditions...)
 		return res, nil
@@ -297,15 +308,17 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 		shouldReserve: d.lengths[eqOrInCount] != types.UnspecifiedLength,
 	}
 	if considerDNF {
-		pointRes, offset, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths)
+		pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths)
 		if err != nil {
 			return nil, err
 		}
+		res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues, len(d.cols))
 		if pointRes != nil {
 			if len(pointRes.Ranges) == 0 {
 				return &DetachRangeResult{}, nil
 			}
 			if len(pointRes.Ranges[0].LowVal) > eqOrInCount {
+				pointRes.ColumnValues = res.ColumnValues
 				res = pointRes
 				pointRanges = pointRes.Ranges
 				eqOrInCount = len(res.Ranges[0].LowVal)
@@ -461,22 +474,42 @@ func allEqOrIn(expr expression.Expression) bool {
 	return false
 }
 
+func extractValueInfo(expr expression.Expression) *valueInfo {
+	if f, ok := expr.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) {
+		getValueInfo := func(c *expression.Constant) *valueInfo {
+			mutable := c.ParamMarker != nil || c.DeferredExpr != nil
+			var value *types.Datum
+			if !mutable {
+				value = &c.Value
+			}
+			return &valueInfo{mutable, value}
+		}
+		if c, ok := f.GetArgs()[0].(*expression.Constant); ok {
+			return getValueInfo(c)
+		}
+		if c, ok := f.GetArgs()[1].(*expression.Constant); ok {
+			return getValueInfo(c)
+		}
+	}
+	return nil
+}
+
 // ExtractEqAndInCondition will split the given condition into three parts by the information of index columns and their lengths.
 // accesses: The condition will be used to build range.
 // filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column.
 // newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column.
 //   e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions.
-// equalCols: equalCols indicates whether the column is constant under the given conditions for all index columns.
+// columnValues: the constant column values for all index columns. columnValues[i] is nil if cols[i] is not constant.
 // bool: indicate whether there's nil range when merging eq and in conditions.
 func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column,
-	lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []bool, bool) {
+	lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*valueInfo, bool) {
 	var filters []expression.Expression
 	rb := builder{sc: sctx.GetSessionVars().StmtCtx}
 	accesses := make([]expression.Expression, len(cols))
 	points := make([][]*point, len(cols))
 	mergedAccesses := make([]expression.Expression, len(cols))
 	newConditions := make([]expression.Expression, 0, len(conditions))
-	equalCols := make([]bool, len(cols))
+	columnValues := make([]*valueInfo, len(cols))
 	offsets := make([]int, len(conditions))
 	for i, cond := range conditions {
 		offset := getPotentialEqOrInColOffset(cond, cols)
@@ -505,6 +538,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			if accesses[i] != nil {
 				if allEqOrIn(accesses[i]) {
 					newConditions = append(newConditions, accesses[i])
+					columnValues[i] = extractValueInfo(accesses[i])
 				} else {
 					accesses[i] = nil
 				}
@@ -522,6 +556,9 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			// All Intervals are single points
 			accesses[i] = points2EqOrInCond(sctx, points[i], cols[i])
 			newConditions = append(newConditions, accesses[i])
+			if f, ok := accesses[i].(*expression.ScalarFunction); ok && f.FuncName.L == ast.EQ {
+				columnValues[i] = &valueInfo{mutable: true}
+			}
 			sctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
 		}
 	}
@@ -530,15 +567,6 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			newConditions = append(newConditions, conditions[i])
 		}
 	}
-	for i, cond := range accesses {
-		if f, ok := cond.(*expression.ScalarFunction); ok && (f.FuncName.L == ast.EQ || f.FuncName.L == ast.NullEQ) {
-			if _, ok := f.GetArgs()[0].(*expression.Column); ok {
-				equalCols[i] = true
-			} else if _, ok := f.GetArgs()[1].(*expression.Column); ok {
-				equalCols[i] = true
-			}
-		}
-	}
 	for i, cond := range accesses {
 		if cond == nil {
 			accesses = accesses[:i]
@@ -558,12 +586,12 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 	}
 	// We should remove all accessConds, so that they will not be added to filter conditions.
 	newConditions = removeAccessConditions(newConditions, accesses)
-	return accesses, filters, newConditions, equalCols, false
+	return accesses, filters, newConditions, columnValues, false
 }
 
 // detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF.
 // We will detach the conditions of every DNF items, then compose them to a DNF.
-func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) ([]*Range, []expression.Expression, bool, error) {
+func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) ([]*Range, []expression.Expression, []*valueInfo, bool, error) {
 	sc := d.sctx.GetSessionVars().StmtCtx
 	firstColumnChecker := &conditionChecker{
 		colUniqueID:   d.cols[0].UniqueID,
@@ -574,26 +602,46 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 	dnfItems := expression.FlattenDNFConditions(condition)
 	newAccessItems := make([]expression.Expression, 0, len(dnfItems))
 	var totalRanges []*Range
+	columnValues := make([]*valueInfo, len(d.cols))
 	hasResidual := false
-	for _, item := range dnfItems {
+	for i, item := range dnfItems {
 		if sf, ok := item.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd {
 			cnfItems := expression.FlattenCNFConditions(sf)
 			var accesses, filters []expression.Expression
 			res, err := d.detachCNFCondAndBuildRangeForIndex(cnfItems, newTpSlice, true)
 			if err != nil {
-				return nil, nil, false, nil
+				return nil, nil, nil, false, nil
 			}
 			ranges := res.Ranges
 			accesses = res.AccessConds
 			filters = res.RemainedConds
 			if len(accesses) == 0 {
-				return FullRange(), nil, true, nil
+				return FullRange(), nil, nil, true, nil
 			}
 			if len(filters) > 0 {
 				hasResidual = true
 			}
 			totalRanges = append(totalRanges, ranges...)
 			newAccessItems = append(newAccessItems, expression.ComposeCNFCondition(d.sctx, accesses...))
+			if res.ColumnValues != nil {
+				if i == 0 {
+					columnValues = res.ColumnValues
+				} else {
+					// take the intersection of the two columnValues
+					for j, valInfo := range columnValues {
+						if valInfo == nil {
+							continue
+						}
+						sameVale, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j])
+						if err != nil {
+							return nil, nil, nil, false, errors.Trace(err)
+						}
+						if !sameVale {
+							columnValues[j] = nil
+						}
+					}
+				}
+			}
 		} else if firstColumnChecker.check(item) {
 			if firstColumnChecker.shouldReserve {
 				hasResidual = true
@@ -602,12 +650,24 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 			points := rb.build(item)
 			ranges, err := points2Ranges(sc, points, newTpSlice[0])
 			if err != nil {
-				return nil, nil, false, errors.Trace(err)
+				return nil, nil, nil, false, errors.Trace(err)
 			}
 			totalRanges = append(totalRanges, ranges...)
 			newAccessItems = append(newAccessItems, item)
+			if i == 0 {
+				columnValues[0] = extractValueInfo(item)
+			} else if columnValues[0] != nil {
+				valInfo := extractValueInfo(item)
+				sameValue, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, columnValues[0], valInfo)
+				if err != nil {
+					return nil, nil, nil, false, errors.Trace(err)
+				}
+				if !sameValue {
+					columnValues[0] = nil
+				}
+			}
 		} else {
-			return FullRange(), nil, true, nil
+			return FullRange(), nil, nil, true, nil
 		}
 	}
 
@@ -617,10 +677,29 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 	}
 	totalRanges, err := UnionRanges(sc, totalRanges, d.mergeConsecutive)
 	if err != nil {
-		return nil, nil, false, errors.Trace(err)
+		return nil, nil, nil, false, errors.Trace(err)
 	}
 
-	return totalRanges, []expression.Expression{expression.ComposeDNFCondition(d.sctx, newAccessItems...)}, hasResidual, nil
+	return totalRanges, []expression.Expression{expression.ComposeDNFCondition(d.sctx, newAccessItems...)}, columnValues, hasResidual, nil
+}
+
+// valueInfo is used for recording the constant column value in DetachCondAndBuildRangeForIndex.
+type valueInfo struct {
+	mutable bool         // If true, the constant column value depends on mutable constant.
+	value   *types.Datum // If not mutable, value is the constant column value. Otherwise value is nil.
+}
+
+func isSameValue(sc *stmtctx.StatementContext, lhs, rhs *valueInfo) (bool, error) {
+	// We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable. Maybe we can improve it later.
+	// TODO: is `lhs.value.Kind() != rhs.value.Kind()` necessary?
+	if lhs == nil || rhs == nil || lhs.mutable || rhs.mutable || lhs.value.Kind() != rhs.value.Kind() {
+		return false, nil
+	}
+	cmp, err := lhs.value.CompareDatum(sc, rhs.value)
+	if err != nil {
+		return false, err
+	}
+	return cmp == 0, nil
 }
 
 // DetachRangeResult wraps up results when detaching conditions and builing ranges.
@@ -631,8 +710,9 @@ type DetachRangeResult struct {
 	AccessConds []expression.Expression
 	// RemainedConds is the filter conditions which should be kept after access.
 	RemainedConds []expression.Expression
-	// EqualCols indicates whether the column is constant under the given conditions for all index columns.
-	EqualCols []bool
+	// ColumnValues records the constant column values for all index columns.
+	// For the ith column, if it is evaluated as constant, ColumnValues[i] is its value. Otherwise ColumnValues[i] is nil.
+	ColumnValues []*valueInfo
 	// EqCondCount is the number of equal conditions extracted.
 	EqCondCount int
 	// EqOrInCount is the number of equal/in conditions extracted.
@@ -671,12 +751,13 @@ func (d *rangeDetacher) detachCondAndBuildRangeForCols() (*DetachRangeResult, er
 	}
 	if len(d.allConds) == 1 {
 		if sf, ok := d.allConds[0].(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicOr {
-			ranges, accesses, hasResidual, err := d.detachDNFCondAndBuildRangeForIndex(sf, newTpSlice)
+			ranges, accesses, columnValues, hasResidual, err := d.detachDNFCondAndBuildRangeForIndex(sf, newTpSlice)
 			if err != nil {
 				return res, errors.Trace(err)
 			}
 			res.Ranges = ranges
 			res.AccessConds = accesses
+			res.ColumnValues = columnValues
 			res.IsDNFCond = true
 			// If this DNF have something cannot be to calculate range, then all this DNF should be pushed as filter condition.
 			if hasResidual {

From b2d975c71995d4962e2006b0dabbbbb73f017780 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Tue, 27 Jul 2021 10:44:23 +0800
Subject: [PATCH 09/21] fix unit test data, rename ConstantCols to ConstCols & add comments

---
 planner/core/find_best_task.go                   |  6 ++----
 planner/core/logical_plans.go                    | 12 ++++++------
 planner/core/testdata/integration_suite_out.json |  4 ++--
 planner/util/path.go                             |  6 +++---
 util/ranger/detacher.go                          |  7 ++++++-
 5 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 34ce53d77a514..4d414153ba15a 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -500,8 +500,6 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 		}
 		return isMatchProp
 	}
-	// TODO: do we need to consider TiFlash here?
-	// TODO: check is it ok to cache the optimization?
 	all, _ := prop.AllSameOrder()
 	// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
 	// it needs not to keep order for index scan.
@@ -516,7 +514,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 	// select * from t where d = 1 and b = 2 order by c, b, a;
 	// ```
 	// In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a`
-	// matches the sort order. Hence, we use `path.ConstantCols` to deal with the above situations.
+	// matches the sort order. Hence, we use `path.ConstCols` to deal with the above situations.
 	if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) {
 		isMatchProp = true
 		i := 0
@@ -528,7 +526,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 					i++
 					break
 				}
-				if path.ConstantCols == nil || !path.ConstantCols[i] {
+				if path.ConstCols == nil || !path.ConstCols[i] {
 					break
 				}
 			}
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index abc6250f6fdbf..63f4d2f8c6002 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -675,10 +675,10 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
-		path.ConstantCols = make([]bool, len(path.IdxCols))
+		path.ConstCols = make([]bool, len(path.IdxCols))
 		if res.ColumnValues != nil {
-			for i := range path.ConstantCols {
-				path.ConstantCols[i] = res.ColumnValues[i] != nil
+			for i := range path.ConstCols {
+				path.ConstCols[i] = res.ColumnValues[i] != nil
 			}
 		}
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
@@ -860,10 +860,10 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
 		path.EqCondCount = res.EqCondCount
 		path.EqOrInCondCount = res.EqOrInCount
 		path.IsDNFCond = res.IsDNFCond
-		path.ConstantCols = make([]bool, len(path.IdxCols))
+		path.ConstCols = make([]bool, len(path.IdxCols))
 		if res.ColumnValues != nil {
-			for i := range path.ConstantCols {
-				path.ConstantCols[i] = res.ColumnValues[i] != nil
+			for i := range path.ConstCols {
+				path.ConstCols[i] = res.ColumnValues[i] != nil
 			}
 		}
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 93025e19cf817..b93d2ef382647 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1660,14 +1660,14 @@
         "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c",
         "Plan": [
           "IndexReader 0.03 root  index:IndexRangeScan",
-          "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo",
+          "└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo"
         ]
       },
       {
         "SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c",
         "Plan": [
           "IndexReader 0.67 root  index:IndexRangeScan",
-          "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo",
+          "└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo"
         ]
       },
       {
diff --git a/planner/util/path.go b/planner/util/path.go
index 5d1af4a9f17a8..10e994e998a22 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -32,9 +32,9 @@ type AccessPath struct {
 	FullIdxColLens []int
 	IdxCols        []*expression.Column
 	IdxColLens     []int
-	// ConstantCols indicates whether the column is constant under the given conditions for all index columns.
-	ConstantCols []bool
-	Ranges       []*ranger.Range
+	// ConstCols indicates whether the column is constant under the given conditions for all index columns.
+	ConstCols []bool
+	Ranges    []*ranger.Range
 	// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
 	// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
 	CountAfterAccess float64
diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
index 3c3258f4ef986..57d1791d7d5fd 100644
--- a/util/ranger/detacher.go
+++ b/util/ranger/detacher.go
@@ -557,6 +557,8 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
 			accesses[i] = points2EqOrInCond(sctx, points[i], cols[i])
 			newConditions = append(newConditions, accesses[i])
 			if f, ok := accesses[i].(*expression.ScalarFunction); ok && f.FuncName.L == ast.EQ {
+				// Actually the constant column value may not be mutable. Here we assume it is mutable to keep it simple.
+				// Maybe we can improve it later.
 				columnValues[i] = &valueInfo{mutable: true}
 			}
 			sctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
@@ -690,7 +692,10 @@ type valueInfo struct {
 }
 
 func isSameValue(sc *stmtctx.StatementContext, lhs, rhs *valueInfo) (bool, error) {
-	// We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable. Maybe we can improve it later.
+	// We assume `lhs` and `rhs` are not the same when either `lhs` or `rhs` is mutable to keep it simple. If we consider
+	// mutable valueInfo, we need to set `sc.OptimDependOnMutableConst = true`, which makes the plan not able to be cached.
+	// On the other hand, the equal condition may not be used for optimization. Hence we simply regard mutable valueInfos different
+	// from others. Maybe we can improve it later.
 	// TODO: is `lhs.value.Kind() != rhs.value.Kind()` necessary?
 	if lhs == nil || rhs == nil || lhs.mutable || rhs.mutable || lhs.value.Kind() != rhs.value.Kind() {
 		return false, nil

From aadd749e0edde8833eae0800ea8345ba80826733 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Tue, 27 Jul 2021 20:52:06 +0800
Subject: [PATCH 10/21] add bounds check on path.ConstCols in isMatchProp

---
 planner/core/find_best_task.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 34e06e46093a1..60bd526b0c3c6 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -525,7 +525,7 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical
 					i++
 					break
 				}
-				if path.ConstCols == nil || !path.ConstCols[i] {
+				if path.ConstCols == nil || i >= len(path.ConstCols) || !path.ConstCols[i] {
 					break
 				}
 			}

From 3e73ab99ff8756052be4b953cfdbb39ab76e9c8b Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Wed, 28 Jul 2021 23:01:56 +0800
Subject: [PATCH 11/21] add heuristics in DataSource.DeriveStats

---
 planner/core/logical_plans.go | 77 +++++----------------------
 planner/core/stats.go         | 99 ++++++++++++++++++++++++++++-------
 planner/util/path.go          | 34 ++++++++++++
 util/ranger/detacher.go       |  6 +--
 4 files changed, 129 insertions(+), 87 deletions(-)

diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index 45703caa4a828..2d7ec8ffa4822 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -655,19 +655,19 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) {
 	return gathers
 }
 
-func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) (bool, error) {
+func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error {
 	path.CountAfterAccess = float64(ds.statisticTable.Count)
 	path.Ranges = ranger.FullNotNullRange()
 	path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index)
 	path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index)
 	if len(conds) == 0 {
-		return false, nil
+		return nil
 	}
 	sc := ds.ctx.GetSessionVars().StmtCtx
 	if len(path.IdxCols) != 0 {
 		res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.IdxCols, path.IdxColLens)
 		if err != nil {
-			return false, err
+			return err
 		}
 		path.Ranges = res.Ranges
 		path.AccessConds = res.AccessConds
@@ -683,7 +683,7 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
 		}
 		path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
 		if err != nil {
-			return false, err
+			return err
 		}
 	} else {
 		path.TableFilters = conds
@@ -712,33 +712,12 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
 	if path.CountAfterAccess < ds.stats.RowCount && !isIm {
 		path.CountAfterAccess = math.Min(ds.stats.RowCount/SelectionFactor, float64(ds.statisticTable.Count))
 	}
-	// Check whether there's only point query.
-	noIntervalRanges := true
-	haveNullVal := false
-	for _, ran := range path.Ranges {
-		// Not point or the not full matched.
-		if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) {
-			noIntervalRanges = false
-			break
-		}
-		// Check whether there's null value.
-		for i := 0; i < len(path.Index.Columns); i++ {
-			if ran.HighVal[i].IsNull() {
-				haveNullVal = true
-				break
-			}
-		}
-		if haveNullVal {
-			break
-		}
-	}
-	return noIntervalRanges && !haveNullVal, nil
+	return nil
 }
 
 // deriveTablePathStats will fulfill the information that the AccessPath need.
-// And it will check whether the primary key is covered only by point query.
 // isIm indicates whether this function is called to generate the partial path for IndexMerge.
-func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) (bool, error) {
+func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) error {
 	if path.IsCommonHandlePath {
 		return ds.deriveCommonHandleTablePathStats(path, conds, isIm)
 	}
@@ -759,12 +738,12 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres
 	}
 	if pkCol == nil {
 		path.Ranges = ranger.FullIntRange(isUnsigned)
-		return false, nil
+		return nil
 	}
 
 	path.Ranges = ranger.FullIntRange(isUnsigned)
 	if len(conds) == 0 {
-		return false, nil
+		return nil
 	}
 	path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol)
 	// If there's no access cond, we try to find that whether there's expression containing correlated column that
@@ -800,11 +779,11 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres
 	}
 	if corColInAccessConds {
 		path.CountAfterAccess = 1
-		return true, nil
+		return nil
 	}
 	path.Ranges, err = ranger.BuildTableRange(path.AccessConds, sc, pkCol.RetType)
 	if err != nil {
-		return false, err
+		return err
 	}
 	path.CountAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.Ranges)
 	// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
@@ -812,15 +791,7 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres
 	if path.CountAfterAccess < ds.stats.RowCount && !isIm {
 		path.CountAfterAccess = math.Min(ds.stats.RowCount/SelectionFactor, float64(ds.statisticTable.Count))
 	}
-	// Check whether the primary key is covered by point query.
-	noIntervalRange := true
-	for _, ran := range path.Ranges {
-		if !ran.IsPoint(sc) {
-			noIntervalRange = false
-			break
-		}
-	}
-	return noIntervalRange, err
+	return err
 }
 
 func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Expression) error {
@@ -877,12 +848,9 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
 }
 
 // deriveIndexPathStats will fulfill the information that the AccessPath need.
-// And it will check whether this index is full matched by point query. We will use this check to
-// determine whether we remove other paths or not.
 // conds is the conditions used to generate the DetachRangeResult for path.
 // isIm indicates whether this function is called to generate the partial path for IndexMerge.
-func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) bool {
-	sc := ds.ctx.GetSessionVars().StmtCtx
+func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) {
 	if path.EqOrInCondCount == len(path.AccessConds) {
 		accesses, remained := path.SplitCorColAccessCondFromFilters(ds.ctx, path.EqOrInCondCount)
 		path.AccessConds = append(path.AccessConds, accesses...)
@@ -922,27 +890,6 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expres
 			path.CountAfterIndex = math.Max(path.CountAfterAccess*selectivity, ds.stats.RowCount)
 		}
 	}
-	// Check whether there's only point query.
-	noIntervalRanges := true
-	haveNullVal := false
-	for _, ran := range path.Ranges {
-		// Not point or the not full matched.
-		if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) {
-			noIntervalRanges = false
-			break
-		}
-		// Check whether there's null value.
-		for i := 0; i < len(path.Index.Columns); i++ {
-			if ran.HighVal[i].IsNull() {
-				haveNullVal = true
-				break
-			}
-		}
-		if haveNullVal {
-			break
-		}
-	}
-	return noIntervalRanges && !haveNullVal
 }
 
 func getPKIsHandleColFromSchema(cols []*model.ColumnInfo, schema *expression.Schema, pkIsHandle bool) *expression.Column {
diff --git a/planner/core/stats.go b/planner/core/stats.go
index 35fc31687e749..67e6bf8c4f773 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -15,6 +15,7 @@ package core
 
 import (
 	"context"
+	"golang.org/x/tools/container/intsets"
 	"math"
 	"sort"
 
@@ -280,30 +281,90 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 			return nil, err
 		}
 	}
+	// TODO: Can we move ds.deriveStatsByFilter after pruning by heuristics? In this way some computation can be avoided
+	// when ds.possibleAccessPaths are pruned.
 	ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths)
+	uniqueIdxsWithDoubleScan := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths))
+	singleScanIdxs := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths))
+	var selected, uniqueBest, refinedBest *util.AccessPath
 	for _, path := range ds.possibleAccessPaths {
 		if path.IsTablePath() {
-			noIntervalRanges, err := ds.deriveTablePathStats(path, ds.pushedDownConds, false)
+			err := ds.deriveTablePathStats(path, ds.pushedDownConds, false)
 			if err != nil {
 				return nil, err
 			}
-			// If we have point or empty range, just remove other possible paths.
-			if noIntervalRanges || len(path.Ranges) == 0 {
-				ds.possibleAccessPaths[0] = path
-				ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
-				ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
-				break
-			}
-			continue
+		} else {
+			ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
 		}
-		noIntervalRanges := ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
-		// If we have empty range, or point range on unique index, just remove other possible paths.
-		if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 {
-			ds.possibleAccessPaths[0] = path
-			ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
-			ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
+		// TODO: Should we handle TiFlash case specially?
+		// Try some heuristic rules to select access path.
+		if len(path.Ranges) == 0 {
+			selected = path
 			break
 		}
+		// TODO: Can we record isSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
+		// as a field of AccessPath? In this way ds.isCoveringIndex only needs to be called once for each path.
+		if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) {
+			if path.IsTablePath() || path.Index.Unique {
+				if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
+					selected = path
+					break
+				}
+				uniqueIdxsWithDoubleScan = append(uniqueIdxsWithDoubleScan, path)
+			}
+		} else if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
+			singleScanIdxs = append(singleScanIdxs, path)
+		}
+	}
+	if len(uniqueIdxsWithDoubleScan) > 0 {
+		// TODO: Move accessCondsColSet from candidatePath to AccessPath so that we can use it both here and skyline pruning.
+		uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan))
+		for _, uniqueIdx := range uniqueIdxsWithDoubleScan {
+			uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds))
+			// Find the unique index with the minimal number of ranges as `uniqueBest`.
+			if uniqueBest == nil || len(uniqueIdx.Ranges) < len(uniqueBest.Ranges) {
+				uniqueBest = uniqueIdx
+			}
+		}
+		// `uniqueBest` may not always be the best.
+		// ```
+		// create table t(a int, b int, c int, unique index idx_b(b), unique index idx_b_c(b, c));
+		// select b, c from t where b = 5 and c > 10;
+		// ```
+		// In this case, `uniqueBest` is `idx_b`. However, `idx_b_c` is better than `idx_b`.
+		// Hence, for each index in `singleScanIdxs`, we check whether it is better than some index in `uniqueIdxsWithDoubleScan`.
+		// If yes, the index is a refined one. We find the refined index with the minimal number of ranges as `refinedBest`.
+		for _, singleScanIdx := range singleScanIdxs {
+			columnSet := expression.ExtractColumnSet(singleScanIdx.AccessConds)
+			for _, uniqueIdxColumnSet := range uniqueIdxColumnSets {
+				setsResult, comparable := compareColumnSet(columnSet, uniqueIdxColumnSet)
+				if comparable && setsResult == 1 {
+					if refinedBest == nil || len(singleScanIdx.Ranges) < len(refinedBest.Ranges) {
+						refinedBest = singleScanIdx
+					}
+				}
+			}
+		}
+		// `refineBest` may not always be better than `uniqueBest`.
+		// ```
+		// create table t(int a, int b, int c, int d, unique index idx_a(a), unique index idx_b_c(b, c), unique index idx_b_c_a_d(b, c, a, d));
+		// select a, b, c from t where a = 1 and b = 2 and c in (1, 2, 3, 4, 5);
+		// ```
+		// In the case, `refinedBest` is `idx_b_c_a_d` and `uniqueBest` is `a`. `idx_b_c_a_d` needs to access five points while `idx_a`
+		// only needs one point access and one table access.
+		// Hence we should compare `2 * len(uniqueBest.Ranges)` and `len(refinedBest.Ranges)` to select the better one.
+		if refinedBest != nil && (uniqueBest == nil || len(refinedBest.Ranges) < 2*len(uniqueBest.Ranges)) {
+			selected = refinedBest
+		} else {
+			selected = uniqueBest
+		}
+	}
+	// If some path matches a heuristic rule, just remove other possible paths
+	if selected != nil {
+		ds.possibleAccessPaths[0] = selected
+		ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
+		// TODO: Can we make a more carefull check on whether the optimization depends on mutable constants?
+		ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
 	}
 
 	// TODO: implement UnionScan + IndexMerge
@@ -513,7 +574,7 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us
 			} else {
 				path.IsIntHandlePath = true
 			}
-			noIntervalRanges, err := ds.deriveTablePathStats(path, conditions, true)
+			err := ds.deriveTablePathStats(path, conditions, true)
 			if err != nil {
 				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
 				continue
@@ -523,7 +584,7 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us
 				continue
 			}
 			// If we have point or empty range, just remove other possible paths.
-			if noIntervalRanges || len(path.Ranges) == 0 {
+			if len(path.Ranges) == 0 || path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) {
 				if len(results) == 0 {
 					results = append(results, path)
 				} else {
@@ -543,13 +604,13 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us
 				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
 				continue
 			}
-			noIntervalRanges := ds.deriveIndexPathStats(path, conditions, true)
+			ds.deriveIndexPathStats(path, conditions, true)
 			// If the path contains a full range, ignore it.
 			if ranger.HasFullRange(path.Ranges) {
 				continue
 			}
 			// If we have empty range, or point range on unique index, just remove other possible paths.
-			if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 {
+			if len(path.Ranges) == 0 || (path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) && path.Index.Unique) {
 				if len(results) == 0 {
 					results = append(results, path)
 				} else {
diff --git a/planner/util/path.go b/planner/util/path.go
index 10e994e998a22..665d71ccd4d23 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -19,6 +19,7 @@ import (
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/kv"
 	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/collate"
 	"github.com/pingcap/tidb/util/ranger"
@@ -138,3 +139,36 @@ func isColEqCorColOrConstant(ctx sessionctx.Context, filter expression.Expressio
 	}
 	return false
 }
+
+// OnlyPointRange checks whether each range is a point(no interval range exists).
+func (path *AccessPath) OnlyPointRange(sc *stmtctx.StatementContext) bool {
+	noIntervalRange := true
+	if path.IsIntHandlePath {
+		for _, ran := range path.Ranges {
+			if !ran.IsPoint(sc) {
+				noIntervalRange = false
+				break
+			}
+		}
+		return noIntervalRange
+	}
+	haveNullVal := false
+	for _, ran := range path.Ranges {
+		// Not a point range, or the range does not cover all of the index columns.
+		if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) {
+			noIntervalRange = false
+			break
+		}
+		// Check whether there's null value.
+		for i := 0; i < len(path.Index.Columns); i++ {
+			if ran.HighVal[i].IsNull() {
+				haveNullVal = true
+				break
+			}
+		}
+		if haveNullVal {
+			break
+		}
+	}
+	return noIntervalRange && !haveNullVal
+}
diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
index 57d1791d7d5fd..33df3016e5aeb 100644
--- a/util/ranger/detacher.go
+++ b/util/ranger/detacher.go
@@ -242,7 +242,7 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E
 
 func unionColumnValues(lhs, rhs []*valueInfo, numCols int) []*valueInfo {
 	if lhs == nil {
-		lhs = make([]*valueInfo, numCols)
+		return rhs
 	}
 	if rhs != nil {
 		for i, valInfo := range lhs {
@@ -634,11 +634,11 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression
 						if valInfo == nil {
 							continue
 						}
-						sameVale, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j])
+						sameValue, err := isSameValue(d.sctx.GetSessionVars().StmtCtx, valInfo, res.ColumnValues[j])
 						if err != nil {
 							return nil, nil, nil, false, errors.Trace(err)
 						}
-						if !sameVale {
+						if !sameValue {
 							columnValues[j] = nil
 						}
 					}

From ff458ef67d5b6afdf9b7c5b362d5f7dc61a7832f Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 29 Jul 2021 13:07:40 +0800
Subject: [PATCH 12/21] append warning about heuristic index selection

---
 planner/core/stats.go   | 18 +++++++++++++++++-
 util/ranger/detacher.go |  9 ++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/planner/core/stats.go b/planner/core/stats.go
index 67e6bf8c4f773..0c2f4dba17c3b 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -363,8 +363,24 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 	if selected != nil {
 		ds.possibleAccessPaths[0] = selected
 		ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
-		// TODO: Can we make a more carefull check on whether the optimization depends on mutable constants?
+		// TODO: Can we make a more careful check on whether the optimization depends on mutable constants?
 		ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
+		if ds.ctx.GetSessionVars().StmtCtx.InExplainStmt {
+			var tableName, pathName string
+			if ds.TableAsName.O == "" {
+				tableName = ds.tableInfo.Name.O
+			} else {
+				tableName = ds.TableAsName.O
+			}
+			if !selected.IsTablePath() {
+				pathName = "primary key of " + tableName
+			} else {
+				pathName = "index " + selected.Index.Name.O + " of " + tableName
+			}
+			// TODO: Do we need to specify which heuristic rule `selected` matches? It is kind of hard to briefly describe the
+			// three heuristic rules. Besides, we can distinguish the three rules by checking EXPLAIN result.
+			ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected by heuristics"))
+		}
 	}
 
 	// TODO: implement UnionScan + IndexMerge
diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go
index 33df3016e5aeb..e3566a8119afa 100644
--- a/util/ranger/detacher.go
+++ b/util/ranger/detacher.go
@@ -207,7 +207,7 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E
 			return &DetachRangeResult{}, -1, nil, nil
 		}
 		// take the union of the two columnValues
-		columnValues = unionColumnValues(columnValues, res.ColumnValues, len(cols))
+		columnValues = unionColumnValues(columnValues, res.ColumnValues)
 		if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 {
 			continue
 		}
@@ -240,12 +240,15 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E
 	return r, offset, columnValues, nil
 }
 
-func unionColumnValues(lhs, rhs []*valueInfo, numCols int) []*valueInfo {
+func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo {
 	if lhs == nil {
 		return rhs
 	}
 	if rhs != nil {
 		for i, valInfo := range lhs {
+			if i >= len(rhs) {
+				break
+			}
 			if valInfo == nil && rhs[i] != nil {
 				lhs[i] = rhs[i]
 			}
@@ -312,7 +315,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
 		if err != nil {
 			return nil, err
 		}
-		res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues, len(d.cols))
+		res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues)
 		if pointRes != nil {
 			if len(pointRes.Ranges) == 0 {
 				return &DetachRangeResult{}, nil

From c7ab8771ed4e777c49387f5bd7e2024e2c8a31f8 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 29 Jul 2021 16:24:58 +0800
Subject: [PATCH 13/21] add test for heuristics

---
 planner/core/integration_test.go              | 29 ++++++++++++-
 planner/core/stats.go                         |  8 ++--
 .../core/testdata/integration_suite_in.json   |  9 ++++
 .../core/testdata/integration_suite_out.json  | 43 +++++++++++++++++++
 4 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index 95b8fd391f387..39f2be2e4ae3c 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -4016,13 +4016,13 @@ func (s *testIntegrationSerialSuite) TestCTESelfJoin(c *C) {
 		with inv as
 		(select t1a , t3a, sum(t2c)
 			from t1, t2, t3
-			where t2a = t1a  
+			where t2a = t1a
 				and t2b = t3b
 				and t3c = 1998
 			group by t1a, t3a)
 		select inv1.t1a, inv2.t3a
 		from inv inv1, inv inv2
-		where inv1.t1a = inv2.t1a  
+		where inv1.t1a = inv2.t1a
 			and inv1.t3a = 4
 			and inv2.t3a = 4+1`)
 }
@@ -4050,3 +4050,28 @@ func (s *testIntegrationSuite) TestIssue26559(c *C) {
 	tk.MustExec("insert into t values('2020-07-29 09:07:01', '2020-07-27 16:57:36');")
 	tk.MustQuery("select greatest(a, b) from t union select null;").Sort().Check(testkit.Rows("2020-07-29 09:07:01", "<nil>"))
 }
+
+func (s *testIntegrationSuite) TestHeuristicIndexSelection(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t1, t2")
+	tk.MustExec("create table t1(a int, b int, c int, d int, e int, f int, g int, primary key (a), unique key c_d_e (c, d, e), unique key f (f), unique key f_g (f, g), key g (g))")
+	tk.MustExec("create table t2(a int, b int, c int, d int, unique index idx_a (a), unique index idx_b_c (b, c), unique index idx_b_c_a_d (b, c, a, d))")
+
+	var input []string
+	var output []struct {
+		SQL      string
+		Plan     []string
+		Warnings []string
+	}
+	s.testData.GetTestCases(c, &input, &output)
+	for i, tt := range input {
+		s.testData.OnRecord(func() {
+			output[i].SQL = tt
+			output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows())
+			output[i].Warnings = s.testData.ConvertRowsToStrings(tk.MustQuery("show warnings").Rows())
+		})
+		tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...))
+		tk.MustQuery("show warnings").Check(testkit.Rows(output[i].Warnings...))
+	}
+}
diff --git a/planner/core/stats.go b/planner/core/stats.go
index 0c2f4dba17c3b..c8ab4ba873271 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -316,7 +316,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 			singleScanIdxs = append(singleScanIdxs, path)
 		}
 	}
-	if len(uniqueIdxsWithDoubleScan) > 0 {
+	if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 {
 		// TODO: Move accessCondsColSet from candidatePath to AccessPath so that we can use it both here and skyline pruning.
 		uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan))
 		for _, uniqueIdx := range uniqueIdxsWithDoubleScan {
@@ -347,12 +347,12 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 		}
 		// `refineBest` may not always be better than `uniqueBest`.
 		// ```
-		// create table t(int a, int b, int c, int d, unique index idx_a(a), unique index idx_b_c(b, c), unique index idx_b_c_a_d(b, c, a, d));
+		// create table t(a int, b int, c int, d int, unique index idx_a(a), unique index idx_b_c(b, c), unique index idx_b_c_a_d(b, c, a, d));
 		// select a, b, c from t where a = 1 and b = 2 and c in (1, 2, 3, 4, 5);
 		// ```
 		// In the case, `refinedBest` is `idx_b_c_a_d` and `uniqueBest` is `a`. `idx_b_c_a_d` needs to access five points while `idx_a`
 		// only needs one point access and one table access.
-		// Hence we should compare `2 * len(uniqueBest.Ranges)` and `len(refinedBest.Ranges)` to select the better one.
+		// Hence we should compare `len(refinedBest.Ranges)` and `2*len(uniqueBest.Ranges)` to select the better one.
 		if refinedBest != nil && (uniqueBest == nil || len(refinedBest.Ranges) < 2*len(uniqueBest.Ranges)) {
 			selected = refinedBest
 		} else {
@@ -372,7 +372,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 			} else {
 				tableName = ds.TableAsName.O
 			}
-			if !selected.IsTablePath() {
+			if selected.IsTablePath() {
 				pathName = "primary key of " + tableName
 			} else {
 				pathName = "index " + selected.Index.Name.O + " of " + tableName
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index e7e9bb12e6001..901d509459403 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -320,5 +320,14 @@
       "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c",
       "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d"
     ]
+  },
+  {
+    "name": "TestHeuristicIndexSelection",
+    "cases": [
+      "select f, g from t1 where f = 2 and g in (3, 4, 5)",
+      "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)",
+      "select f, g from t1 where f = 2 and g > 3",
+      "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5);"
+    ]
   }
 ]
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index b93d2ef382647..beeedddb0ef36 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1679,5 +1679,48 @@
         ]
       }
     ]
+  },
+  {
+    "Name": "TestHeuristicIndexSelection",
+    "Cases": [
+      {
+        "SQL": "select f, g from t1 where f = 2 and g in (3, 4, 5)",
+        "Plan": [
+          "Batch_Point_Get_5 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false"
+        ],
+        "Warnings": [
+          "Note 1105 index f_g of t1 is selected by heuristics"
+        ]
+      },
+      {
+        "SQL": "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)",
+        "Plan": [
+          "Batch_Point_Get_5 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false"
+        ],
+        "Warnings": [
+          "Note 1105 index c_d_e of t1 is selected by heuristics"
+        ]
+      },
+      {
+        "SQL": "select f, g from t1 where f = 2 and g > 3",
+        "Plan": [
+          "IndexReader_6 33.33 root  index:IndexRangeScan_5",
+          "└─IndexRangeScan_5 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo"
+        ],
+        "Warnings": [
+          "Note 1105 index f_g of t1 is selected by heuristics"
+        ]
+      },
+      {
+        "SQL": "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5)",
+        "Plan": [
+          "Selection_6 0.01 root  eq(test.t2.b, 2), in(test.t2.c, 1, 2, 3, 4, 5)",
+          "└─Point_Get_5 1.00 root table:t2, index:idx_a(a) "
+        ],
+        "Warnings": [
+          "Note 1105 index idx_a of t2 is selected by heuristics"
+        ]
+      }
+    ]
   }
 ]

From 4f8465128422317f56d954440832b63f3b3853b2 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 29 Jul 2021 17:29:59 +0800
Subject: [PATCH 14/21] add test

---
 planner/core/stats.go                            |  9 ++++++++-
 planner/core/testdata/integration_suite_out.json | 12 ++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/planner/core/stats.go b/planner/core/stats.go
index c8ab4ba873271..a4e19996834a6 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -306,7 +306,14 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 		// as a field of AccessPath? In this way ds.isCoveringIndex only needs to be called once for each path.
 		if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) {
 			if path.IsTablePath() || path.Index.Unique {
-				if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
+				var singleScan bool
+				if path.IsTablePath() {
+					singleScan = true
+				} else {
+					singleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
+				}
+				if singleScan {
+					// TODO: What if multiple paths satisfy all conditions?
 					selected = path
 					break
 				}
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index beeedddb0ef36..b6ce5c8d28671 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1686,7 +1686,7 @@
       {
         "SQL": "select f, g from t1 where f = 2 and g in (3, 4, 5)",
         "Plan": [
-          "Batch_Point_Get_5 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false"
+          "Batch_Point_Get 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false"
         ],
         "Warnings": [
           "Note 1105 index f_g of t1 is selected by heuristics"
@@ -1695,7 +1695,7 @@
       {
         "SQL": "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)",
         "Plan": [
-          "Batch_Point_Get_5 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false"
+          "Batch_Point_Get 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false"
         ],
         "Warnings": [
           "Note 1105 index c_d_e of t1 is selected by heuristics"
@@ -1704,8 +1704,8 @@
       {
         "SQL": "select f, g from t1 where f = 2 and g > 3",
         "Plan": [
-          "IndexReader_6 33.33 root  index:IndexRangeScan_5",
-          "└─IndexRangeScan_5 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo"
+          "IndexReader 33.33 root  index:IndexRangeScan",
+          "└─IndexRangeScan 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo"
         ],
         "Warnings": [
           "Note 1105 index f_g of t1 is selected by heuristics"
@@ -1714,8 +1714,8 @@
       {
         "SQL": "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5)",
         "Plan": [
-          "Selection_6 0.01 root  eq(test.t2.b, 2), in(test.t2.c, 1, 2, 3, 4, 5)",
-          "└─Point_Get_5 1.00 root table:t2, index:idx_a(a) "
+          "Selection 0.01 root  eq(test.t2.b, 2), in(test.t2.c, 1, 2, 3, 4, 5)",
+          "└─Point_Get 1.00 root table:t2, index:idx_a(a) "
         ],
         "Warnings": [
           "Note 1105 index idx_a of t2 is selected by heuristics"

From ded7ec893ebe80ea50c04fd28ffbef72845c3560 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Thu, 29 Jul 2021 17:46:23 +0800
Subject: [PATCH 15/21] fmt

---
 planner/core/stats.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/planner/core/stats.go b/planner/core/stats.go
index a4e19996834a6..4e5cee99236e2 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -15,7 +15,6 @@ package core
 
 import (
 	"context"
-	"golang.org/x/tools/container/intsets"
 	"math"
 	"sort"
 
@@ -31,6 +30,7 @@ import (
 	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tidb/util/ranger"
 	"go.uber.org/zap"
+	"golang.org/x/tools/container/intsets"
 )
 
 func (p *basePhysicalPlan) StatsCount() float64 {

From a2900fc13b403ae5c5d6c159c54290f2d799a4db Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Mon, 2 Aug 2021 14:17:18 +0800
Subject: [PATCH 16/21] add orderByPKLimitN

---
 planner/core/logical_plan_builder.go | 18 ++++++++++++++++++
 planner/core/logical_plans.go        |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index 838a97bf0359d..fecb7cf404a20 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -3407,6 +3407,15 @@ func (b *PlanBuilder) TableHints() *tableHintInfo {
 	return &(b.tableHintInfo[len(b.tableHintInfo)-1])
 }
 
+func (b *PlanBuilder) setOrderByPKLimitNForDataSource(p LogicalPlan, sel *ast.SelectStmt) {
+	ds, ok := p.(*DataSource)
+	if !ok || sel.OrderBy == nil || sel.Limit == nil {
+		return
+	}
+
+	ds.orderByPKLimitN = true
+}
+
 func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p LogicalPlan, err error) {
 	b.pushSelectOffset(sel.QueryBlockOffset)
 	b.pushTableHints(sel.TableHints, sel.QueryBlockOffset)
@@ -3466,6 +3475,8 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 	if err != nil {
 		return nil, err
 	}
+	// For filling DataSource.orderByPKLimitN
+	ds, isDataSource := p.(*DataSource)
 
 	originalFields := sel.Fields.Fields
 	sel.Fields.Fields, err = b.unfoldWildStar(p, sel.Fields.Fields)
@@ -3650,6 +3661,13 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		if err != nil {
 			return nil, err
 		}
+		if logicalSort, isSort := p.(*LogicalSort); isSort && sel.Limit != nil && isDataSource {
+			if col, isCol := logicalSort.ByItems[0].Expr.(*expression.Column); isCol && ds.handleCols.NumCols() == 1 {
+				if ds.handleCols.GetCol(0).Equal(nil, col) && sel.Limit != nil {
+					ds.orderByPKLimitN = true
+				}
+			}
+		}
 	}
 
 	if sel.Limit != nil {
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index 7c2a7b8b2ad8f..4e60bf82a947d 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -523,6 +523,8 @@ type DataSource struct {
 	// 1. use `inside insert`, `update`, `delete` or `select for update` statement
 	// 2. isolation level is RC
 	isForUpdateRead bool
+	// orderByPKLimitN is true iff there exists `order by pk limit n` pattern.
+	orderByPKLimitN bool
 }
 
 // ExtractCorrelatedCols implements LogicalPlan interface.

From ab63beb620d270d55b9cabe6565866c6b324a437 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Mon, 2 Aug 2021 16:52:45 +0800
Subject: [PATCH 17/21] resolve some TODOs

---
 planner/core/exhaust_physical_plans.go        |  2 +-
 planner/core/find_best_task.go                | 32 ++++++------
 planner/core/stats.go                         | 50 +++++++++++--------
 .../core/testdata/integration_suite_in.json   |  1 +
 .../core/testdata/integration_suite_out.json  | 17 +++++--
 planner/util/path.go                          |  2 +
 6 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go
index 6eaa43d5a192b..dbdb5194e4e36 100644
--- a/planner/core/exhaust_physical_plans.go
+++ b/planner/core/exhaust_physical_plans.go
@@ -1045,7 +1045,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
 		Columns:        ds.TblCols,
 		ColumnNames:    ds.names,
 	}
-	if !ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, is.Table) {
+	if !path.IsSingleScan {
 		// On this way, it's double read case.
 		ts := PhysicalTableScan{
 			Columns:         ds.Columns,
diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 0d14a0b9c83be..d96807c86edc0 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -417,7 +417,6 @@ type candidatePath struct {
 	path               *util.AccessPath
 	accessCondsColSet  *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions.
 	indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters.
-	isSingleScan       bool
 	isMatchProp        bool
 }
 
@@ -452,8 +451,8 @@ func compareBool(l, r bool) int {
 }
 
 func compareIndexBack(lhs, rhs *candidatePath) (int, bool) {
-	result := compareBool(lhs.isSingleScan, rhs.isSingleScan)
-	if result == 0 && !lhs.isSingleScan {
+	result := compareBool(lhs.path.IsSingleScan, rhs.path.IsSingleScan)
+	if result == 0 && !lhs.path.IsSingleScan {
 		// if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters
 		// to compare how many table rows will be accessed.
 		return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet)
@@ -542,16 +541,14 @@ func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.Ph
 	candidate := &candidatePath{path: path}
 	candidate.isMatchProp = ds.isMatchProp(path, prop)
 	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
-	candidate.isSingleScan = true
 	return candidate
 }
 
-func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath {
+func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
 	candidate := &candidatePath{path: path}
 	candidate.isMatchProp = ds.isMatchProp(path, prop)
 	candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
 	candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
-	candidate.isSingleScan = isSingleScan
 	return candidate
 }
 
@@ -594,14 +591,13 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida
 				continue
 			}
 		} else {
-			coveredByIdx := ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
-			if len(path.AccessConds) > 0 || !prop.IsEmpty() || path.Forced || coveredByIdx {
+			if len(path.AccessConds) > 0 || !prop.IsEmpty() || path.Forced || path.IsSingleScan {
 				// We will use index to generate physical plan if any of the following conditions is satisfied:
 				// 1. This path's access cond is not nil.
 				// 2. We have a non-empty prop to match.
 				// 3. This index is forced to choose.
 				// 4. The needed columns are all covered by index columns(and handleCol).
-				currentCandidate = ds.getIndexCandidate(path, prop, coveredByIdx)
+				currentCandidate = ds.getIndexCandidate(path, prop)
 			} else {
 				continue
 			}
@@ -1097,7 +1093,7 @@ func (ts *PhysicalTableScan) appendExtraHandleCol(ds *DataSource) (*expression.C
 
 // convertToIndexScan converts the DataSource to index scan with idx.
 func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candidate *candidatePath) (task task, err error) {
-	if !candidate.isSingleScan {
+	if !candidate.path.IsSingleScan {
 		// If it's parent requires single read task, return max cost.
 		if prop.TaskTp == property.CopSingleReadTaskType {
 			return invalidTask, nil
@@ -1110,7 +1106,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid
 		return invalidTask, nil
 	}
 	path := candidate.path
-	is, cost, _ := ds.getOriginalPhysicalIndexScan(prop, path, candidate.isMatchProp, candidate.isSingleScan)
+	is, cost, _ := ds.getOriginalPhysicalIndexScan(prop, path, candidate.isMatchProp, candidate.path.IsSingleScan)
 	cop := &copTask{
 		indexPlan:   is,
 		tblColHists: ds.TblColHists,
@@ -1122,7 +1118,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid
 		Columns:        ds.TblCols,
 		ColumnNames:    ds.names,
 	}
-	if !candidate.isSingleScan {
+	if !candidate.path.IsSingleScan {
 		// On this way, it's double read case.
 		ts := PhysicalTableScan{
 			Columns:         ds.Columns,
@@ -1688,8 +1684,8 @@ func (ds *DataSource) convertToPointGet(prop *property.PhysicalProperty, candida
 	if !prop.IsEmpty() && !candidate.isMatchProp {
 		return invalidTask
 	}
-	if prop.TaskTp == property.CopDoubleReadTaskType && candidate.isSingleScan ||
-		prop.TaskTp == property.CopSingleReadTaskType && !candidate.isSingleScan {
+	if prop.TaskTp == property.CopDoubleReadTaskType && candidate.path.IsSingleScan ||
+		prop.TaskTp == property.CopSingleReadTaskType && !candidate.path.IsSingleScan {
 		return invalidTask
 	}
 
@@ -1745,7 +1741,7 @@ func (ds *DataSource) convertToPointGet(prop *property.PhysicalProperty, candida
 		pointGetPlan.IdxColLens = candidate.path.IdxColLens
 		pointGetPlan.IndexValues = candidate.path.Ranges[0].LowVal
 		pointGetPlan.PartitionInfo = partitionInfo
-		if candidate.isSingleScan {
+		if candidate.path.IsSingleScan {
 			cost = pointGetPlan.GetCost(candidate.path.IdxCols)
 		} else {
 			cost = pointGetPlan.GetCost(ds.TblCols)
@@ -1771,8 +1767,8 @@ func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, ca
 	if !prop.IsEmpty() && !candidate.isMatchProp {
 		return invalidTask
 	}
-	if prop.TaskTp == property.CopDoubleReadTaskType && candidate.isSingleScan ||
-		prop.TaskTp == property.CopSingleReadTaskType && !candidate.isSingleScan {
+	if prop.TaskTp == property.CopDoubleReadTaskType && candidate.path.IsSingleScan ||
+		prop.TaskTp == property.CopSingleReadTaskType && !candidate.path.IsSingleScan {
 		return invalidTask
 	}
 
@@ -1819,7 +1815,7 @@ func (ds *DataSource) convertToBatchPointGet(prop *property.PhysicalProperty, ca
 			batchPointGetPlan.KeepOrder = true
 			batchPointGetPlan.Desc = prop.SortItems[0].Desc
 		}
-		if candidate.isSingleScan {
+		if candidate.path.IsSingleScan {
 			cost = batchPointGetPlan.GetCost(candidate.path.IdxCols)
 		} else {
 			cost = batchPointGetPlan.GetCost(ds.TblCols)
diff --git a/planner/core/stats.go b/planner/core/stats.go
index 4e5cee99236e2..2e98e3d664c64 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -15,8 +15,10 @@ package core
 
 import (
 	"context"
+	"fmt"
 	"math"
 	"sort"
+	"strings"
 
 	"github.com/pingcap/errors"
 	"github.com/pingcap/parser/ast"
@@ -286,45 +288,39 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 	ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths)
 	uniqueIdxsWithDoubleScan := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths))
 	singleScanIdxs := make([]*util.AccessPath, 0, len(ds.possibleAccessPaths))
-	var selected, uniqueBest, refinedBest *util.AccessPath
+	var (
+		selected, uniqueBest, refinedBest *util.AccessPath
+		isRefinedPath                     bool
+	)
 	for _, path := range ds.possibleAccessPaths {
 		if path.IsTablePath() {
 			err := ds.deriveTablePathStats(path, ds.pushedDownConds, false)
 			if err != nil {
 				return nil, err
 			}
+			path.IsSingleScan = true
 		} else {
 			ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
+			path.IsSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
 		}
-		// TODO: Should we handle TiFlash case specially?
 		// Try some heuristic rules to select access path.
 		if len(path.Ranges) == 0 {
 			selected = path
 			break
 		}
-		// TODO: Can we record isSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
-		// as a field of AccessPath? In this way ds.isCoveringIndex only needs to be called once for each path.
 		if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) {
 			if path.IsTablePath() || path.Index.Unique {
-				var singleScan bool
-				if path.IsTablePath() {
-					singleScan = true
-				} else {
-					singleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
-				}
-				if singleScan {
-					// TODO: What if multiple paths satisfy all conditions?
+				if path.IsSingleScan {
 					selected = path
 					break
 				}
 				uniqueIdxsWithDoubleScan = append(uniqueIdxsWithDoubleScan, path)
 			}
-		} else if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
+		} else if path.IsSingleScan {
 			singleScanIdxs = append(singleScanIdxs, path)
 		}
 	}
 	if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 {
-		// TODO: Move accessCondsColSet from candidatePath to AccessPath so that we can use it both here and skyline pruning.
 		uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan))
 		for _, uniqueIdx := range uniqueIdxsWithDoubleScan {
 			uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds))
@@ -362,6 +358,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 		// Hence we should compare `len(refinedBest.Ranges)` and `2*len(uniqueBest.Ranges)` to select the better one.
 		if refinedBest != nil && (uniqueBest == nil || len(refinedBest.Ranges) < 2*len(uniqueBest.Ranges)) {
 			selected = refinedBest
+			isRefinedPath = true
 		} else {
 			selected = uniqueBest
 		}
@@ -373,20 +370,33 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 		// TODO: Can we make a more careful check on whether the optimization depends on mutable constants?
 		ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
 		if ds.ctx.GetSessionVars().StmtCtx.InExplainStmt {
-			var tableName, pathName string
+			var tableName string
 			if ds.TableAsName.O == "" {
 				tableName = ds.tableInfo.Name.O
 			} else {
 				tableName = ds.TableAsName.O
 			}
 			if selected.IsTablePath() {
-				pathName = "primary key of " + tableName
+				// TODO: decide whether the note should call this path "primary key", "handle", or use the real column name.
+				ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(fmt.Sprintf("handle of %s is selected since the path only has point ranges", tableName)))
 			} else {
-				pathName = "index " + selected.Index.Name.O + " of " + tableName
+				var sb strings.Builder
+				if selected.Index.Unique {
+					sb.WriteString("unique ")
+				}
+				sb.WriteString(fmt.Sprintf("index %s of %s is selected since the path", selected.Index.Name.O, tableName))
+				if isRefinedPath {
+					sb.WriteString(" only fetches limited number of rows")
+				} else {
+					sb.WriteString(" only has point ranges")
+				}
+				if selected.IsSingleScan {
+					sb.WriteString(" with single scan")
+				} else {
+					sb.WriteString(" with double scan")
+				}
+				ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(sb.String()))
 			}
-			// TODO: Do we need to specify which heuristic rule `selected` matches? It is kind of hard to briefly describe the
-			// three heuristic rules. Besides, we can distinguish the three rules by checking EXPLAIN result.
-			ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected by heuristics"))
 		}
 	}
 
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index 901d509459403..2b661b674c535 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -324,6 +324,7 @@
   {
     "name": "TestHeuristicIndexSelection",
     "cases": [
+      "select * from t1 where a = 3 or a = 5",
       "select f, g from t1 where f = 2 and g in (3, 4, 5)",
       "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)",
       "select f, g from t1 where f = 2 and g > 3",
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index b6ce5c8d28671..4a1b76289375e 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1683,13 +1683,22 @@
   {
     "Name": "TestHeuristicIndexSelection",
     "Cases": [
+      {
+        "SQL": "select * from t1 where a = 3 or a = 5",
+        "Plan": [
+          "Batch_Point_Get 2.00 root table:t1 handle:[3 5], keep order:false, desc:false"
+        ],
+        "Warnings": [
+          "Note 1105 handle of t1 is selected since the path only has point ranges"
+        ]
+      },
       {
         "SQL": "select f, g from t1 where f = 2 and g in (3, 4, 5)",
         "Plan": [
           "Batch_Point_Get 3.00 root table:t1, index:f_g(f, g) keep order:false, desc:false"
         ],
         "Warnings": [
-          "Note 1105 index f_g of t1 is selected by heuristics"
+          "Note 1105 unique index f_g of t1 is selected since the path only has point ranges with single scan"
         ]
       },
       {
@@ -1698,7 +1707,7 @@
           "Batch_Point_Get 4.00 root table:t1, index:c_d_e(c, d, e) keep order:false, desc:false"
         ],
         "Warnings": [
-          "Note 1105 index c_d_e of t1 is selected by heuristics"
+          "Note 1105 unique index c_d_e of t1 is selected since the path only has point ranges with double scan"
         ]
       },
       {
@@ -1708,7 +1717,7 @@
           "└─IndexRangeScan 33.33 cop[tikv] table:t1, index:f_g(f, g) range:(2 3,2 +inf], keep order:false, stats:pseudo"
         ],
         "Warnings": [
-          "Note 1105 index f_g of t1 is selected by heuristics"
+          "Note 1105 unique index f_g of t1 is selected since the path only fetches limited number of rows with single scan"
         ]
       },
       {
@@ -1718,7 +1727,7 @@
           "└─Point_Get 1.00 root table:t2, index:idx_a(a) "
         ],
         "Warnings": [
-          "Note 1105 index idx_a of t2 is selected by heuristics"
+          "Note 1105 unique index idx_a of t2 is selected since the path only has point ranges with double scan"
         ]
       }
     ]
diff --git a/planner/util/path.go b/planner/util/path.go
index 665d71ccd4d23..5a29d007c29db 100644
--- a/planner/util/path.go
+++ b/planner/util/path.go
@@ -62,6 +62,8 @@ type AccessPath struct {
 	IsCommonHandlePath bool
 	// Forced means this path is generated by `use/force index()`.
 	Forced bool
+	// IsSingleScan is true if the path is a single index/table scan, and false if it needs table access after the index scan.
+	IsSingleScan bool
 }
 
 // IsTablePath returns true if it's IntHandlePath or CommonHandlePath.

From 11f3080d122ff4ffccb70aa4ed2073380df1a3cf Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Mon, 2 Aug 2021 19:39:20 +0800
Subject: [PATCH 18/21] extract checkOrderByPK and support common handle

---
 planner/core/logical_plan_builder.go | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index fecb7cf404a20..c93c7d5ae0206 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -3416,6 +3416,27 @@ func (b *PlanBuilder) setOrderByPKLimitNForDataSource(p LogicalPlan, sel *ast.Se
 	ds.orderByPKLimitN = true
 }
 
+func checkOrderByPK(ds *DataSource, byItems []*util.ByItems) bool {
+	if ds.tableInfo.PKIsHandle && len(byItems) == 1 {
+		if col, isCol := byItems[0].Expr.(*expression.Column); isCol && col.Equal(nil, ds.getPKIsHandleCol()) {
+			return true
+		}
+		return false
+	}
+	if ds.tableInfo.IsCommonHandle && len(byItems) == len(ds.commonHandleCols) {
+		orderByPK := true
+		for i, byItem := range byItems {
+			if col, isCol := byItem.Expr.(*expression.Column); !isCol || ds.commonHandleLens[i] != types.UnspecifiedLength ||
+				!col.Equal(nil, ds.commonHandleCols[i]) || (i > 0 && byItem.Desc != byItems[i-1].Desc) {
+				orderByPK = false
+				break
+			}
+		}
+		return orderByPK
+	}
+	return false
+}
+
 func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p LogicalPlan, err error) {
 	b.pushSelectOffset(sel.QueryBlockOffset)
 	b.pushTableHints(sel.TableHints, sel.QueryBlockOffset)
@@ -3662,11 +3683,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 			return nil, err
 		}
 		if logicalSort, isSort := p.(*LogicalSort); isSort && sel.Limit != nil && isDataSource {
-			if col, isCol := logicalSort.ByItems[0].Expr.(*expression.Column); isCol && ds.handleCols.NumCols() == 1 {
-				if ds.handleCols.GetCol(0).Equal(nil, col) && sel.Limit != nil {
-					ds.orderByPKLimitN = true
-				}
-			}
+			ds.orderByPKLimitN = checkOrderByPK(ds, logicalSort.ByItems)
 		}
 	}
 

From 97c34172d4fe374992b5b49bcaafff758e4ab1fc Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Tue, 3 Aug 2021 17:45:22 +0800
Subject: [PATCH 19/21] add tidb_enable_maybe_good_heuristics, ORDER BY PK LIMIT N heuristic & testcases

---
 executor/set_test.go                          | 10 ++++++
 planner/core/integration_test.go              | 24 ++++++++++++++
 planner/core/stats.go                         | 32 ++++++++++++++++++-
 .../core/testdata/integration_suite_in.json   |  9 +++++-
 .../core/testdata/integration_suite_out.json  | 30 +++++++++++++++++
 sessionctx/variable/session.go                |  4 +++
 sessionctx/variable/sysvar.go                 |  4 +++
 sessionctx/variable/tidb_vars.go              |  4 +++
 8 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/executor/set_test.go b/executor/set_test.go
index 8980efe58cad5..9e7d7a0dc3a9e 100644
--- a/executor/set_test.go
+++ b/executor/set_test.go
@@ -539,6 +539,16 @@ func (s *testSerialSuite1) TestSetVar(c *C) {
 	tk.MustExec(`set tidb_opt_limit_push_down_threshold = 20`)
 	tk.MustQuery(`select @@global.tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("100"))
 	tk.MustQuery(`select @@tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("20"))
+
+	tk.MustQuery("select @@tidb_enable_maybe_good_heuristics").Check(testkit.Rows("0"))
+	tk.MustExec("set global tidb_enable_maybe_good_heuristics = 1")
+	tk.MustQuery("select @@global.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("1"))
+	tk.MustExec("set global tidb_enable_maybe_good_heuristics = 0")
+	tk.MustQuery("select @@global.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("0"))
+	tk.MustExec("set session tidb_enable_maybe_good_heuristics = 1")
+	tk.MustQuery("select @@session.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("1"))
+	tk.MustExec("set session tidb_enable_maybe_good_heuristics = 0")
+	tk.MustQuery("select @@session.tidb_enable_maybe_good_heuristics").Check(testkit.Rows("0"))
 }
 
 func (s *testSuite5) TestTruncateIncorrectIntSessionVar(c *C) {
diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index 94e0e5096c965..ae2e5edfda4d2 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -4164,3 +4164,27 @@ func (s *testIntegrationSuite) TestHeuristicIndexSelection(c *C) {
 		tk.MustQuery("show warnings").Check(testkit.Rows(output[i].Warnings...))
 	}
 }
+
+func (s *testIntegrationSuite) TestMaybeGoodHeuristics(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int primary key, b int, c int, index idx_b(b))")
+
+	var input []string
+	var output []struct {
+		SQL      string
+		Plan     []string
+		Warnings []string
+	}
+	s.testData.GetTestCases(c, &input, &output)
+	for i, tt := range input {
+		s.testData.OnRecord(func() {
+			output[i].SQL = tt
+			output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows())
+			output[i].Warnings = s.testData.ConvertRowsToStrings(tk.MustQuery("show warnings").Rows())
+		})
+		tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...))
+		tk.MustQuery("show warnings").Check(testkit.Rows(output[i].Warnings...))
+	}
+}
diff --git a/planner/core/stats.go b/planner/core/stats.go
index 2e98e3d664c64..0c31d1902cfa9 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -308,7 +308,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 			selected = path
 			break
 		}
-		if path.OnlyPointRange(ds.SCtx().GetSessionVars().StmtCtx) {
+		if path.OnlyPointRange(ds.ctx.GetSessionVars().StmtCtx) {
 			if path.IsTablePath() || path.Index.Unique {
 				if path.IsSingleScan {
 					selected = path
@@ -398,6 +398,36 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 				ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(sb.String()))
 			}
 		}
+	} else if ds.ctx.GetSessionVars().EnableMaybeGoodHeuristics && ds.orderByPKLimitN {
+		// maybe-good heuristics
+		// For a query like `where index_col = ... order by pk limit n`, if the estimated row count for `index_col = ...` is small enough, we prefer the index.
+		for _, path := range ds.possibleAccessPaths {
+			// TODO: add a variable instead of using 100
+			if path.OnlyPointRange(ds.ctx.GetSessionVars().StmtCtx) && path.CountAfterAccess < 100 {
+				selected = path
+				break
+			}
+		}
+		if selected != nil {
+			ds.possibleAccessPaths[0] = selected
+			ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
+			// TODO: Can we make a more careful check on whether the optimization depends on mutable constants?
+			ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
+			if ds.ctx.GetSessionVars().StmtCtx.InExplainStmt {
+				var tableName, pathName string
+				if ds.TableAsName.O == "" {
+					tableName = ds.tableInfo.Name.O
+				} else {
+					tableName = ds.TableAsName.O
+				}
+				if selected.IsTablePath() {
+					pathName = "handle of " + tableName
+				} else {
+					pathName = "index " + selected.Index.Name.O + " of " + tableName
+				}
+				ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern"))
+			}
+		}
 	}
 
 	// TODO: implement UnionScan + IndexMerge
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index 2b661b674c535..48c6b6f42f0d9 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -328,7 +328,14 @@
       "select f, g from t1 where f = 2 and g in (3, 4, 5)",
       "select * from t1 where c = 1 and (d = 2 or d = 3) and e in (4, 5)",
       "select f, g from t1 where f = 2 and g > 3",
-      "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5);"
+      "select a, b, c from t2 where a = 1 and b = 2 and c in (1, 2, 3, 4, 5)"
+    ]
+  },
+  {
+    "name": "TestMaybeGoodHeuristics",
+    "cases": [
+      "select * from t where b = 3 order by a limit 10",
+      "select * from t where b in (2, 3, 4) order by a limit 10"
     ]
   }
 ]
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 4a1b76289375e..5a5032cefbe73 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1731,5 +1731,35 @@
         ]
       }
     ]
+  },
+  {
+    "Name": "TestMaybeGoodHeuristics",
+    "Cases": [
+      {
+        "SQL": "select * from t where b = 3 order by a limit 10",
+        "Plan": [
+          "IndexLookUp 10.00 root  limit embedded(offset:0, count:10)",
+          "├─Limit(Build) 10.00 cop[tikv]  offset:0, count:10",
+          "│ └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_b(b) range:[3,3], keep order:true, stats:pseudo",
+          "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
+        ],
+        "Warnings": [
+          "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern"
+        ]
+      },
+      {
+        "SQL": "select * from t where b in (2, 3, 4) order by a limit 10",
+        "Plan": [
+          "TopN 0.00 root  test.t.a, offset:0, count:10",
+          "└─IndexLookUp 10.00 root  ",
+          "  ├─TopN(Build) 10.00 cop[tikv]  test.t.a, offset:0, count:10",
+          "  │ └─IndexRangeScan 30.00 cop[tikv] table:t, index:idx_b(b) range:[2,2], [3,3], [4,4], keep order:false, stats:pseudo",
+          "  └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
+        ],
+        "Warnings": [
+          "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern"
+        ]
+      }
+    ]
   }
 ]
diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go
index 1baa10d590da8..7059fe593da54 100644
--- a/sessionctx/variable/session.go
+++ b/sessionctx/variable/session.go
@@ -871,6 +871,9 @@ type SessionVars struct {
 
 	// TemporaryTableData stores committed kv values for temporary table for current session.
 	TemporaryTableData kv.MemBuffer
+
+	// EnableMaybeGoodHeuristics indicates whether to apply maybe-good heuristics when the optimizer generates plans.
+	EnableMaybeGoodHeuristics bool
 }
 
 // AllocMPPTaskID allocates task id for mpp tasks. It will reset the task id if the query's
@@ -1087,6 +1090,7 @@ func NewSessionVars() *SessionVars {
 		CTEMaxRecursionDepth:        DefCTEMaxRecursionDepth,
 		TMPTableSize:                DefTMPTableSize,
 		EnableGlobalTemporaryTable:  DefTiDBEnableGlobalTemporaryTable,
+		EnableMaybeGoodHeuristics:   DefTiDBEnableMaybeGoodHeuristics,
 	}
 	vars.KVVars = tikvstore.NewVariables(&vars.Killed)
 	vars.Concurrency = Concurrency{
diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go
index a791d52063abf..e5fdb87e2a315 100644
--- a/sessionctx/variable/sysvar.go
+++ b/sessionctx/variable/sysvar.go
@@ -1769,6 +1769,10 @@ var defaultSysVars = []*SysVar{
 		s.EnableStableResultMode = TiDBOptOn(val)
 		return nil
 	}},
+	{Scope: ScopeGlobal | ScopeSession, Name: TiDBEnableMaybeGoodHeuristics, Value: BoolToOnOff(DefTiDBEnableMaybeGoodHeuristics), Hidden: true, Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
+		s.EnableMaybeGoodHeuristics = TiDBOptOn(val)
+		return nil
+	}},
 }
 
 // FeedbackProbability points to the FeedbackProbability in statistics package.
diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go
index c500aa4eb6727..e47dc179add04 100644
--- a/sessionctx/variable/tidb_vars.go
+++ b/sessionctx/variable/tidb_vars.go
@@ -580,6 +580,9 @@ const (
 
 	// TiDBEnableOrderedResultMode indicates if stabilize query results.
 	TiDBEnableOrderedResultMode = "tidb_enable_ordered_result_mode"
+
+	// TiDBEnableMaybeGoodHeuristics indicates whether to apply maybe-good heuristics when the optimizer generates plans.
+	TiDBEnableMaybeGoodHeuristics = "tidb_enable_maybe_good_heuristics"
 )
 
 // TiDB vars that have only global scope
@@ -739,6 +742,7 @@ const (
 	DefTMPTableSize                       = 16777216
 	DefTiDBEnableLocalTxn                 = false
 	DefTiDBEnableOrderedResultMode        = false
+	DefTiDBEnableMaybeGoodHeuristics      = false
 )
 
 // Process global variables.

From fb8ed4950b6eab81102d676f148ead368129d83d Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Mon, 9 Aug 2021 12:16:36 +0800
Subject: [PATCH 20/21] fix and add subquery test

---
 planner/core/integration_test.go              |  3 +-
 planner/core/stats.go                         | 30 ++++++++++++++
 .../core/testdata/integration_suite_in.json   |  3 +-
 .../core/testdata/integration_suite_out.json  | 39 ++++++++++++++-----
 4 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index b18dc2b1fdf85..1c83aeb6cfe3d 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -4183,7 +4183,8 @@ func (s *testIntegrationSuite) TestMaybeGoodHeuristics(c *C) {
 	tk.MustExec("use test")
 	tk.MustExec("drop table if exists t")
 	tk.MustExec("create table t(a int primary key, b int, c int, index idx_b(b))")
-  
+	tk.MustExec("set tidb_enable_maybe_good_heuristics = 1")
+
 	var input []string
 	var output []struct {
 		SQL      string
diff --git a/planner/core/stats.go b/planner/core/stats.go
index 3a21a6f14d904..025c23e3faad0 100644
--- a/planner/core/stats.go
+++ b/planner/core/stats.go
@@ -373,6 +373,36 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
 				ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(sb.String()))
 			}
 		}
+	} else if ds.ctx.GetSessionVars().EnableMaybeGoodHeuristics && ds.orderByPKLimitN {
+		// maybe-good heuristics
+		// For a query like `where index_col = ... order by pk limit n`, if the estimated row count for `index_col = ...` is small enough, we prefer the index.
+		for _, path := range ds.possibleAccessPaths {
+			const smallCountAfterAccess = 100
+			if path.OnlyPointRange(ds.ctx.GetSessionVars().StmtCtx) && path.CountAfterAccess < smallCountAfterAccess {
+				selected = path
+				break
+			}
+		}
+		if selected != nil {
+			ds.possibleAccessPaths[0] = selected
+			ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
+			// TODO: Can we make a more careful check on whether the optimization depends on mutable constants?
+			ds.ctx.GetSessionVars().StmtCtx.OptimDependOnMutableConst = true
+			if ds.ctx.GetSessionVars().StmtCtx.InVerboseExplain {
+				var tableName, pathName string
+				if ds.TableAsName.O == "" {
+					tableName = ds.tableInfo.Name.O
+				} else {
+					tableName = ds.TableAsName.O
+				}
+				if selected.IsTablePath() {
+					pathName = "handle of " + tableName
+				} else {
+					pathName = "index " + selected.Index.Name.O + " of " + tableName
+				}
+				ds.ctx.GetSessionVars().StmtCtx.AppendNote(errors.New(pathName + " is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern"))
+			}
+		}
 	}
 	return nil
 }
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index 52c863ff0faf6..fcb037db7fc79 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -335,7 +335,8 @@
     "name": "TestMaybeGoodHeuristics",
     "cases": [
       "select * from t where b = 3 order by a limit 10",
-      "select * from t where b in (2, 3, 4) order by a limit 10"
+      "select * from t where b in (2, 3, 4) order by a limit 10",
+      "select * from t as t1 where t1.a > any(select t2.b from t as t2 where t2.b = 2 and t2.c > t1.a order by t2.a limit 10)"
     ]
   },
   {
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 3ca941a7f70f2..06d88db893ddc 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -1738,10 +1738,10 @@
       {
         "SQL": "select * from t where b = 3 order by a limit 10",
         "Plan": [
-          "IndexLookUp 10.00 root  limit embedded(offset:0, count:10)",
-          "├─Limit(Build) 10.00 cop[tikv]  offset:0, count:10",
-          "│ └─IndexRangeScan 10.00 cop[tikv] table:t, index:idx_b(b) range:[3,3], keep order:true, stats:pseudo",
-          "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
+          "IndexLookUp_20 10.00 201.28 root  limit embedded(offset:0, count:10)",
+          "├─Limit_19(Build) 10.00 590.00 cop[tikv]  offset:0, count:10",
+          "│ └─IndexRangeScan_17 10.00 590.00 cop[tikv] table:t, index:idx_b(b) range:[3,3], keep order:true, stats:pseudo",
+          "└─TableRowIDScan_18(Probe) 10.00 590.00 cop[tikv] table:t keep order:false, stats:pseudo"
         ],
         "Warnings": [
           "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern"
@@ -1750,15 +1750,36 @@
       {
         "SQL": "select * from t where b in (2, 3, 4) order by a limit 10",
         "Plan": [
-          "TopN 0.00 root  test.t.a, offset:0, count:10",
-          "└─IndexLookUp 10.00 root  ",
-          "  ├─TopN(Build) 10.00 cop[tikv]  test.t.a, offset:0, count:10",
-          "  │ └─IndexRangeScan 30.00 cop[tikv] table:t, index:idx_b(b) range:[2,2], [3,3], [4,4], keep order:false, stats:pseudo",
-          "  └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo"
+          "TopN_9 10.00 379.61 root  test.t.a, offset:0, count:10",
+          "└─IndexLookUp_16 10.00 279.95 root  ",
+          "  ├─TopN_15(Build) 10.00 0.00 cop[tikv]  test.t.a, offset:0, count:10",
+          "  │ └─IndexRangeScan_13 30.00 1770.00 cop[tikv] table:t, index:idx_b(b) range:[2,2], [3,3], [4,4], keep order:false, stats:pseudo",
+          "  └─TableRowIDScan_14(Probe) 10.00 1770.00 cop[tikv] table:t keep order:false, stats:pseudo"
         ],
         "Warnings": [
           "Note 1105 index idx_b of t is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern"
         ]
+      },
+      {
+        "SQL": "select * from t as t1 where t1.a > any(select t2.b from t as t2 where t2.b = 2 and t2.c > t1.a order by t2.a limit 10)",
+        "Plan": [
+          "Projection_16 10000.00 2533281.69 root  test.t.a, test.t.b, test.t.c",
+          "└─Apply_18 10000.00 2527263.69 root  CARTESIAN inner join, other cond:or(gt(test.t.a, Column#8), if(ne(Column#9, 0), NULL, 0))",
+          "  ├─TableReader_20(Build) 10000.00 54251.33 root  data:TableFullScan_19",
+          "  │ └─TableFullScan_19 10000.00 570020.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
+          "  └─Selection_21(Probe) 0.80 244.90 root  ne(Column#10, 0)",
+          "    └─HashAgg_22 1.00 241.90 root  funcs:min(Column#13)->Column#8, funcs:sum(Column#14)->Column#9, funcs:count(1)->Column#10",
+          "      └─Projection_46 8.00 0.00 root  test.t.b, cast(isnull(test.t.b), decimal(22,0) BINARY)->Column#14",
+          "        └─Limit_28 8.00 194.50 root  offset:0, count:10",
+          "          └─IndexLookUp_37 8.00 194.50 root  ",
+          "            ├─IndexRangeScan_34(Build) 10.00 590.00 cop[tikv] table:t2, index:idx_b(b) range:[2,2], keep order:true, stats:pseudo",
+          "            └─Selection_36(Probe) 8.00 0.00 cop[tikv]  gt(test.t.c, test.t.a)",
+          "              └─TableRowIDScan_35 10.00 590.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
+        ],
+        "Warnings": [
+          "Note 1105 index idx_b of t2 is selected since the path has point ranges and fetches limited number of rows under ORDER BY PK LIMIT N pattern",
+          "Note 1105 [t1] remain after pruning paths for t1 given Prop{SortItems: [], TaskTp: rootTask}"
+        ]
       }
     ]
   },

From c451d9eeba6e6c69e34980c22f3a7d0cd9b0cba2 Mon Sep 17 00:00:00 2001
From: xuyifan <675434007@qq.com>
Date: Mon, 9 Aug 2021 12:52:06 +0800
Subject: [PATCH 21/21] remove unused func

---
 planner/core/logical_plan_builder.go | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index f620290339f35..f534ba0154fda 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -3407,15 +3407,6 @@ func (b *PlanBuilder) TableHints() *tableHintInfo {
 	return &(b.tableHintInfo[len(b.tableHintInfo)-1])
 }
 
-func (b *PlanBuilder) setOrderByPKLimitNForDataSource(p LogicalPlan, sel *ast.SelectStmt) {
-	ds, ok := p.(*DataSource)
-	if !ok || sel.OrderBy == nil || sel.Limit == nil {
-		return
-	}
-
-	ds.orderByPKLimitN = true
-}
-
 func checkOrderByPK(ds *DataSource, byItems []*util.ByItems) bool {
 	if ds.tableInfo.PKIsHandle && len(byItems) == 1 {
 		if col, isCol := byItems[0].Expr.(*expression.Column); isCol && col.Equal(nil, ds.getPKIsHandleCol()) {