Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: adjust estimated row count when pushing Limit and keep order for partitioned table #41103

Merged
merged 9 commits into from
Feb 9, 2023
40 changes: 40 additions & 0 deletions planner/core/integration_partition_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1655,3 +1655,43 @@ func TestPartitionProcessorWithUninitializedTable(t *testing.T) {
}
tk.MustQuery("explain format=brief select * from q1,q2").CheckAt([]int{0}, rows)
}

// TestEstimationForTopNPushToDynamicPartition runs the statements listed for
// this test in the integration partition suite test data against a plain table
// and range-, list- and hash-partitioned tables with the same columns, and
// compares each result with the recorded plan rows.
//
// All four tables share the layout (a int, b int, c int) with a secondary
// index ia(a) and a clustered primary key on b, so the plain table t serves as
// the reference for the partitioned ones.
func TestEstimationForTopNPushToDynamicPartition(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("set tidb_cost_model_version=2")
	// Drop every table this test creates (previously only tlist was dropped),
	// so the setup does not depend on what an earlier run left behind.
	tk.MustExec("drop table if exists trange, tlist, thash, t")
	tk.MustExec(`set tidb_enable_list_partition = 1`)
	tk.MustExec(`create table trange (a int, b int, c int, index ia(a), primary key (b) clustered)
partition by range(b) (
partition p1 values less than(100),
partition p2 values less than(200),
partition p3 values less than maxvalue);`)
	tk.MustExec(`create table tlist (a int, b int, c int, index ia(a), primary key (b) clustered)
partition by list (b) (
partition p0 values in (0, 1, 2),
partition p1 values in (3, 4, 5));`)
	tk.MustExec(`create table thash (a int, b int, c int, index ia(a), primary key (b) clustered)
partition by hash(b) partitions 4;`)
	tk.MustExec(`create table t (a int, b int, c int, index ia(a), primary key (b) clustered);`)
	// Analyze the (empty) tables before the test statements are executed.
	tk.MustExec(`analyze table trange;`)
	tk.MustExec(`analyze table tlist;`)
	tk.MustExec(`analyze table thash;`)
	tk.MustExec(`analyze table t;`)

	var input []string
	var output []struct {
		SQL  string
		Plan []string
	}
	integrationPartitionSuiteData := core.GetIntegrationPartitionSuiteData()
	integrationPartitionSuiteData.LoadTestCases(t, &input, &output)
	for i, sql := range input {
		// The callback passed to testdata.OnRecord stores the statement and
		// its current result rows into output[i].
		testdata.OnRecord(func() {
			output[i].SQL = sql
			output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
		})
		// The result must match the expected rows exactly.
		tk.MustQuery(sql).Check(testkit.Rows(output[i].Plan...))
	}
}
8 changes: 8 additions & 0 deletions planner/core/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,9 @@ type PhysicalPlan interface {
// Stats returns the StatsInfo of the plan.
Stats() *property.StatsInfo

// SetStats sets basePlan.stats inside the basePhysicalPlan.
SetStats(s *property.StatsInfo)

// ExplainNormalizedInfo returns operator normalized information for generating digest.
ExplainNormalizedInfo() string

Expand Down Expand Up @@ -815,6 +818,11 @@ func (p *basePlan) Stats() *property.StatsInfo {
return p.stats
}

// SetStats sets basePlan.stats to s.
// The pointer is stored as-is; the StatsInfo it points to is not copied.
func (p *basePlan) SetStats(s *property.StatsInfo) {
p.stats = s
}

// basePlanSize is the size of a basePlan value in bytes, as reported by
// unsafe.Sizeof on the zero value of the struct.
const basePlanSize = int64(unsafe.Sizeof(basePlan{}))

Expand Down
36 changes: 36 additions & 0 deletions planner/core/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,10 @@ func (p *PhysicalTopN) pushTopNDownToDynamicPartition(copTsk *copTask) (task, bo
return true
}
var (
selOnIdxScan *PhysicalSelection
selOnTblScan *PhysicalSelection
selSelectivity float64

idxScan *PhysicalIndexScan
tblScan *PhysicalTableScan
tblInfo *model.TableInfo
Expand All @@ -1044,6 +1048,7 @@ func (p *PhysicalTopN) pushTopNDownToDynamicPartition(copTsk *copTask) (task, bo
}
finalIdxScanPlan := copTsk.indexPlan
for len(finalIdxScanPlan.Children()) > 0 && finalIdxScanPlan.Children()[0] != nil {
selOnIdxScan, _ = finalIdxScanPlan.(*PhysicalSelection)
finalIdxScanPlan = finalIdxScanPlan.Children()[0]
}
idxScan = finalIdxScanPlan.(*PhysicalIndexScan)
Expand All @@ -1056,12 +1061,21 @@ func (p *PhysicalTopN) pushTopNDownToDynamicPartition(copTsk *copTask) (task, bo
}
finalTblScanPlan := copTsk.tablePlan
for len(finalTblScanPlan.Children()) > 0 {
selOnTblScan, _ = finalTblScanPlan.(*PhysicalSelection)
finalTblScanPlan = finalTblScanPlan.Children()[0]
}
tblScan = finalTblScanPlan.(*PhysicalTableScan)
tblInfo = tblScan.Table
}

// Note that we only need to care about one Selection at most.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only care about specified tree type?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. From the current implementation, the only thing that could possibly be here is a Selection.
@winoros can you verify this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

if selOnIdxScan != nil && idxScan.statsInfo().RowCount > 0 {
selSelectivity = selOnIdxScan.statsInfo().RowCount / idxScan.statsInfo().RowCount
}
if idxScan == nil && selOnTblScan != nil && tblScan.statsInfo().RowCount > 0 {
selSelectivity = selOnTblScan.statsInfo().RowCount / tblScan.statsInfo().RowCount
}

pi := tblInfo.GetPartitionInfo()
if pi == nil {
return nil, false
Expand All @@ -1083,6 +1097,17 @@ func (p *PhysicalTopN) pushTopNDownToDynamicPartition(copTsk *copTask) (task, bo
}.Init(p.SCtx(), stats, p.SelectBlockOffset())
pushedLimit.SetSchema(copTsk.indexPlan.Schema())
copTsk = attachPlan2Task(pushedLimit, copTsk).(*copTask)

// A similar but simplified logic compared with the ExpectedCnt handling logic in getOriginalPhysicalIndexScan.
child := pushedLimit.Children()[0]
// The row count of the direct child of Limit should be adjusted to be no larger than the Limit.Count.
child.SetStats(child.statsInfo().ScaleByExpectCnt(float64(newCount)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can call ScaleByExpectCnt directly? That func changed the pointer itself.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the stats are not referenced by another operator, just calling ScaleByExpectCnt is OK.
Sometimes the final-agg and partial-agg share the same stats (the same pointer); I am not sure whether other such cases exist.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can call ScaleByExpectCnt directly? That func changed the pointer itself.

No. It won't change itself.

// The Limit->Selection->IndexScan case:
// adjust the row count of IndexScan according to the selectivity of the Selection.
if selSelectivity > 0 && selSelectivity < 1 {
scaledRowCount := child.Stats().RowCount / selSelectivity
idxScan.SetStats(idxScan.Stats().ScaleByExpectCnt(scaledRowCount))
}
} else if copTsk.indexPlan == nil {
if tblScan.HandleCols == nil {
return nil, false
Expand Down Expand Up @@ -1111,6 +1136,17 @@ func (p *PhysicalTopN) pushTopNDownToDynamicPartition(copTsk *copTask) (task, bo
}.Init(p.SCtx(), stats, p.SelectBlockOffset())
pushedLimit.SetSchema(copTsk.tablePlan.Schema())
copTsk = attachPlan2Task(pushedLimit, copTsk).(*copTask)

// A similar but simplified logic compared with the ExpectedCnt handling logic in getOriginalPhysicalTableScan.
child := pushedLimit.Children()[0]
// The row count of the direct child of Limit should be adjusted to be no larger than the Limit.Count.
child.SetStats(child.statsInfo().ScaleByExpectCnt(float64(newCount)))
// The Limit->Selection->TableScan case:
// adjust the row count of TableScan according to the selectivity of the Selection.
if selSelectivity > 0 && selSelectivity < 1 {
scaledRowCount := child.Stats().RowCount / selSelectivity
tblScan.SetStats(tblScan.Stats().ScaleByExpectCnt(scaledRowCount))
}
} else {
return nil, false
}
Expand Down
29 changes: 29 additions & 0 deletions planner/core/testdata/integration_partition_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -149,5 +149,34 @@
"explain format='brief' select a from tcollist limit 10",
"explain format='brief' select a from tcollist order by a limit 10"
]
},
{
"name": "TestEstimationForTopNPushToDynamicPartition",
"cases": [
"explain format='brief' select a from t use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select a from trange use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select a from tlist use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select a from thash use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select * from t use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select * from trange use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select * from tlist use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select * from thash use index (ia) where a > 10 order by a limit 10",
"explain format='brief' select * from t use index (ia) where a + 1 > 10 order by a limit 10",
"explain format='brief' select * from trange use index (ia) where a + 1 > 10 order by a limit 10",
"explain format='brief' select * from tlist use index (ia) where a + 1 > 10 order by a limit 10",
"explain format='brief' select * from thash use index (ia) where a + 1 > 10 order by a limit 10",
"explain format='brief' select a from t use index (ia) where a > 10 and c = 10 order by a limit 10",
"explain format='brief' select a from trange use index (ia) where a > 10 and c = 10 order by a limit 10",
"explain format='brief' select a from tlist use index (ia) where a > 10 and c = 10 order by a limit 10",
"explain format='brief' select a from thash use index (ia) where a > 10 and c = 10 order by a limit 10",
"explain format='brief' select a from t use index () where b > 10 order by b limit 10",
"explain format='brief' select a from trange use index () where b > 10 order by b limit 10",
"explain format='brief' select a from tlist use index () where b > 10 order by b limit 10",
"explain format='brief' select a from thash use index () where b > 10 order by b limit 10",
"explain format='brief' select a from t use index () where a > 10 order by b limit 10",
"explain format='brief' select a from trange use index () where a > 10 order by b limit 10",
"explain format='brief' select a from tlist use index () where a > 10 order by b limit 10",
"explain format='brief' select a from thash use index () where a > 10 order by b limit 10"
]
}
]
Loading