From 8454e1f77259dfa790ad393894d1370ca2b9e5ae Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 16 Jun 2022 11:04:34 +0800 Subject: [PATCH] planner: introduce new cost formula for IndexLookup (#35408) ref pingcap/tidb#35240 --- planner/core/plan_cost.go | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/planner/core/plan_cost.go b/planner/core/plan_cost.go index 61308dd8fb3db..386a93505ae30 100644 --- a/planner/core/plan_cost.go +++ b/planner/core/plan_cost.go @@ -199,12 +199,14 @@ func (p *PhysicalIndexLookUpReader) GetPlanCost(taskType property.TaskType, cost for tmp = p.tablePlan; len(tmp.Children()) > 0; tmp = tmp.Children()[0] { } ts := tmp.(*PhysicalTableScan) - tblCost, err := ts.GetPlanCost(property.CopDoubleReadTaskType, costFlag) - if err != nil { - return 0, err + if p.ctx.GetSessionVars().CostModelVersion == modelVer1 { + tblCost, err := ts.GetPlanCost(property.CopDoubleReadTaskType, costFlag) + if err != nil { + return 0, err + } + p.planCost -= tblCost + p.planCost += getCardinality(p.indexPlan, costFlag) * ts.getScanRowSize() * p.SCtx().GetSessionVars().GetScanFactor(ts.Table) } - p.planCost -= tblCost - p.planCost += getCardinality(p.indexPlan, costFlag) * ts.getScanRowSize() * p.SCtx().GetSessionVars().GetScanFactor(ts.Table) // index-side net I/O cost: rows * row-size * net-factor netFactor := getTableNetFactor(p.tablePlan) @@ -221,6 +223,12 @@ func (p *PhysicalIndexLookUpReader) GetPlanCost(taskType property.TaskType, cost // table-side seek cost p.planCost += estimateNetSeekCost(p.tablePlan) + if p.ctx.GetSessionVars().CostModelVersion == modelVer2 { + // accumulate the real double-read cost: numDoubleReadTasks * seekFactor + numDoubleReadTasks := p.estNumDoubleReadTasks(costFlag) + p.planCost += numDoubleReadTasks * p.ctx.GetSessionVars().GetSeekFactor(ts.Table) + } + // consider concurrency p.planCost /= float64(p.ctx.GetSessionVars().DistSQLScanConcurrency()) @@ -230,6 +238,16 @@ func (p 
*PhysicalIndexLookUpReader) GetPlanCost(taskType property.TaskType, cost return p.planCost, nil } +func (p *PhysicalIndexLookUpReader) estNumDoubleReadTasks(costFlag uint64) float64 { + doubleReadRows := p.indexPlan.StatsCount() + batchSize := float64(p.ctx.GetSessionVars().IndexLookupSize) + // distRatio indicates how many requests correspond to a batch; the current value is from experiments. + // TODO: estimate it by using index correlation or make it configurable. + distRatio := 40.0 + numDoubleReadTasks := (doubleReadRows / batchSize) * distRatio + return numDoubleReadTasks // return a float64 rather than an integer (e.g. via `Ceil(...)`) to keep the cost continuous +} + // GetPlanCost calculates the cost of the plan if it has not been calculated yet and returns the cost. func (p *PhysicalIndexReader) GetPlanCost(taskType property.TaskType, costFlag uint64) (float64, error) { if p.planCostInit && !hasCostFlag(costFlag, CostFlagRecalculate) {