Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: log the reason why the sample-rate is chosen when analyzing table #45938

Merged
merged 14 commits into from
Aug 10, 2023
3 changes: 2 additions & 1 deletion executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,8 @@ func finishJobWithLog(sctx sessionctx.Context, job *statistics.AnalyzeJob, analy
zap.String("job info", job.JobInfo),
zap.Time("start time", job.StartTime),
zap.Time("end time", job.EndTime),
zap.String("cost", job.EndTime.Sub(job.StartTime).String()))
zap.String("cost", job.EndTime.Sub(job.StartTime).String()),
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
zap.String("sample rate reason", job.SampleRateReason))
}
}

Expand Down
31 changes: 20 additions & 11 deletions executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package executor
import (
"bytes"
"context"
"fmt"
"math"
"strconv"
"strings"
Expand Down Expand Up @@ -2706,10 +2707,11 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC
modifyCount = int64(val.(int))
})
sampleRate := new(float64)
var sampleRateReason string
if opts[ast.AnalyzeOptNumSamples] == 0 {
*sampleRate = math.Float64frombits(opts[ast.AnalyzeOptSampleRate])
if *sampleRate < 0 {
*sampleRate = b.getAdjustedSampleRate(task)
*sampleRate, sampleRateReason = b.getAdjustedSampleRate(task)
if task.PartitionName != "" {
sc.AppendNote(errors.Errorf(
"Analyze use auto adjusted sample rate %f for table %s.%s's partition %s",
Expand All @@ -2718,20 +2720,25 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC
task.TableName,
task.PartitionName,
))
sc.AppendNote(errors.Errorf("The reason to choose this sample rate for %s.%s's partition %s is: %s",
task.DBName, task.TableName, task.PartitionName, sampleRateReason))
} else {
sc.AppendNote(errors.Errorf(
"Analyze use auto adjusted sample rate %f for table %s.%s",
*sampleRate,
task.DBName,
task.TableName,
))
sc.AppendNote(errors.Errorf("The reason to choose this sample rate for table %s.%s is: %s",
task.DBName, task.TableName, sampleRateReason))
}
}
}
job := &statistics.AnalyzeJob{
DBName: task.DBName,
TableName: task.TableName,
PartitionName: task.PartitionName,
DBName: task.DBName,
TableName: task.TableName,
PartitionName: task.PartitionName,
SampleRateReason: sampleRateReason,
}

base := baseAnalyzeExec{
Expand Down Expand Up @@ -2788,11 +2795,11 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC
// If we take n = 1e12, a 300*k sample still gives <= 0.66 bin size error with probability 0.99.
// So if we don't consider the top-n values, we can keep the sample size at 300*256.
// But we may take some top-n before building the histogram, so we increase the sample a little.
func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsTask) float64 {
func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsTask) (sampleRate float64, reason string) {
statsHandle := domain.GetDomain(b.ctx).StatsHandle()
defaultRate := 0.001
if statsHandle == nil {
return defaultRate
return defaultRate, fmt.Sprintf("statsHandler is nil, use the default-rate=%v", defaultRate)
}
var statsTbl *statistics.Table
tid := task.TableID.GetStatisticsID()
Expand All @@ -2804,11 +2811,11 @@ func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsT
approxiCount, hasPD := b.getApproximateTableCountFromStorage(tid, task)
// If there's no stats meta and no pd, return the default rate.
if statsTbl == nil && !hasPD {
return defaultRate
return defaultRate, fmt.Sprintf("statsTbl is nil and no pd info, use the default-rate=%v", defaultRate)
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
}
// If the count in stats_meta is still 0 and there's no information from pd side, we scan all rows.
if statsTbl.RealtimeCount == 0 && !hasPD {
return 1
return 1, fmt.Sprintf("statsTbl.RealtimeCount is 0 and no pd info, use sample-rate=1")
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
}
// we have issue https://github.com/pingcap/tidb/issues/29216.
// As a workaround for this issue, we compare the count against the approxiCount from the PD side.
Expand All @@ -2817,15 +2824,17 @@ func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsT
if float64(statsTbl.RealtimeCount*5) < approxiCount {
// As confirmed by the TiKV side, the empirical error rate of the approximate count is about 20%.
// So we increase the number to 150000 to reduce this error rate.
return math.Min(1, 150000/approxiCount)
sampleRate = math.Min(1, 150000/approxiCount)
return sampleRate, fmt.Sprintf("statsTbl.RealtimeCount is too small, use min(1, 150000/%v) as the sample-rate=%v", approxiCount, sampleRate)
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
}
// If we did not take the branch above and the count is still zero, return 1 to avoid dividing by zero.
if statsTbl.RealtimeCount == 0 {
return 1
return 1, fmt.Sprintf("statsTbl.RealtimeCount is 0, use sample-rate=1")
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
}
// We expect to scan about 100000 rows.
// Since the count from the stats meta has a small error rate, we use 110000 to get a slightly larger result.
return math.Min(1, config.DefRowsForSampleRate/float64(statsTbl.RealtimeCount))
sampleRate = math.Min(1, config.DefRowsForSampleRate/float64(statsTbl.RealtimeCount))
return sampleRate, fmt.Sprintf("use min(1, %v/%v) as the sample-rate=%v", config.DefRowsForSampleRate, statsTbl.RealtimeCount, sampleRate)
}

func (b *executorBuilder) getApproximateTableCountFromStorage(tid int64, task plannercore.AnalyzeColumnsTask) (float64, bool) {
Expand Down
2 changes: 2 additions & 0 deletions statistics/analyze_jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ type AnalyzeJob struct {
PartitionName string
JobInfo string
Progress AnalyzeProgress

SampleRateReason string // why this sample-rate is chosen
}

// AnalyzeProgress represents the process of one analyze job.
Expand Down