diff --git a/executor/analyze.go b/executor/analyze.go index 13c9d8dc8a9ec..29029033fc627 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -594,7 +594,8 @@ func finishJobWithLog(sctx sessionctx.Context, job *statistics.AnalyzeJob, analy zap.String("job info", job.JobInfo), zap.Time("start time", job.StartTime), zap.Time("end time", job.EndTime), - zap.String("cost", job.EndTime.Sub(job.StartTime).String())) + zap.String("cost", job.EndTime.Sub(job.StartTime).String()), + zap.String("sample rate reason", job.SampleRateReason)) } } diff --git a/executor/analyze_test.go b/executor/analyze_test.go index f981c48465296..74b0fb94b1904 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -431,8 +431,8 @@ func TestMergeGlobalStatsWithUnAnalyzedPartition(t *testing.T) { tk.MustExec("analyze table t partition p2 index idxc;") tk.MustQuery("show warnings").Check(testkit.Rows( "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2")) + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) tk.MustExec("analyze table t partition p0;") tk.MustQuery("show warnings").Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0")) + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/2) as the sample-rate=1\"")) } diff --git a/executor/builder.go b/executor/builder.go index 4e199d6a92b57..a394d95a23f57 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -17,6 +17,7 @@ package executor import ( "bytes" "context" + "fmt" "math" "strconv" "strings" @@ -2706,32 +2707,36 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC modifyCount = 
int64(val.(int)) }) sampleRate := new(float64) + var sampleRateReason string if opts[ast.AnalyzeOptNumSamples] == 0 { *sampleRate = math.Float64frombits(opts[ast.AnalyzeOptSampleRate]) if *sampleRate < 0 { - *sampleRate = b.getAdjustedSampleRate(task) + *sampleRate, sampleRateReason = b.getAdjustedSampleRate(task) if task.PartitionName != "" { sc.AppendNote(errors.Errorf( - "Analyze use auto adjusted sample rate %f for table %s.%s's partition %s", + `Analyze use auto adjusted sample rate %f for table %s.%s's partition %s, reason to use this rate is "%s"`, *sampleRate, task.DBName, task.TableName, task.PartitionName, + sampleRateReason, )) } else { sc.AppendNote(errors.Errorf( - "Analyze use auto adjusted sample rate %f for table %s.%s", + `Analyze use auto adjusted sample rate %f for table %s.%s, reason to use this rate is "%s"`, *sampleRate, task.DBName, task.TableName, + sampleRateReason, )) } } } job := &statistics.AnalyzeJob{ - DBName: task.DBName, - TableName: task.TableName, - PartitionName: task.PartitionName, + DBName: task.DBName, + TableName: task.TableName, + PartitionName: task.PartitionName, + SampleRateReason: sampleRateReason, } base := baseAnalyzeExec{ @@ -2788,11 +2793,11 @@ func (b *executorBuilder) buildAnalyzeSamplingPushdown(task plannercore.AnalyzeC // If we take n = 1e12, a 300*k sample still gives <= 0.66 bin size error with probability 0.99. // So if we don't consider the top-n values, we can keep the sample size at 300*256. // But we may take some top-n before building the histogram, so we increase the sample a little. 
-func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsTask) float64 { +func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsTask) (sampleRate float64, reason string) { statsHandle := domain.GetDomain(b.ctx).StatsHandle() defaultRate := 0.001 if statsHandle == nil { - return defaultRate + return defaultRate, fmt.Sprintf("statsHandler is nil, use the default-rate=%v", defaultRate) } var statsTbl *statistics.Table tid := task.TableID.GetStatisticsID() @@ -2804,11 +2809,11 @@ func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsT approxiCount, hasPD := b.getApproximateTableCountFromStorage(tid, task) // If there's no stats meta and no pd, return the default rate. if statsTbl == nil && !hasPD { - return defaultRate + return defaultRate, fmt.Sprintf("TiDB cannot get the row count of the table, use the default-rate=%v", defaultRate) } // If the count in stats_meta is still 0 and there's no information from pd side, we scan all rows. if statsTbl.RealtimeCount == 0 && !hasPD { - return 1 + return 1, "TiDB assumes that the table is empty and cannot get row count from PD, use sample-rate=1" } // we have issue https://github.com/pingcap/tidb/issues/29216. // To do a workaround for this issue, we check the approxiCount from the pd side to do a comparison. @@ -2817,15 +2822,17 @@ func (b *executorBuilder) getAdjustedSampleRate(task plannercore.AnalyzeColumnsT if float64(statsTbl.RealtimeCount*5) < approxiCount { // Confirmed by TiKV side, the experience error rate of the approximate count is about 20%. // So we increase the number to 150000 to reduce this error rate. 
- return math.Min(1, 150000/approxiCount) + sampleRate = math.Min(1, 150000/approxiCount) + return sampleRate, fmt.Sprintf("Row count in stats_meta is much smaller compared with the row count got by PD, use min(1, 150000/%v) as the sample-rate=%v", approxiCount, sampleRate) } // If we don't go into the above if branch and we still detect the count is zero. Return 1 to prevent the dividing zero. if statsTbl.RealtimeCount == 0 { - return 1 + return 1, "TiDB assumes that the table is empty, use sample-rate=1" } // We are expected to scan about 100000 rows or so. // Since there's tiny error rate around the count from the stats meta, we use 110000 to get a little big result - return math.Min(1, config.DefRowsForSampleRate/float64(statsTbl.RealtimeCount)) + sampleRate = math.Min(1, config.DefRowsForSampleRate/float64(statsTbl.RealtimeCount)) + return sampleRate, fmt.Sprintf("use min(1, %v/%v) as the sample-rate=%v", config.DefRowsForSampleRate, statsTbl.RealtimeCount, sampleRate) } func (b *executorBuilder) getApproximateTableCountFromStorage(tid int64, task plannercore.AnalyzeColumnsTask) (float64, bool) { diff --git a/executor/infoschema_reader_test.go b/executor/infoschema_reader_test.go index 138f2d84071c2..1b8e044971334 100644 --- a/executor/infoschema_reader_test.go +++ b/executor/infoschema_reader_test.go @@ -594,7 +594,7 @@ func TestForAnalyzeStatus(t *testing.T) { tk.MustExec("create table t1 (a int, b int, index idx(a))") tk.MustExec("insert into t1 values (1,2),(3,4)") tk.MustExec("analyze table t1") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t1")) // 1 note. + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) // 1 note. 
require.NoError(t, dom.StatsHandle().LoadNeededHistograms()) tk.MustExec("CREATE ROLE r_t1 ;") tk.MustExec("GRANT ALL PRIVILEGES ON test.t1 TO r_t1;") diff --git a/executor/test/analyzetest/analyze_test.go b/executor/test/analyzetest/analyze_test.go index 25934493e075a..11e168105fab9 100644 --- a/executor/test/analyzetest/analyze_test.go +++ b/executor/test/analyzetest/analyze_test.go @@ -669,14 +669,14 @@ func TestAdjustSampleRateNote(t *testing.T) { result := tk.MustQuery("show stats_meta where table_name = 't'") require.Equal(t, "220000", result.Rows()[0][5]) tk.MustExec("analyze table t") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t")) + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t, reason to use this rate is \"use min(1, 110000/220000) as the sample-rate=0.5\"")) tk.MustExec("insert into t values(1),(1),(1)") require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll)) require.NoError(t, statsHandle.Update(is)) result = tk.MustQuery("show stats_meta where table_name = 't'") require.Equal(t, "3", result.Rows()[0][5]) tk.MustExec("analyze table t") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t")) + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"")) } func TestFastAnalyze4GlobalStats(t *testing.T) { @@ -925,7 +925,7 @@ func TestSmallTableAnalyzeV2(t *testing.T) { tk.MustExec("create table small_table_inject_pd(a int)") tk.MustExec("insert into small_table_inject_pd values(1), (2), (3), (4), (5)") tk.MustExec("analyze table small_table_inject_pd") - tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table 
test.small_table_inject_pd")) + tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"")) tk.MustExec(` create table small_table_inject_pd_with_partition( a int @@ -937,9 +937,9 @@ create table small_table_inject_pd_with_partition( tk.MustExec("insert into small_table_inject_pd_with_partition values(1), (6), (11)") tk.MustExec("analyze table small_table_inject_pd_with_partition") tk.MustQuery("show warnings").Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", )) rows := [][]interface{}{ {"global", "a"}, @@ -1421,7 +1421,7 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample 
rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1489,7 +1489,7 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1566,7 +1566,7 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1647,8 +1647,8 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample 
rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1799,8 +1799,8 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -1904,7 +1904,7 @@ func TestAnalyzeColumnsWithExtendedStats(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) 
case model.PredicateColumns: @@ -1974,7 +1974,7 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) { case model.ColumnList: tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", )) case model.PredicateColumns: @@ -2096,6 +2096,26 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) { } } +func TestAnalyzeSampleRateReason(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (a int, b int)") + require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll)) + + tk.MustExec(`analyze table t`) + tk.MustQuery(`show warnings`).Sort().Check(testkit.Rows( + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`)) + + tk.MustExec(`insert into t values (1, 1), (2, 2), (3, 3)`) + require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll)) + tk.MustExec(`analyze table t`) + tk.MustQuery(`show warnings`).Sort().Check(testkit.Rows( + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty, use sample-rate=1"`)) +} + func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) @@ -2120,7 +2140,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { // If no predicate column is collected, analyze predicate columns gives a warning and falls 
back to analyze all columns. tk.MustExec("analyze table t predicate columns") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`, "Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed", )) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows() @@ -2145,7 +2165,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { tk.MustExec("analyze table t predicate columns") } tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty, use sample-rate=1"`, "Warning 1105 Table test.t has version 1 statistics so all the columns must be analyzed to overwrite the current statistics", )) }(val) @@ -2731,7 +2751,7 @@ PARTITION BY RANGE ( a ) ( // analyze partition with options under dynamic mode tk.MustExec("analyze table t partition p0 columns a,b,c with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", @@ -2745,7 
+2765,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("analyze table t partition p0") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/9) as the sample-rate=1\"", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", )) @@ -2801,7 +2821,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'") tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions", )) @@ -2810,7 +2830,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("set global tidb_persist_analyze_options = true") tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8244 Build global-level stats failed due to missing 
partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions", )) @@ -2819,7 +2839,7 @@ PARTITION BY RANGE ( a ) ( tk.MustExec("insert into mysql.analyze_options values (?,?,?,?,?,?,?)", pi.Definitions[1].ID, 0, 0, 1, 1, "DEFAULT", "") tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions", )) @@ -3224,7 +3244,7 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) { tk.MustExec("analyze table t columns a") tk.MustQuery("show warnings").Sort().Check(testkit.Rows(""+ - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx_c")) tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and table_name = 't'").Check(testkit.Rows( diff --git a/planner/core/indexmerge_path_test.go b/planner/core/indexmerge_path_test.go index e676b82d6f1a1..66d1a0bd9baec 100644 --- a/planner/core/indexmerge_path_test.go +++ b/planner/core/indexmerge_path_test.go @@ -39,12 +39,12 @@ index idx2(a, b, 
(cast(j->'$.str' as char(10) array)), c))`) tk.MustExec("set tidb_analyze_version=2") tk.MustExec("analyze table t") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx2")) tk.MustExec("analyze table t index idx") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"TiDB assumes that the table is empty, use sample-rate=1\"", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx", "Warning 1105 analyzing multi-valued indexes is not supported, skip idx2")) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index f74ec6da97c11..dda7061022c49 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -2953,7 +2953,7 @@ func TestIncrementalAnalyzeStatsVer2(t *testing.T) { require.Len(t, warns, 3) require.EqualError(t, warns[0].Err, "The version 2 would collect all statistics not only the selected indexes") require.EqualError(t, warns[1].Err, "The version 2 stats would ignore the INCREMENTAL keyword and do full sampling") - require.EqualError(t, warns[2].Err, "Analyze use auto adjusted sample rate 1.000000 for table test.t") + require.EqualError(t, warns[2].Err, "Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"") rows = tk.MustQuery(fmt.Sprintf("select 
distinct_count from mysql.stats_histograms where table_id = %d and is_index = 1", tblID)).Rows() require.Len(t, rows, 1) require.Equal(t, "6", rows[0][0]) diff --git a/statistics/analyze_jobs.go b/statistics/analyze_jobs.go index e93d3c427192a..cbbbf48b0ecb0 100644 --- a/statistics/analyze_jobs.go +++ b/statistics/analyze_jobs.go @@ -28,7 +28,9 @@ type AnalyzeJob struct { TableName string PartitionName string JobInfo string - Progress AnalyzeProgress + + SampleRateReason string // why this sample-rate is chosen + Progress AnalyzeProgress } // AnalyzeProgress represents the process of one analyze job. diff --git a/statistics/handle/handletest/analyze/analyze_test.go b/statistics/handle/handletest/analyze/analyze_test.go index b785fb0b2c648..6c42871bc8a62 100644 --- a/statistics/handle/handletest/analyze/analyze_test.go +++ b/statistics/handle/handletest/analyze/analyze_test.go @@ -242,7 +242,7 @@ func TestFMSWithAnalyzePartition(t *testing.T) { tk.MustQuery("select count(*) from mysql.stats_fm_sketch").Check(testkit.Rows("0")) tk.MustExec("analyze table t partition p0 with 1 topn, 2 buckets") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Ignore columns and options when analyze partition in dynamic mode", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", "Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`", diff --git a/statistics/integration_test.go b/statistics/integration_test.go index ea01cee24fccc..0fcd201cb1a31 100644 --- a/statistics/integration_test.go +++ b/statistics/integration_test.go @@ -169,7 +169,7 @@ func 
TestChangeVerTo2BehaviorWithPersistedOptions(t *testing.T) { tk.MustExec("analyze table t index idx") tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 The analyze version from the session is not compatible with the existing statistics of the table. Use the existing version instead", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t")) // since fallback to ver2 path, should do samplerate adjustment + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"")) // since fallback to ver2 path, should do samplerate adjustment require.NoError(t, h.Update(is)) statsTblT = h.GetTableStats(tblT.Meta()) for _, idx := range statsTblT.Indices { @@ -178,7 +178,7 @@ func TestChangeVerTo2BehaviorWithPersistedOptions(t *testing.T) { tk.MustExec("analyze table t index") tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 The analyze version from the session is not compatible with the existing statistics of the table. Use the existing version instead", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t")) + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\"")) require.NoError(t, h.Update(is)) statsTblT = h.GetTableStats(tblT.Meta()) for _, idx := range statsTblT.Indices {