Skip to content

Commit

Permalink
statistics: add metrics for unneeded analyze table (#54822)
Browse files Browse the repository at this point in the history
close #54823
  • Loading branch information
hawkingrei authored Jul 26, 2024
1 parent b41ad70 commit 7e73ddc
Show file tree
Hide file tree
Showing 20 changed files with 112 additions and 115 deletions.
1 change: 0 additions & 1 deletion pkg/executor/test/analyzetest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ go_test(
"//pkg/sessionctx",
"//pkg/sessionctx/variable",
"//pkg/statistics",
"//pkg/statistics/handle/autoanalyze/exec",
"//pkg/testkit",
"//pkg/testkit/analyzehelper",
"//pkg/util/dbterror/exeerrors",
Expand Down
27 changes: 13 additions & 14 deletions pkg/executor/test/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ import (
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/exec"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/testkit/analyzehelper"
"github.com/pingcap/tidb/pkg/util/dbterror/exeerrors"
Expand Down Expand Up @@ -703,11 +702,11 @@ func TestSavedAnalyzeOptions(t *testing.T) {
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2))
}()
tk.MustExec("set global tidb_auto_analyze_ratio = 0.01")
originalVal3 := exec.AutoAnalyzeMinCnt
originalVal3 := statistics.AutoAnalyzeMinCnt
defer func() {
exec.AutoAnalyzeMinCnt = originalVal3
statistics.AutoAnalyzeMinCnt = originalVal3
}()
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0

tk.MustExec("use test")
tk.MustExec("set @@session.tidb_analyze_version = 2")
Expand Down Expand Up @@ -1046,11 +1045,11 @@ func TestSavedAnalyzeColumnOptions(t *testing.T) {
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2))
}()
tk.MustExec("set global tidb_auto_analyze_ratio = 0.01")
originalVal3 := exec.AutoAnalyzeMinCnt
originalVal3 := statistics.AutoAnalyzeMinCnt
defer func() {
exec.AutoAnalyzeMinCnt = originalVal3
statistics.AutoAnalyzeMinCnt = originalVal3
}()
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
originalVal4 := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string)
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal4))
Expand Down Expand Up @@ -1888,9 +1887,9 @@ func testKillAutoAnalyze(t *testing.T, ver int) {
tk := testkit.NewTestKit(t, store)
oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string)
oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string)
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart))
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd))
}()
Expand Down Expand Up @@ -1972,9 +1971,9 @@ func TestKillAutoAnalyzeIndex(t *testing.T) {
tk := testkit.NewTestKit(t, store)
oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string)
oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string)
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart))
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd))
}()
Expand Down Expand Up @@ -2733,12 +2732,12 @@ func TestAutoAnalyzeAwareGlobalVariableChange(t *testing.T) {
"3 0",
))

originalVal1 := exec.AutoAnalyzeMinCnt
originalVal1 := statistics.AutoAnalyzeMinCnt
originalVal2 := tk.MustQuery("select @@global.tidb_auto_analyze_ratio").Rows()[0][0].(string)
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
tk.MustExec("set global tidb_auto_analyze_ratio = 0.001")
defer func() {
exec.AutoAnalyzeMinCnt = originalVal1
statistics.AutoAnalyzeMinCnt = originalVal1
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2))
}()

Expand Down
2 changes: 1 addition & 1 deletion pkg/executor/test/analyzetest/memorycontrol/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ go_test(
"//pkg/config",
"//pkg/executor",
"//pkg/sessionctx/variable",
"//pkg/statistics/handle/autoanalyze/exec",
"//pkg/statistics",
"//pkg/testkit",
"//pkg/util",
"@com_github_pingcap_failpoint//:failpoint",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/executor"
"github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/exec"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/util"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -144,12 +144,12 @@ func TestGlobalMemoryControlForAutoAnalyze(t *testing.T) {
require.Len(t, rs0.Rows(), 0)

h := dom.StatsHandle()
originalVal4 := exec.AutoAnalyzeMinCnt
originalVal4 := statistics.AutoAnalyzeMinCnt
originalVal5 := tk.MustQuery("select @@global.tidb_auto_analyze_ratio").Rows()[0][0].(string)
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
tk.MustExec("set global tidb_auto_analyze_ratio = 0.001")
defer func() {
exec.AutoAnalyzeMinCnt = originalVal4
statistics.AutoAnalyzeMinCnt = originalVal4
tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal5))
}()

Expand Down
1 change: 0 additions & 1 deletion pkg/statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ go_test(
"//pkg/parser/mysql",
"//pkg/sessionctx",
"//pkg/sessionctx/stmtctx",
"//pkg/statistics/handle/autoanalyze/exec",
"//pkg/testkit",
"//pkg/testkit/analyzehelper",
"//pkg/testkit/testdata",
Expand Down
1 change: 0 additions & 1 deletion pkg/statistics/handle/autoanalyze/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ go_test(
"//pkg/sessionctx",
"//pkg/sessionctx/variable",
"//pkg/statistics",
"//pkg/statistics/handle/autoanalyze/exec",
"//pkg/statistics/handle/util",
"//pkg/statistics/handle/util/test",
"//pkg/testkit",
Expand Down
4 changes: 2 additions & 2 deletions pkg/statistics/handle/autoanalyze/autoanalyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ func tryAutoAnalyzeTable(
// Pseudo statistics can be created by the optimizer, so we need to double check it.
// 2. If the table is too small, we don't want to waste time to analyze it.
// Leave the opportunity to other bigger tables.
if statsTbl == nil || statsTbl.Pseudo || statsTbl.RealtimeCount < exec.AutoAnalyzeMinCnt {
if statsTbl == nil || statsTbl.Pseudo || statsTbl.RealtimeCount < statistics.AutoAnalyzeMinCnt {
return false
}

Expand Down Expand Up @@ -558,7 +558,7 @@ func tryAutoAnalyzePartitionTableInDynamicMode(
// Pseudo statistics can be created by the optimizer, so we need to double check it.
// 2. If the table is too small, we don't want to waste time to analyze it.
// Leave the opportunity to other bigger tables.
if partitionStats == nil || partitionStats.Pseudo || partitionStats.RealtimeCount < exec.AutoAnalyzeMinCnt {
if partitionStats == nil || partitionStats.Pseudo || partitionStats.RealtimeCount < statistics.AutoAnalyzeMinCnt {
continue
}
if needAnalyze, reason := NeedAnalyzeTable(
Expand Down
31 changes: 15 additions & 16 deletions pkg/statistics/handle/autoanalyze/autoanalyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze"
"github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/exec"
statsutil "github.com/pingcap/tidb/pkg/statistics/handle/util"
"github.com/pingcap/tidb/pkg/statistics/handle/util/test"
"github.com/pingcap/tidb/pkg/testkit"
Expand All @@ -58,9 +57,9 @@ func TestEnableAutoAnalyzePriorityQueue(t *testing.T) {
require.NoError(t, h.DumpStatsDeltaToKV(true))
is := dom.InfoSchema()
require.NoError(t, h.Update(context.Background(), is))
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
}()
require.True(t, dom.StatsHandle().HandleAutoAnalyze())
}
Expand All @@ -79,9 +78,9 @@ func TestAutoAnalyzeLockedTable(t *testing.T) {
tk.MustExec("lock stats t")
is := dom.InfoSchema()
require.NoError(t, h.Update(context.Background(), is))
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
}()
// Try to analyze the locked table, it should not analyze the table.
require.False(t, dom.StatsHandle().HandleAutoAnalyze())
Expand All @@ -107,9 +106,9 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) {
require.NoError(t, h.DumpStatsDeltaToKV(true))
is := dom.InfoSchema()
require.NoError(t, h.Update(context.Background(), is))
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
}()

// Check column_stats_usage.
Expand Down Expand Up @@ -157,9 +156,9 @@ func disableAutoAnalyzeCase(t *testing.T, tk *testkit.TestKit, dom *domain.Domai
require.NoError(t, h.Update(context.Background(), is))

tk.MustExec("set @@global.tidb_enable_auto_analyze = 0")
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
}()
// Even auto analyze ratio is set to 0, we still need to analyze the unanalyzed tables.
require.True(t, dom.StatsHandle().HandleAutoAnalyze())
Expand All @@ -182,9 +181,9 @@ func TestAutoAnalyzeOnChangeAnalyzeVer(t *testing.T) {
tk.MustExec("insert into t values(1)")
tk.MustExec("set @@global.tidb_analyze_version = 1")
do := dom
exec.AutoAnalyzeMinCnt = 0
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
statistics.AutoAnalyzeMinCnt = 1000
}()
h := do.StatsHandle()
err := h.HandleDDLEvent(<-h.DDLEventCh())
Expand Down Expand Up @@ -352,10 +351,10 @@ func TestAutoAnalyzeSkipColumnTypes(t *testing.T) {
require.NoError(t, h.DumpColStatsUsageToKV())
tk.MustExec("set @@global.tidb_analyze_skip_column_types = 'json,blob,mediumblob,text,mediumtext'")

originalVal := exec.AutoAnalyzeMinCnt
exec.AutoAnalyzeMinCnt = 0
originalVal := statistics.AutoAnalyzeMinCnt
statistics.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = originalVal
statistics.AutoAnalyzeMinCnt = originalVal
}()
require.True(t, h.HandleAutoAnalyze())
tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'").Check(testkit.Rows("auto analyze table all indexes, columns a, b, d with 256 buckets, 100 topn, 1 samplerate"))
Expand Down Expand Up @@ -384,7 +383,7 @@ func TestAutoAnalyzeOnEmptyTable(t *testing.T) {
// to pass the stats.Pseudo check in autoAnalyzeTable
tk.MustExec("analyze table t")
// to pass the AutoAnalyzeMinCnt check in autoAnalyzeTable
tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(exec.AutoAnalyzeMinCnt)))
tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(statistics.AutoAnalyzeMinCnt)))
require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(true))
require.NoError(t, dom.StatsHandle().Update(context.Background(), dom.InfoSchema()))

Expand Down Expand Up @@ -419,7 +418,7 @@ func TestAutoAnalyzeOutOfSpecifiedTime(t *testing.T) {
// to pass the stats.Pseudo check in autoAnalyzeTable
tk.MustExec("analyze table t")
// to pass the AutoAnalyzeMinCnt check in autoAnalyzeTable
tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(exec.AutoAnalyzeMinCnt)))
tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(statistics.AutoAnalyzeMinCnt)))
require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(true))
require.NoError(t, dom.StatsHandle().Update(context.Background(), dom.InfoSchema()))

Expand Down
4 changes: 0 additions & 4 deletions pkg/statistics/handle/autoanalyze/exec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ import (
"go.uber.org/zap"
)

// AutoAnalyzeMinCnt is the minimum count a table must reach before auto analyze
// considers it: a table whose count is less than this value is skipped.
// Exported for testing.
var AutoAnalyzeMinCnt int64 = 1000

var execOptionForAnalyze = map[int]sqlexec.OptionFuncAlias{
statistics.Version0: sqlexec.ExecOptionAnalyzeVer1,
statistics.Version1: sqlexec.ExecOptionAnalyzeVer1,
Expand Down
1 change: 0 additions & 1 deletion pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ go_test(
":refresher",
"//pkg/parser/model",
"//pkg/statistics",
"//pkg/statistics/handle/autoanalyze/exec",
"//pkg/statistics/handle/autoanalyze/priorityqueue",
"//pkg/testkit",
"@com_github_stretchr_testify//require",
Expand Down
22 changes: 4 additions & 18 deletions pkg/statistics/handle/autoanalyze/refresher/refresher.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ func CreateTableAnalysisJob(
autoAnalyzeRatio float64,
currentTs uint64,
) priorityqueue.AnalysisJob {
if !isEligibleForAnalysis(tblStats) {
if !tblStats.IsEligibleForAnalysis() {
return nil
}

Expand Down Expand Up @@ -331,7 +331,7 @@ func CreateStaticPartitionAnalysisJob(
autoAnalyzeRatio float64,
currentTs uint64,
) priorityqueue.AnalysisJob {
if !isEligibleForAnalysis(partitionStats) {
if !partitionStats.IsEligibleForAnalysis() {
return nil
}

Expand Down Expand Up @@ -465,7 +465,7 @@ func createTableAnalysisJobForPartitions(
autoAnalyzeRatio float64,
currentTs uint64,
) priorityqueue.AnalysisJob {
if !isEligibleForAnalysis(tblStats) {
if !tblStats.IsEligibleForAnalysis() {
return nil
}

Expand Down Expand Up @@ -611,7 +611,7 @@ func getPartitionStats(
for _, def := range defs {
stats := statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, def.ID)
// Ignore the partition if it's not ready to analyze.
if !isEligibleForAnalysis(stats) {
if !stats.IsEligibleForAnalysis() {
continue
}
d := PartitionIDAndName{
Expand All @@ -624,20 +624,6 @@ func getPartitionStats(
return partitionStats
}

// isEligibleForAnalysis reports whether tblStats describes a table that is
// worth handing to auto analyze.
//
// It returns false when:
//   - the statistics are not loaded (tblStats is nil);
//   - the statistics are pseudo. The optimizer can create pseudo statistics,
//     so this is double checked here;
//   - the table's realtime count is below exec.AutoAnalyzeMinCnt. Such a table
//     is too small to be worth the time, which is left to bigger tables.
func isEligibleForAnalysis(tblStats *statistics.Table) bool {
	// Guard first: the field accesses below require a non-nil table.
	if tblStats == nil {
		return false
	}

	// Short-circuit order matches the checks above: pseudo first, then size.
	return !tblStats.Pseudo && tblStats.RealtimeCount >= exec.AutoAnalyzeMinCnt
}

// autoAnalysisTimeWindow is a struct that contains the start and end time of the auto analyze time window.
type autoAnalysisTimeWindow struct {
start time.Time
Expand Down
Loading

0 comments on commit 7e73ddc

Please sign in to comment.