From 397062f2e711a868f4928756f676210de2259fc8 Mon Sep 17 00:00:00 2001
From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com>
Date: Mon, 19 Jun 2023 23:22:10 +0800
Subject: [PATCH] planner, sessionctx: skip wide type columns such as JSON when collecting statistics (#44653)

close pingcap/tidb#44725
---
NOTE(review): the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy. Context-line whitespace (tabs, gofmt column
alignment, blank context lines) is a best guess; confirm it against the target
tree before applying, or apply with `git apply --ignore-whitespace`.
NOTE(review): ParseAnalyzeSkipColumnTypes now trims whitespace around each item
so that it agrees with ValidAnalyzeSkipColumnTypes; the varsutil.go hunk size
and the diffstat below were updated accordingly.

 executor/set_test.go                      | 14 ++++++++
 executor/test/analyzetest/analyze_test.go | 34 ++++++++++++++++++
 planner/core/planbuilder.go               | 31 ++++++++++++++++
 sessionctx/variable/session.go            |  3 ++
 sessionctx/variable/sysvar.go             |  8 +++++
 sessionctx/variable/tidb_vars.go          |  2 ++
 sessionctx/variable/varsutil.go           | 44 +++++++++++++++++++++++
 7 files changed, 136 insertions(+)

diff --git a/executor/set_test.go b/executor/set_test.go
index 0bd8571c80a5c..c6eb6aaa82ba3 100644
--- a/executor/set_test.go
+++ b/executor/set_test.go
@@ -852,6 +852,20 @@ func TestSetVar(t *testing.T) {
 	require.Equal(t, uint64(2), tk.Session().GetSessionVars().CDCWriteSource)
 	tk.MustExec("set @@session.tidb_cdc_write_source = 0")
 	require.Equal(t, uint64(0), tk.Session().GetSessionVars().CDCWriteSource)
+
+	tk.MustQuery("select @@session.tidb_analyze_skip_column_types").Check(testkit.Rows("json,blob,mediumblob,longblob"))
+	tk.MustExec("set @@session.tidb_analyze_skip_column_types = 'json, text, blob'")
+	tk.MustQuery("select @@session.tidb_analyze_skip_column_types").Check(testkit.Rows("json,text,blob"))
+	tk.MustExec("set @@session.tidb_analyze_skip_column_types = ''")
+	tk.MustQuery("select @@session.tidb_analyze_skip_column_types").Check(testkit.Rows(""))
+	tk.MustGetErrMsg("set @@session.tidb_analyze_skip_column_types = 'int,json'", "[variable:1231]Variable 'tidb_analyze_skip_column_types' can't be set to the value of 'int,json'")
+
+	tk.MustQuery("select @@global.tidb_analyze_skip_column_types").Check(testkit.Rows("json,blob,mediumblob,longblob"))
+	tk.MustExec("set @@global.tidb_analyze_skip_column_types = 'json, text, blob'")
+	tk.MustQuery("select @@global.tidb_analyze_skip_column_types").Check(testkit.Rows("json,text,blob"))
+	tk.MustExec("set @@global.tidb_analyze_skip_column_types = ''")
+	tk.MustQuery("select @@global.tidb_analyze_skip_column_types").Check(testkit.Rows(""))
+	tk.MustGetErrMsg("set @@global.tidb_analyze_skip_column_types = 'int,json'", "[variable:1231]Variable 'tidb_analyze_skip_column_types' can't be set to the value of 'int,json'")
 }
 
 func TestGetSetNoopVars(t *testing.T) {
diff --git a/executor/test/analyzetest/analyze_test.go b/executor/test/analyzetest/analyze_test.go
index 0865b3a5a9923..fdab734afd534 100644
--- a/executor/test/analyzetest/analyze_test.go
+++ b/executor/test/analyzetest/analyze_test.go
@@ -243,6 +243,7 @@ func TestAnalyzeTooLongColumns(t *testing.T) {
 	value := fmt.Sprintf(`{"x":"%s"}`, strings.Repeat("x", mysql.MaxFieldVarCharLength))
 	tk.MustExec(fmt.Sprintf("insert into t values ('%s')", value))
 
+	tk.MustExec("set @@session.tidb_analyze_skip_column_types = ''")
 	tk.MustExec("analyze table t")
 	is := tk.Session().(sessionctx.Context).GetInfoSchema().(infoschema.InfoSchema)
 	table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
@@ -3239,3 +3240,36 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) {
 	require.True(t, stats.Indices[tblInfo.Indices[0].ID].IsStatsInitialized())
 	require.False(t, stats.Indices[tblInfo.Indices[1].ID].IsStatsInitialized())
 }
+
+func TestManualAnalyzeSkipColumnTypes(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int, c json, d text, e mediumtext, f blob, g mediumblob, index idx(d(10)))")
+	tk.MustExec("set @@session.tidb_analyze_skip_column_types = 'json,blob,mediumblob,text,mediumtext'")
+	tk.MustExec("analyze table t")
+	tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%analyze table%'").Check(testkit.Rows("analyze table columns a, b, d with 256 buckets, 500 topn, 1 samplerate"))
+	tk.MustExec("delete from mysql.analyze_jobs")
+	tk.MustExec("analyze table t columns a, e")
+	tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%analyze table%'").Check(testkit.Rows("analyze table columns a, d with 256 buckets, 500 topn, 1 samplerate"))
+}
+
+func TestAutoAnalyzeSkipColumnTypes(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int, c json, d text, e mediumtext, f blob, g mediumblob, index idx(d(10)))")
+	tk.MustExec("insert into t values (1, 2, null, 'xxx', 'yyy', null, null)")
+	h := dom.StatsHandle()
+	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
+	require.NoError(t, h.Update(dom.InfoSchema()))
+	tk.MustExec("set @@global.tidb_analyze_skip_column_types = 'json,blob,mediumblob,text,mediumtext'")
+
+	originalVal := handle.AutoAnalyzeMinCnt
+	handle.AutoAnalyzeMinCnt = 0
+	defer func() {
+		handle.AutoAnalyzeMinCnt = originalVal
+	}()
+	require.True(t, h.HandleAutoAnalyze(dom.InfoSchema()))
+	tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'").Check(testkit.Rows("auto analyze table columns a, b, d with 256 buckets, 500 topn, 1 samplerate"))
+}
diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go
index 7abe4d4106c9e..1cd3d798aa304 100644
--- a/planner/core/planbuilder.go
+++ b/planner/core/planbuilder.go
@@ -2467,6 +2467,37 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
 		if colsInfo, ok := colsInfoMap[physicalID]; ok {
 			execColsInfo = colsInfo
 		}
+		filterSkipColumnTypes := func(origin []*model.ColumnInfo) (result []*model.ColumnInfo) {
+			skipTypes := b.ctx.GetSessionVars().AnalyzeSkipColumnTypes
+			if b.ctx.GetSessionVars().InRestrictedSQL {
+				// For auto analyze, we need to use @@global.tidb_analyze_skip_column_types.
+				val, err1 := b.ctx.GetSessionVars().GlobalVarsAccessor.GetGlobalSysVar(variable.TiDBAnalyzeSkipColumnTypes)
+				if err1 != nil {
+					logutil.BgLogger().Error("loading tidb_analyze_skip_column_types failed", zap.Error(err1))
+					result = origin
+					return
+				}
+				skipTypes = variable.ParseAnalyzeSkipColumnTypes(val)
+			}
+			mustAnalyze, err1 := b.getMustAnalyzedColumns(tbl, &mustAnalyzedCols)
+			if err1 != nil {
+				logutil.BgLogger().Error("getting must-analyzed columns failed", zap.Error(err1))
+				result = origin
+				return
+			}
+			for _, colInfo := range origin {
+				_, skip := skipTypes[types.TypeToStr(colInfo.FieldType.GetType(), colInfo.FieldType.GetCharset())]
+				// Currently, if the column exists in some index(except MV Index), we need to bring the column's sample values
+				// into TiDB to build the index statistics.
+				_, keep := mustAnalyze[colInfo.ID]
+				if skip && !keep {
+					continue
+				}
+				result = append(result, colInfo)
+			}
+			return
+		}
+		execColsInfo = filterSkipColumnTypes(execColsInfo)
 		allColumns := len(tbl.TableInfo.Columns) == len(execColsInfo)
 		indexes := getModifiedIndexesInfoForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
 		handleCols := BuildHandleColsForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go
index 8cdc7675f05b2..e31a1d15d715a 100644
--- a/sessionctx/variable/session.go
+++ b/sessionctx/variable/session.go
@@ -1498,6 +1498,9 @@ type SessionVars struct {
 	runtimeFilterTypes []RuntimeFilterType
 	// Runtime filter mode: only support OFF, LOCAL now
 	runtimeFilterMode RuntimeFilterMode
+
+	// AnalyzeSkipColumnTypes indicates the column types whose statistics would not be collected when executing the ANALYZE command.
+	AnalyzeSkipColumnTypes map[string]struct{}
 }
 
 var (
diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go
index fd88f576864ae..83ee824920195 100644
--- a/sessionctx/variable/sysvar.go
+++ b/sessionctx/variable/sysvar.go
@@ -2528,6 +2528,14 @@ var defaultSysVars = []*SysVar{
 			s.OptimizerFixControl = newMap
 			return nil
 		}},
+	{Scope: ScopeGlobal | ScopeSession, Name: TiDBAnalyzeSkipColumnTypes, Value: "json,blob,mediumblob,longblob", Type: TypeStr,
+		Validation: func(vars *SessionVars, normalizedValue string, originalValue string, scope ScopeFlag) (string, error) {
+			return ValidAnalyzeSkipColumnTypes(normalizedValue)
+		},
+		SetSession: func(s *SessionVars, val string) error {
+			s.AnalyzeSkipColumnTypes = ParseAnalyzeSkipColumnTypes(val)
+			return nil
+		}},
 	{Scope: ScopeGlobal | ScopeSession, Name: TiDBPlanCacheInvalidationOnFreshStats, Value: BoolToOnOff(DefTiDBPlanCacheInvalidationOnFreshStats), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
 		s.PlanCacheInvalidationOnFreshStats = TiDBOptOn(val)
 		return nil
diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go
index 2e24b4a19c0d9..ef4a5bbfa3b99 100644
--- a/sessionctx/variable/tidb_vars.go
+++ b/sessionctx/variable/tidb_vars.go
@@ -876,6 +876,8 @@ const (
 	TiDBOptEnableMPPSharedCTEExecution = "tidb_opt_enable_mpp_shared_cte_execution"
 	// TiDBOptFixControl makes the user able to control some details of the optimizer behavior.
 	TiDBOptFixControl = "tidb_opt_fix_control"
+	// TiDBAnalyzeSkipColumnTypes indicates the column types whose statistics would not be collected when executing the ANALYZE command.
+	TiDBAnalyzeSkipColumnTypes = "tidb_analyze_skip_column_types"
 )
 
 // TiDB vars that have only global scope
diff --git a/sessionctx/variable/varsutil.go b/sessionctx/variable/varsutil.go
index d7eda1a16c691..5f8710b7680da 100644
--- a/sessionctx/variable/varsutil.go
+++ b/sessionctx/variable/varsutil.go
@@ -574,3 +574,47 @@ var GAFunction4ExpressionIndex = map[string]struct{}{
 	ast.JSONKeys:          {},
 	ast.JSONLength:        {},
 }
+
+// analyzeSkipAllowedTypes lists the column type names accepted in tidb_analyze_skip_column_types.
+var analyzeSkipAllowedTypes = map[string]struct{}{
+	"json":       {},
+	"text":       {},
+	"mediumtext": {},
+	"longtext":   {},
+	"blob":       {},
+	"mediumblob": {},
+	"longblob":   {},
+}
+
+// ValidAnalyzeSkipColumnTypes validates a value of tidb_analyze_skip_column_types and returns its normalized form:
+// lower-cased type names with surrounding whitespace trimmed, joined by commas without spaces.
+// The empty string is valid. If any item is not an allowed type, it returns the original value and ErrWrongValueForVar.
+func ValidAnalyzeSkipColumnTypes(val string) (string, error) {
+	if val == "" {
+		return "", nil
+	}
+	items := strings.Split(strings.ToLower(val), ",")
+	columnTypes := make([]string, 0, len(items))
+	for _, item := range items {
+		columnType := strings.TrimSpace(item)
+		if _, ok := analyzeSkipAllowedTypes[columnType]; !ok {
+			return val, ErrWrongValueForVar.GenWithStackByArgs(TiDBAnalyzeSkipColumnTypes, val)
+		}
+		columnTypes = append(columnTypes, columnType)
+	}
+	return strings.Join(columnTypes, ","), nil
+}
+
+// ParseAnalyzeSkipColumnTypes converts a value of tidb_analyze_skip_column_types to a set of type names.
+// Items are lower-cased and trimmed the same way as in ValidAnalyzeSkipColumnTypes, so a value that has not been
+// normalized yet (e.g. "json, text") is parsed correctly. Items that are not allowed types are ignored.
+func ParseAnalyzeSkipColumnTypes(val string) map[string]struct{} {
+	skipTypes := make(map[string]struct{})
+	for _, item := range strings.Split(strings.ToLower(val), ",") {
+		columnType := strings.TrimSpace(item)
+		if _, ok := analyzeSkipAllowedTypes[columnType]; ok {
+			skipTypes[columnType] = struct{}{}
+		}
+	}
+	return skipTypes
+}