Skip to content

Commit

Permalink
planner, sessionctx: skip wide type columns such as JSON when collect…
Browse files Browse the repository at this point in the history
…ing statistics (#44653)

close #44725
  • Loading branch information
xuyifangreeneyes authored Jun 19, 2023
1 parent 6e3d0eb commit 397062f
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 0 deletions.
14 changes: 14 additions & 0 deletions executor/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,20 @@ func TestSetVar(t *testing.T) {
require.Equal(t, uint64(2), tk.Session().GetSessionVars().CDCWriteSource)
tk.MustExec("set @@session.tidb_cdc_write_source = 0")
require.Equal(t, uint64(0), tk.Session().GetSessionVars().CDCWriteSource)

tk.MustQuery("select @@session.tidb_analyze_skip_column_types").Check(testkit.Rows("json,blob,mediumblob,longblob"))
tk.MustExec("set @@session.tidb_analyze_skip_column_types = 'json, text, blob'")
tk.MustQuery("select @@session.tidb_analyze_skip_column_types").Check(testkit.Rows("json,text,blob"))
tk.MustExec("set @@session.tidb_analyze_skip_column_types = ''")
tk.MustQuery("select @@session.tidb_analyze_skip_column_types").Check(testkit.Rows(""))
tk.MustGetErrMsg("set @@session.tidb_analyze_skip_column_types = 'int,json'", "[variable:1231]Variable 'tidb_analyze_skip_column_types' can't be set to the value of 'int,json'")

tk.MustQuery("select @@global.tidb_analyze_skip_column_types").Check(testkit.Rows("json,blob,mediumblob,longblob"))
tk.MustExec("set @@global.tidb_analyze_skip_column_types = 'json, text, blob'")
tk.MustQuery("select @@global.tidb_analyze_skip_column_types").Check(testkit.Rows("json,text,blob"))
tk.MustExec("set @@global.tidb_analyze_skip_column_types = ''")
tk.MustQuery("select @@global.tidb_analyze_skip_column_types").Check(testkit.Rows(""))
tk.MustGetErrMsg("set @@global.tidb_analyze_skip_column_types = 'int,json'", "[variable:1231]Variable 'tidb_analyze_skip_column_types' can't be set to the value of 'int,json'")
}

func TestGetSetNoopVars(t *testing.T) {
Expand Down
34 changes: 34 additions & 0 deletions executor/test/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ func TestAnalyzeTooLongColumns(t *testing.T) {
value := fmt.Sprintf(`{"x":"%s"}`, strings.Repeat("x", mysql.MaxFieldVarCharLength))
tk.MustExec(fmt.Sprintf("insert into t values ('%s')", value))

tk.MustExec("set @@session.tidb_analyze_skip_column_types = ''")
tk.MustExec("analyze table t")
is := tk.Session().(sessionctx.Context).GetInfoSchema().(infoschema.InfoSchema)
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
Expand Down Expand Up @@ -3239,3 +3240,36 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) {
require.True(t, stats.Indices[tblInfo.Indices[0].ID].IsStatsInitialized())
require.False(t, stats.Indices[tblInfo.Indices[1].ID].IsStatsInitialized())
}

// TestManualAnalyzeSkipColumnTypes checks which columns a manually issued
// ANALYZE reports in mysql.analyze_jobs once the session-level
// tidb_analyze_skip_column_types lists json, blob and text variants.
func TestManualAnalyzeSkipColumnTypes(t *testing.T) {
	const jobInfoQuery = "select job_info from mysql.analyze_jobs where job_info like '%analyze table%'"

	tk := testkit.NewTestKit(t, testkit.CreateMockStore(t))
	setup := []string{
		"use test",
		"create table t(a int, b int, c json, d text, e mediumtext, f blob, g mediumblob, index idx(d(10)))",
		"set @@session.tidb_analyze_skip_column_types = 'json,blob,mediumblob,text,mediumtext'",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	// Whole-table analyze: d is still listed even though text is in the skip
	// list; it is the only skipped-type column covered by an index (idx).
	tk.MustExec("analyze table t")
	tk.MustQuery(jobInfoQuery).Check(testkit.Rows("analyze table columns a, b, d with 256 buckets, 500 topn, 1 samplerate"))

	// Start from an empty job table so the next check sees exactly one row.
	tk.MustExec("delete from mysql.analyze_jobs")

	// Explicit column list: the requested mediumtext column e is dropped,
	// while d is reported alongside a.
	tk.MustExec("analyze table t columns a, e")
	tk.MustQuery(jobInfoQuery).Check(testkit.Rows("analyze table columns a, d with 256 buckets, 500 topn, 1 samplerate"))
}

// TestAutoAnalyzeSkipColumnTypes checks that an auto analyze run reports only
// columns a, b and d after @@global.tidb_analyze_skip_column_types is set to
// json, blob and text variants.
func TestAutoAnalyzeSkipColumnTypes(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, c json, d text, e mediumtext, f blob, g mediumblob, index idx(d(10)))")
	tk.MustExec("insert into t values (1, 2, null, 'xxx', 'yyy', null, null)")

	// Flush the pending row-count delta and reload stats before triggering
	// auto analyze.
	statsHandle := dom.StatsHandle()
	require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll))
	require.NoError(t, statsHandle.Update(dom.InfoSchema()))

	// The test sets the global scope of the variable, not the session scope.
	tk.MustExec("set @@global.tidb_analyze_skip_column_types = 'json,blob,mediumblob,text,mediumtext'")

	// Temporarily drop the minimum-count threshold to zero and restore it
	// when the test function returns.
	savedMinCnt := handle.AutoAnalyzeMinCnt
	handle.AutoAnalyzeMinCnt = 0
	defer func() {
		handle.AutoAnalyzeMinCnt = savedMinCnt
	}()

	triggered := statsHandle.HandleAutoAnalyze(dom.InfoSchema())
	require.True(t, triggered)

	const autoJobInfoQuery = "select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'"
	tk.MustQuery(autoJobInfoQuery).Check(testkit.Rows("auto analyze table columns a, b, d with 256 buckets, 500 topn, 1 samplerate"))
}
31 changes: 31 additions & 0 deletions planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2467,6 +2467,37 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
if colsInfo, ok := colsInfoMap[physicalID]; ok {
execColsInfo = colsInfo
}
filterSkipColumnTypes := func(origin []*model.ColumnInfo) (result []*model.ColumnInfo) {
skipTypes := b.ctx.GetSessionVars().AnalyzeSkipColumnTypes
if b.ctx.GetSessionVars().InRestrictedSQL {
// For auto analyze, we need to use @@global.tidb_analyze_skip_column_types.
val, err1 := b.ctx.GetSessionVars().GlobalVarsAccessor.GetGlobalSysVar(variable.TiDBAnalyzeSkipColumnTypes)
if err1 != nil {
logutil.BgLogger().Error("loading tidb_analyze_skip_column_types failed", zap.Error(err1))
result = origin
return
}
skipTypes = variable.ParseAnalyzeSkipColumnTypes(val)
}
mustAnalyze, err1 := b.getMustAnalyzedColumns(tbl, &mustAnalyzedCols)
if err1 != nil {
logutil.BgLogger().Error("getting must-analyzed columns failed", zap.Error(err1))
result = origin
return
}
for _, colInfo := range origin {
_, skip := skipTypes[types.TypeToStr(colInfo.FieldType.GetType(), colInfo.FieldType.GetCharset())]
// Currently, if the column exists in some index(except MV Index), we need to bring the column's sample values
// into TiDB to build the index statistics.
_, keep := mustAnalyze[colInfo.ID]
if skip && !keep {
continue
}
result = append(result, colInfo)
}
return
}
execColsInfo = filterSkipColumnTypes(execColsInfo)
allColumns := len(tbl.TableInfo.Columns) == len(execColsInfo)
indexes := getModifiedIndexesInfoForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
handleCols := BuildHandleColsForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,9 @@ type SessionVars struct {
runtimeFilterTypes []RuntimeFilterType
// Runtime filter mode: only support OFF, LOCAL now
runtimeFilterMode RuntimeFilterMode

// AnalyzeSkipColumnTypes indicates the column types whose statistics would not be collected when executing the ANALYZE command.
AnalyzeSkipColumnTypes map[string]struct{}
}

var (
Expand Down
8 changes: 8 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -2528,6 +2528,14 @@ var defaultSysVars = []*SysVar{
s.OptimizerFixControl = newMap
return nil
}},
// tidb_analyze_skip_column_types: comma-separated list of column types whose
// statistics are not collected by ANALYZE. Settable at both global and session
// scope; the default skips json, blob, mediumblob and longblob.
{Scope: ScopeGlobal | ScopeSession, Name: TiDBAnalyzeSkipColumnTypes, Value: "json,blob,mediumblob,longblob", Type: TypeStr,
// Validation delegates to ValidAnalyzeSkipColumnTypes and returns its result unchanged.
Validation: func(vars *SessionVars, normalizedValue string, originalValue string, scope ScopeFlag) (string, error) {
return ValidAnalyzeSkipColumnTypes(normalizedValue)
},
// SetSession stores the parsed set form of the value on the session variables.
SetSession: func(s *SessionVars, val string) error {
s.AnalyzeSkipColumnTypes = ParseAnalyzeSkipColumnTypes(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBPlanCacheInvalidationOnFreshStats, Value: BoolToOnOff(DefTiDBPlanCacheInvalidationOnFreshStats), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.PlanCacheInvalidationOnFreshStats = TiDBOptOn(val)
return nil
Expand Down
2 changes: 2 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,8 @@ const (
TiDBOptEnableMPPSharedCTEExecution = "tidb_opt_enable_mpp_shared_cte_execution"
// TiDBOptFixControl makes the user able to control some details of the optimizer behavior.
TiDBOptFixControl = "tidb_opt_fix_control"
// TiDBAnalyzeSkipColumnTypes indicates the column types whose statistics would not be collected when executing the ANALYZE command.
TiDBAnalyzeSkipColumnTypes = "tidb_analyze_skip_column_types"
)

// TiDB vars that have only global scope
Expand Down
38 changes: 38 additions & 0 deletions sessionctx/variable/varsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -574,3 +574,41 @@ var GAFunction4ExpressionIndex = map[string]struct{}{
ast.JSONKeys: {},
ast.JSONLength: {},
}

// analyzeSkipAllowedTypes is the set of lower-case column type names that are
// accepted as items of tidb_analyze_skip_column_types. Any other name is
// rejected by validation and ignored by parsing.
var analyzeSkipAllowedTypes = map[string]struct{}{
	// Binary large-object types.
	"blob":       {},
	"mediumblob": {},
	"longblob":   {},

	// Character large-object types.
	"text":       {},
	"mediumtext": {},
	"longtext":   {},

	// JSON documents.
	"json": {},
}

// ValidAnalyzeSkipColumnTypes validates a candidate value for
// tidb_analyze_skip_column_types and returns its normalized form.
//
// The empty string is accepted as-is. Otherwise val is lower-cased, split on
// commas, and each item is trimmed of surrounding whitespace; every item must
// be a key of analyzeSkipAllowedTypes. On success the trimmed items are
// re-joined with "," (no spaces). On failure the original val is returned
// together with an ErrWrongValueForVar error naming the variable and val.
func ValidAnalyzeSkipColumnTypes(val string) (string, error) {
	if len(val) == 0 {
		return "", nil
	}
	parts := strings.Split(strings.ToLower(val), ",")
	for i := range parts {
		// Normalize in place so the slice can be joined directly afterwards.
		parts[i] = strings.TrimSpace(parts[i])
		if _, allowed := analyzeSkipAllowedTypes[parts[i]]; !allowed {
			return val, ErrWrongValueForVar.GenWithStackByArgs(TiDBAnalyzeSkipColumnTypes, val)
		}
	}
	return strings.Join(parts, ","), nil
}

// ParseAnalyzeSkipColumnTypes converts a tidb_analyze_skip_column_types value
// to its set form.
//
// val is lower-cased and split on commas; each item is trimmed of surrounding
// whitespace and kept only if it is a key of analyzeSkipAllowedTypes. Unknown
// or empty items are ignored rather than reported. The returned map is never
// nil, so callers can index it directly even for an empty val.
func ParseAnalyzeSkipColumnTypes(val string) map[string]struct{} {
	skipTypes := make(map[string]struct{})
	for _, item := range strings.Split(strings.ToLower(val), ",") {
		// Trim so that un-normalized input such as "json, text" yields the
		// same set that ValidAnalyzeSkipColumnTypes accepts, instead of
		// silently dropping " text".
		columnType := strings.TrimSpace(item)
		if _, ok := analyzeSkipAllowedTypes[columnType]; ok {
			skipTypes[columnType] = struct{}{}
		}
	}
	return skipTypes
}

0 comments on commit 397062f

Please sign in to comment.