diff --git a/domain/sysvar_cache.go b/domain/sysvar_cache.go index a5edb5fbfa425..c4f28629fa332 100644 --- a/domain/sysvar_cache.go +++ b/domain/sysvar_cache.go @@ -243,6 +243,8 @@ func (do *Domain) checkEnableServerGlobalVar(name, sVal string) { storekv.StoreLimit.Store(val) case variable.TiDBPersistAnalyzeOptions: variable.PersistAnalyzeOptions.Store(variable.TiDBOptOn(sVal)) + case variable.TiDBEnableColumnTracking: + variable.EnableColumnTracking.Store(variable.TiDBOptOn(sVal)) } if err != nil { logutil.BgLogger().Error(fmt.Sprintf("load global variable %s error", name), zap.Error(err)) diff --git a/executor/analyze_test.go b/executor/analyze_test.go index cb492fc99f384..de35933562e5a 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -24,14 +24,17 @@ import ( "testing" "time" + "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/domain" + "github.com/pingcap/tidb/errno" "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/infoschema" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/parser/terror" "github.com/pingcap/tidb/planner/core" "github.com/pingcap/tidb/session" "github.com/pingcap/tidb/sessionctx" @@ -1747,3 +1750,837 @@ func TestSavedAnalyzeOptionsForMultipleTables(t *testing.T) { require.Equal(t, "2", rs.Rows()[0][3]) require.Equal(t, "2", rs.Rows()[0][4]) } + +func TestSavedAnalyzeColumnOptions(t *testing.T) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + tk := testkit.NewTestKit(t, store) + originalVal1 := tk.MustQuery("select @@tidb_persist_analyze_options").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_persist_analyze_options = %v", originalVal1)) + }() + tk.MustExec("set global tidb_persist_analyze_options = true") + originalVal2 := tk.MustQuery("select @@tidb_auto_analyze_ratio").Rows()[0][0].(string) 
+ defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2)) + }() + tk.MustExec("set global tidb_auto_analyze_ratio = 0.01") + originalVal3 := handle.AutoAnalyzeMinCnt + defer func() { + handle.AutoAnalyzeMinCnt = originalVal3 + }() + handle.AutoAnalyzeMinCnt = 0 + originalVal4 := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal4)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + + tk.MustExec("use test") + tk.MustExec("set @@session.tidb_analyze_version = 2") + tk.MustExec("create table t(a int, b int, c int)") + tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4)") + + h := dom.StatsHandle() + oriLease := h.Lease() + h.SetLease(1) + defer func() { + h.SetLease(oriLease) + }() + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.Nil(t, err) + tblInfo := tbl.Meta() + tk.MustExec("select * from t where b > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + tk.MustExec("analyze table t predicate columns") + require.NoError(t, h.LoadNeededHistograms()) + tblStats := h.GetTableStats(tblInfo) + lastVersion := tblStats.Version + // column b is analyzed + require.Greater(t, lastVersion, tblStats.Columns[tblInfo.Columns[0].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[1].ID].LastUpdateVersion) + require.Greater(t, lastVersion, tblStats.Columns[tblInfo.Columns[2].ID].LastUpdateVersion) + tk.MustQuery(fmt.Sprintf("select column_choice, column_ids from mysql.analyze_options where table_id = %v", tblInfo.ID)).Check(testkit.Rows("PREDICATE ")) + + tk.MustExec("select * from t where c > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + // manually analyze uses the saved option(predicate columns). 
+ tk.MustExec("analyze table t") + require.NoError(t, h.LoadNeededHistograms()) + tblStats = h.GetTableStats(tblInfo) + require.Less(t, lastVersion, tblStats.Version) + lastVersion = tblStats.Version + // column b, c are analyzed + require.Greater(t, lastVersion, tblStats.Columns[tblInfo.Columns[0].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[1].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[2].ID].LastUpdateVersion) + + tk.MustExec("insert into t values (5,5,5),(6,6,6)") + require.Nil(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + require.Nil(t, h.Update(is)) + // auto analyze uses the saved option(predicate columns). + h.HandleAutoAnalyze(is) + tblStats = h.GetTableStats(tblInfo) + require.Less(t, lastVersion, tblStats.Version) + lastVersion = tblStats.Version + // column b, c are analyzed + require.Greater(t, lastVersion, tblStats.Columns[tblInfo.Columns[0].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[1].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[2].ID].LastUpdateVersion) + + tk.MustExec("analyze table t columns a") + tblStats = h.GetTableStats(tblInfo) + require.Less(t, lastVersion, tblStats.Version) + lastVersion = tblStats.Version + // column a is analyzed + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[0].ID].LastUpdateVersion) + require.Greater(t, lastVersion, tblStats.Columns[tblInfo.Columns[1].ID].LastUpdateVersion) + require.Greater(t, lastVersion, tblStats.Columns[tblInfo.Columns[2].ID].LastUpdateVersion) + tk.MustQuery(fmt.Sprintf("select column_choice, column_ids from mysql.analyze_options where table_id = %v", tblInfo.ID)).Check(testkit.Rows(fmt.Sprintf("LIST %v", tblInfo.Columns[0].ID))) + + tk.MustExec("analyze table t all columns") + tblStats = h.GetTableStats(tblInfo) + require.Less(t, lastVersion, tblStats.Version) + lastVersion = tblStats.Version + // column a, 
b, c are analyzed + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[0].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[1].ID].LastUpdateVersion) + require.Equal(t, lastVersion, tblStats.Columns[tblInfo.Columns[2].ID].LastUpdateVersion) + tk.MustQuery(fmt.Sprintf("select column_choice, column_ids from mysql.analyze_options where table_id = %v", tblInfo.ID)).Check(testkit.Rows("ALL ")) +} + +func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + tk.MustExec("create table t (a int, b int, c int primary key)") + tk.MustExec("insert into t values (1,1,1), (1,1,2), (2,2,3), (2,2,4), (3,3,5), (4,3,6), (5,4,7), (6,4,8), (null,null,9)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t 
where a > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "a", rows[0][3]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() + require.Equal(t, 2, len(rows)) + require.Equal(t, "a", rows[0][3]) + require.Equal(t, "c", rows[1][3]) + + tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 9")) + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t a 0 1 2", + "test t a 0 2 2", + "test t c 0 1 1", + "test t c 0 2 1")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 6 1 8 2 1", + "0 2 0 0 8 0 0", // column b is not analyzed + "0 3 9 0 9 2 1", + )) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t a 0 0 3 1 3 5 0", + "test t a 0 1 4 1 6 6 0", + "test t c 0 0 4 1 3 6 0", + "test t c 0 1 7 1 7 9 0")) + }(val) + } +} + +func TestAnalyzeColumnsWithIndex(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set 
@@tidb_analyze_version = 2") + tk.MustExec("create table t (a int, b int, c int, d int, index idx_b_d(b, d))") + tk.MustExec("insert into t values (1,1,null,1), (2,1,9,1), (1,1,8,1), (2,2,7,2), (1,3,7,3), (2,4,6,4), (1,4,6,5), (2,4,6,5), (1,5,6,5)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where c > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "c", rows[0][3]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() + require.Equal(t, 3, len(rows)) + require.Equal(t, "b", rows[0][3]) + require.Equal(t, "c", rows[1][3]) + require.Equal(t, "d", rows[2][3]) + + tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 9")) + tk.MustQuery("show stats_topn where db_name = 
'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t b 0 1 3", + "test t b 0 4 3", + "test t c 0 6 4", + "test t c 0 7 2", + "test t d 0 1 3", + "test t d 0 5 3", + "test t idx_b_d 1 (1, 1) 3", + "test t idx_b_d 1 (4, 5) 2")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 0 0 9 0 0", // column a is not analyzed + "0 2 5 0 9 2 1", + "0 3 4 1 8 2 -0.07", + "0 4 5 0 9 2 1", + "1 1 6 0 18 2 0")) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t b 0 0 2 1 2 3 0", + "test t b 0 1 3 1 5 5 0", + "test t c 0 0 2 1 8 9 0", + "test t d 0 0 2 1 2 3 0", + "test t d 0 1 3 1 4 4 0", + "test t idx_b_d 1 0 3 1 (2, 2) (4, 4) 0", + "test t idx_b_d 1 1 4 1 (5, 5) (5, 5) 0")) + }(val) + } +} + +func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + tk.MustExec("create table t (a int, b int, c int, d int, primary key(b, d) clustered)") + tk.MustExec("insert into t values (1,1,null,1), (2,2,9,2), (1,3,8,3), (2,4,7,4), (1,5,7,5), (2,6,6,6), (1,7,6,7), (2,8,6,8), (1,9,6,9)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + + switch choice { + case model.ColumnList: + 
tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where c > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "c", rows[0][3]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() + require.Equal(t, 3, len(rows)) + require.Equal(t, "b", rows[0][3]) + require.Equal(t, "c", rows[1][3]) + require.Equal(t, "d", rows[2][3]) + + tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 9")) + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t PRIMARY 1 (1, 1) 1", + "test t PRIMARY 1 (2, 2) 1", + "test t b 0 1 1", + "test t b 0 2 1", + "test t c 0 6 4", + "test t c 0 7 2", + "test t d 0 1 1", + "test t d 0 2 1")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 
0 0 9 0 0", // column a is not analyzed + "0 2 9 0 9 2 1", + "0 3 4 1 8 2 -0.07", + "0 4 9 0 9 2 1", + "1 1 9 0 18 2 0")) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t PRIMARY 1 0 4 1 (3, 3) (6, 6) 0", + "test t PRIMARY 1 1 7 1 (7, 7) (9, 9) 0", + "test t b 0 0 4 1 3 6 0", + "test t b 0 1 7 1 7 9 0", + "test t c 0 0 2 1 8 9 0", + "test t d 0 0 4 1 3 6 0", + "test t d 0 1 7 1 7 9 0")) + }(val) + } +} + +func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'") + tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)") + tk.MustExec("insert into t values (1,2,1), (2,4,1), (3,6,1), (4,8,2), (4,8,2), (5,10,3), (5,10,4), (5,10,5), (null,null,6), (11,22,7), (12,24,8), (13,26,9), (14,28,10), (15,30,11), (16,32,12), (16,32,13), (16,32,13), (16,32,14), (17,34,14), (17,34,14)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + defs := tbl.Meta().Partition.Definitions + p0ID := defs[0].ID + p1ID := defs[1].ID + + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample 
rate 1.000000 for table test.t's partition p0.", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1.", + "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where a < 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, []interface{}{"test", "t", "global", "a"}, rows[0][:4]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() + require.Equal(t, 6, len(rows)) + require.Equal(t, []interface{}{"test", "t", "global", "a"}, rows[0][:4]) + require.Equal(t, []interface{}{"test", "t", "global", "c"}, rows[1][:4]) + require.Equal(t, []interface{}{"test", "t", "p0", "a"}, rows[2][:4]) + require.Equal(t, []interface{}{"test", "t", "p0", "c"}, rows[3][:4]) + require.Equal(t, []interface{}{"test", "t", "p1", "a"}, rows[4][:4]) + require.Equal(t, []interface{}{"test", "t", "p1", "c"}, rows[5][:4]) + + rows = tk.MustQuery("show stats_meta where db_name = 'test' and table_name = 't'").Sort().Rows() + require.Equal(t, 3, len(rows)) + require.Equal(t, []interface{}{"test", "t", "global", "0", "20"}, append(rows[0][:3], rows[0][4:]...)) + require.Equal(t, []interface{}{"test", "t", "p0", "0", "9"}, append(rows[1][:3], rows[1][4:]...)) + require.Equal(t, []interface{}{"test", "t", "p1", "0", 
"11"}, append(rows[2][:3], rows[2][4:]...)) + + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t global a 0 16 4", + "test t global a 0 5 3", + "test t global c 0 1 3", + "test t global c 0 14 3", + "test t p0 a 0 4 2", + "test t p0 a 0 5 3", + "test t p0 c 0 1 3", + "test t p0 c 0 2 2", + "test t p1 a 0 16 4", + "test t p1 a 0 17 2", + "test t p1 c 0 13 2", + "test t p1 c 0 14 3")) + + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t global idx 1 1 3", + "test t global idx 1 14 3", + "test t p0 idx 1 1 3", + "test t p0 idx 1 2 2", + "test t p1 idx 1 13 2", + "test t p1 idx 1 14 3")) + + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t global a 0 0 5 2 1 4 0", + "test t global a 0 1 12 2 17 17 0", + "test t global c 0 0 6 1 2 6 0", + "test t global c 0 1 14 2 13 13 0", + "test t p0 a 0 0 2 1 1 2 0", + "test t p0 a 0 1 3 1 3 3 0", + "test t p0 c 0 0 3 1 3 5 0", + "test t p0 c 0 1 4 1 6 6 0", + "test t p1 a 0 0 3 1 11 13 0", + "test t p1 a 0 1 5 1 14 15 0", + "test t p1 c 0 0 4 1 7 10 0", + "test t p1 c 0 1 6 1 11 12 0")) + + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t global idx 1 0 6 1 2 6 0", + "test t global idx 1 1 14 2 13 13 0", + "test t p0 idx 1 0 3 1 3 5 0", + "test t p0 idx 1 1 4 1 6 6 0", + "test t p1 idx 1 0 4 1 7 10 0", + "test t p1 idx 1 1 6 1 11 12 0")) + + tk.MustQuery("select table_id, is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, 
truncate(correlation,2) from mysql.stats_histograms order by table_id, is_index, hist_id asc").Check( + testkit.Rows(fmt.Sprintf("%d 0 1 12 1 19 2 0", tblID), // global, a + fmt.Sprintf("%d 0 3 14 0 20 2 0", tblID), // global, c + fmt.Sprintf("%d 1 1 14 0 0 2 0", tblID), // global, idx + fmt.Sprintf("%d 0 1 5 1 8 2 1", p0ID), // p0, a + fmt.Sprintf("%d 0 2 0 0 8 0 0", p0ID), // p0, b, not analyzed + fmt.Sprintf("%d 0 3 6 0 9 2 1", p0ID), // p0, c + fmt.Sprintf("%d 1 1 6 0 9 2 0", p0ID), // p0, idx + fmt.Sprintf("%d 0 1 7 0 11 2 1", p1ID), // p1, a + fmt.Sprintf("%d 0 2 0 0 11 0 0", p1ID), // p1, b, not analyzed + fmt.Sprintf("%d 0 3 8 0 11 2 1", p1ID), // p1, c + fmt.Sprintf("%d 1 1 8 0 11 2 0", p1ID), // p1, idx + )) + }(val) + } +} + +func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + tk.MustExec("set @@tidb_partition_prune_mode = 'static'") + tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)") + tk.MustExec("insert into t values (1,2,1), (2,4,1), (3,6,1), (4,8,2), (4,8,2), (5,10,3), (5,10,4), (5,10,5), (null,null,6), (11,22,7), (12,24,8), (13,26,9), (14,28,10), (15,30,11), (16,32,12), (16,32,13), (16,32,13), (16,32,14), (17,34,14), (17,34,14)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + defs := tbl.Meta().Partition.Definitions + p0ID := defs[0].ID + p1ID := defs[1].ID + + switch choice { + case model.ColumnList: + 
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0.", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1.", + "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where a < 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, []interface{}{"test", "t", "global", "a"}, rows[0][:4]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() + require.Equal(t, 4, len(rows)) + require.Equal(t, []interface{}{"test", "t", "p0", "a"}, rows[0][:4]) + require.Equal(t, []interface{}{"test", "t", "p0", "c"}, rows[1][:4]) + require.Equal(t, []interface{}{"test", "t", "p1", "a"}, rows[2][:4]) + require.Equal(t, []interface{}{"test", "t", "p1", "c"}, rows[3][:4]) + + rows = tk.MustQuery("show stats_meta where db_name = 'test' and table_name = 't'").Sort().Rows() + require.Equal(t, 2, len(rows)) + require.Equal(t, []interface{}{"test", "t", "p0", "0", "9"}, append(rows[0][:3], rows[0][4:]...)) + require.Equal(t, []interface{}{"test", "t", "p1", "0", "11"}, append(rows[1][:3], rows[1][4:]...)) + + tk.MustQuery("show stats_topn where 
db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t p0 a 0 4 2", + "test t p0 a 0 5 3", + "test t p0 c 0 1 3", + "test t p0 c 0 2 2", + "test t p1 a 0 16 4", + "test t p1 a 0 17 2", + "test t p1 c 0 13 2", + "test t p1 c 0 14 3")) + + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t p0 idx 1 1 3", + "test t p0 idx 1 2 2", + "test t p1 idx 1 13 2", + "test t p1 idx 1 14 3")) + + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t p0 a 0 0 2 1 1 2 0", + "test t p0 a 0 1 3 1 3 3 0", + "test t p0 c 0 0 3 1 3 5 0", + "test t p0 c 0 1 4 1 6 6 0", + "test t p1 a 0 0 3 1 11 13 0", + "test t p1 a 0 1 5 1 14 15 0", + "test t p1 c 0 0 4 1 7 10 0", + "test t p1 c 0 1 6 1 11 12 0")) + + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t p0 idx 1 0 3 1 3 5 0", + "test t p0 idx 1 1 4 1 6 6 0", + "test t p1 idx 1 0 4 1 7 10 0", + "test t p1 idx 1 1 6 1 11 12 0")) + + tk.MustQuery("select table_id, is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms order by table_id, is_index, hist_id asc").Check( + testkit.Rows(fmt.Sprintf("%d 0 1 5 1 8 2 1", p0ID), // p0, a + fmt.Sprintf("%d 0 2 0 0 8 0 0", p0ID), // p0, b, not analyzed + fmt.Sprintf("%d 0 3 6 0 9 2 1", p0ID), // p0, c + fmt.Sprintf("%d 1 1 6 0 9 2 0", p0ID), // p0, idx + fmt.Sprintf("%d 0 1 7 0 11 2 1", p1ID), // p1, a + fmt.Sprintf("%d 0 2 0 0 11 0 0", p1ID), // p1, b, not analyzed + fmt.Sprintf("%d 0 3 8 0 11 2 1", p1ID), // 
p1, c + fmt.Sprintf("%d 1 1 8 0 11 2 0", p1ID), // p1, idx + )) + }(val) + } +} + +func TestAnalyzeColumnsWithExtendedStats(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + tk.MustExec("set @@tidb_enable_extended_stats = on") + tk.MustExec("create table t (a int, b int, c int)") + tk.MustExec("alter table t add stats_extended s1 correlation(b,c)") + tk.MustExec("insert into t values (5,1,1), (4,2,2), (3,3,3), (2,4,4), (1,5,5)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where b > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "b", rows[0][3]) + tk.MustExec("analyze table t 
predicate columns with 2 topn, 2 buckets") + } + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() + require.Equal(t, 2, len(rows)) + require.Equal(t, "b", rows[0][3]) + require.Equal(t, "c", rows[1][3]) + + tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 5")) + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t b 0 1 1", + "test t b 0 2 1", + "test t c 0 1 1", + "test t c 0 2 1")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 0 0 5 0 0", // column a is not analyzed + "0 2 5 0 5 2 1", + "0 3 5 0 5 2 1", + )) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t b 0 0 2 1 3 4 0", + "test t b 0 1 3 1 5 5 0", + "test t c 0 0 2 1 3 4 0", + "test t c 0 1 3 1 5 5 0")) + rows = tk.MustQuery("show stats_extended where db_name = 'test' and table_name = 't'").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, []interface{}{"test", "t", "s1", "[b,c]", "correlation", "1.000000"}, rows[0][:len(rows[0])-1]) + }(val) + } +} + +func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + 
tk.MustExec("create table t (a int, b int, c int as (b+1), index idx(c))") + tk.MustExec("insert into t (a,b) values (1,1), (2,2), (3,3), (4,4), (5,4), (6,5), (7,5), (8,5), (null,null)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", + )) + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where b > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "b", rows[0][3]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + // virtual column c is skipped when dumping stats into disk, so only the stats of column b are updated + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "b", rows[0][3]) + + tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 9")) + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 
't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t b 0 4 2", + "test t b 0 5 3", + "test t idx 1 5 2", + "test t idx 1 6 3")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 0 0 0 0", // column a is not analyzed + "0 2 5 1 2 1", + "0 3 0 0 0 0", // column c is not analyzed + "1 1 5 1 2 0")) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t b 0 0 2 1 1 2 0", + "test t b 0 1 3 1 3 3 0", + "test t idx 1 0 2 1 2 3 0", + "test t idx 1 1 3 1 4 4 0")) + }(val) + } +} + +func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) { + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("set @@tidb_analyze_version = 2") + tk.MustExec("create table t (a int, b int)") + tk.MustExec("insert into t (a,b) values (1,1), (1,1), (2,2), (2,2), (3,3), (4,4)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblID := tbl.Meta().ID + + tk.MustExec("analyze table t with 2 topn, 2 buckets") + tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 6")) + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t a 0 1 2", + "test t a 0 2 2", + "test t b 0 1 
2", + "test t b 0 2 2")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 4 0 6 2 1", + "0 2 4 0 6 2 1")) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t a 0 0 2 1 3 4 0", + "test t b 0 0 2 1 3 4 0")) + + tk.MustExec("insert into t (a,b) values (1,1), (6,6)") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where b > 1") + require.NoError(t, h.DumpColStatsUsageToKV()) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "b", rows[0][3]) + tk.MustExec("analyze table t predicate columns with 2 topn, 2 buckets") + } + + // Column a is not analyzed in second ANALYZE. We keep the outdated stats of column a rather than delete them. 
+ tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 8")) + tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_idx, value, count + testkit.Rows("test t a 0 1 2", + "test t a 0 2 2", + "test t b 0 1 3", + "test t b 0 2 2")) + tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( + testkit.Rows("0 1 4 0 8 2 1", // tot_col_size of column a is updated to 8 by DumpStatsDeltaToKV + "0 2 5 0 8 2 0.76")) + tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( + // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv + testkit.Rows("test t a 0 0 2 1 3 4 0", + "test t b 0 0 2 1 3 4 0", + "test t b 0 1 3 1 6 6 0")) + tk.MustQuery(fmt.Sprintf("select hist_id from mysql.stats_histograms where version = (select version from mysql.stats_meta where table_id = %d)", tblID)).Check(testkit.Rows("2")) + }(val) + } +} + +func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (a int, b int)") + + // analyze version 1 doesn't support `ANALYZE COLUMNS c1, ..., cn`/`ANALYZE PREDICATE COLUMNS` currently + tk.MustExec("set @@tidb_analyze_version = 1") + err := tk.ExecToErr("analyze table t columns a") + require.Equal(t, "Only the analyze version 2 supports analyzing the specified columns", err.Error()) + err = tk.ExecToErr("analyze table t predicate columns") + require.Equal(t, "Only the analyze version 2 supports analyzing predicate columns", err.Error()) + + tk.MustExec("set @@tidb_analyze_version = 2") + // invalid column + err 
= tk.ExecToErr("analyze table t columns c") + terr := errors.Cause(err).(*terror.Error) + require.Equal(t, errors.ErrCode(errno.ErrAnalyzeMissColumn), terr.Code()) + + // If no predicate column is collected, analyze predicate columns gives a warning and falls back to analyze all columns. + tk.MustExec("analyze table t predicate columns") + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed.", + )) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows() + require.Equal(t, 2, len(rows)) + + for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { + func(choice model.ColumnChoice) { + tk.MustExec("set @@tidb_analyze_version = 1") + tk.MustExec("analyze table t") + tk.MustExec("set @@tidb_analyze_version = 2") + switch choice { + case model.ColumnList: + tk.MustExec("analyze table t columns b") + case model.PredicateColumns: + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where b > 1") + require.NoError(t, dom.StatsHandle().DumpColStatsUsageToKV()) + tk.MustExec("analyze table t predicate columns") + } + tk.MustQuery("show warnings").Sort().Check(testkit.Rows( + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", + "Warning 1105 Table test.t has version 1 statistics so all the columns must be analyzed to overwrite the current statistics.", + )) + }(val) + } +} diff --git a/executor/set_test.go b/executor/set_test.go index 641acb4ab580f..aeb8cb5141c29 100644 --- a/executor/set_test.go +++ b/executor/set_test.go @@ 
-589,6 +589,18 @@ func (s *testSerialSuite1) TestSetVar(c *C) { tk.MustQuery("select @@tidb_enable_historical_stats").Check(testkit.Rows("1")) tk.MustExec("set global tidb_enable_historical_stats = 0") tk.MustQuery("select @@tidb_enable_historical_stats").Check(testkit.Rows("0")) + + // test for tidb_enable_column_tracking + tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("1")) + tk.MustExec("set global tidb_enable_column_tracking = 0") + tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("0")) + // When set tidb_enable_column_tracking off, we record the time of the setting operation. + tk.MustQuery("select count(1) from mysql.tidb where variable_name = 'tidb_disable_column_tracking_time' and variable_value is not null").Check(testkit.Rows("1")) + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("1")) + c.Assert(tk.ExecToErr("select @@session.tidb_enable_column_tracking"), NotNil) + c.Assert(tk.ExecToErr("set tidb_enable_column_tracking = 0"), NotNil) + c.Assert(tk.ExecToErr("set global tidb_enable_column_tracking = -1"), NotNil) } func (s *testSuite5) TestTruncateIncorrectIntSessionVar(c *C) { diff --git a/planner/core/optimizer.go b/planner/core/optimizer.go index 5a5a7ef39ee19..0f526ece29268 100644 --- a/planner/core/optimizer.go +++ b/planner/core/optimizer.go @@ -258,8 +258,10 @@ func checkStableResultMode(sctx sessionctx.Context) bool { // DoOptimize optimizes a logical plan to a physical plan. func DoOptimize(ctx context.Context, sctx sessionctx.Context, flag uint64, logic LogicalPlan) (PhysicalPlan, float64, error) { // TODO: move it to the logic of sync load hist-needed columns. 
- predicateColumns, _ := CollectColumnStatsUsage(logic, true, false) - sctx.UpdateColStatsUsage(predicateColumns) + if variable.EnableColumnTracking.Load() { + predicateColumns, _ := CollectColumnStatsUsage(logic, true, false) + sctx.UpdateColStatsUsage(predicateColumns) + } // if there is something after flagPrunColumns, do flagPrunColumnsAgain if flag&flagPrunColumns > 0 && flag-flagPrunColumns > flagPrunColumns { flag |= flagPrunColumnsAgain diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 58e34251dc508..32306c6b5c4bf 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -1816,40 +1816,20 @@ func GetPhysicalIDsAndPartitionNames(tblInfo *model.TableInfo, partitionNames [] return ids, names, nil } -func getAnalyzeColumnList(specifiedColumns []model.CIStr, tbl *ast.TableName) ([]*model.ColumnInfo, error) { - columnIDs := make(map[int64]struct{}, len(tbl.TableInfo.Columns)) - for _, colName := range specifiedColumns { - colInfo := model.FindColumnInfo(tbl.TableInfo.Columns, colName.L) - if colInfo == nil { - return nil, ErrAnalyzeMissColumn.GenWithStackByArgs(colName.O, tbl.TableInfo.Name.O) - } - columnIDs[colInfo.ID] = struct{}{} - } - colList := make([]*model.ColumnInfo, 0, len(columnIDs)) - for _, col := range tbl.TableInfo.Columns { - if _, ok := columnIDs[col.ID]; ok { - colList = append(colList, col) - } - } - return colList, nil +type calcOnceMap struct { + data map[int64]struct{} + calculated bool } -// getFullAnalyzeColumnsInfo returns the columns whose stats need to be collected. -// 1. For `ANALYZE TABLE t PREDICATE COLUMNS`, it returns union of the predicate columns and the columns in index/primary key/extended stats. -// 2. For `ANALYZE TABLE t COLUMNS c1, c2, ..., cn`, it returns union of the specified columns(c1, c2, ..., cn) and the columns in index/primary key/extended stats. -// 3. Otherwise it returns all the columns. 
-func (b *PlanBuilder) getFullAnalyzeColumnsInfo(columns []*model.ColumnInfo, tbl *ast.TableName, warning bool) ([]*model.ColumnInfo, error) { - tblInfo := tbl.TableInfo - if len(columns) == 0 { - return tblInfo.Columns, nil - } - columnIDs := make(map[int64]struct{}, len(tblInfo.Columns)) - for _, colInfo := range columns { - columnIDs[colInfo.ID] = struct{}{} +// getMustAnalyzedColumns puts the columns whose statistics must be collected into `cols` if `cols` has not been calculated. +func (b *PlanBuilder) getMustAnalyzedColumns(tbl *ast.TableName, cols *calcOnceMap) (map[int64]struct{}, error) { + if cols.calculated { + return cols.data, nil } - missingCols := make(map[int64]struct{}, len(tblInfo.Columns)-len(columnIDs)) + tblInfo := tbl.TableInfo + cols.data = make(map[int64]struct{}, len(tblInfo.Columns)) if len(tblInfo.Indices) > 0 { - // add indexed columns + // Add indexed columns. // Some indexed columns are generated columns so we also need to add the columns that make up those generated columns. 
columns, _, err := expression.ColumnInfos2ColumnsAndNames(b.ctx, tbl.Schema, tbl.Name, tblInfo.Columns, tblInfo) if err != nil { @@ -1862,10 +1842,7 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo(columns []*model.ColumnInfo, tbl } for _, idxCol := range idx.Columns { colInfo := tblInfo.Columns[idxCol.Offset] - if _, ok := columnIDs[colInfo.ID]; !ok { - columnIDs[colInfo.ID] = struct{}{} - missingCols[colInfo.ID] = struct{}{} - } + cols.data[colInfo.ID] = struct{}{} if expr := columns[idxCol.Offset].VirtualExpr; expr != nil { virtualExprs = append(virtualExprs, expr) } @@ -1876,10 +1853,7 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo(columns []*model.ColumnInfo, tbl relatedCols = expression.ExtractColumnsFromExpressions(relatedCols, virtualExprs, nil) virtualExprs = virtualExprs[:0] for _, col := range relatedCols { - if _, ok := columnIDs[col.ID]; !ok { - columnIDs[col.ID] = struct{}{} - missingCols[col.ID] = struct{}{} - } + cols.data[col.ID] = struct{}{} if col.VirtualExpr != nil { virtualExprs = append(virtualExprs, col.VirtualExpr) } @@ -1889,13 +1863,10 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo(columns []*model.ColumnInfo, tbl } if tblInfo.PKIsHandle { pkCol := tblInfo.GetPkColInfo() - if _, ok := columnIDs[pkCol.ID]; !ok { - columnIDs[pkCol.ID] = struct{}{} - missingCols[pkCol.ID] = struct{}{} - } + cols.data[pkCol.ID] = struct{}{} } if b.ctx.GetSessionVars().EnableExtendedStats { - // add the columns related to extended stats + // Add the columns related to extended stats. // TODO: column_ids read from mysql.stats_extended in optimization phase may be different from that in execution phase((*Handle).BuildExtendedStats) // if someone inserts data into mysql.stats_extended between the two time points, the new added extended stats may not be computed. 
statsHandle := domain.GetDomain(b.ctx).StatsHandle() @@ -1904,30 +1875,133 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo(columns []*model.ColumnInfo, tbl return nil, err } for _, colID := range extendedStatsColIDs { - if _, ok := columnIDs[colID]; !ok { - columnIDs[colID] = struct{}{} - missingCols[colID] = struct{}{} - } + cols.data[colID] = struct{}{} + } + } + cols.calculated = true + return cols.data, nil +} + +func (b *PlanBuilder) getPredicateColumns(tbl *ast.TableName, cols *calcOnceMap) (map[int64]struct{}, error) { + if cols.calculated { + return cols.data, nil + } + tblInfo := tbl.TableInfo + cols.data = make(map[int64]struct{}, len(tblInfo.Columns)) + do := domain.GetDomain(b.ctx) + h := do.StatsHandle() + colList, err := h.GetPredicateColumns(tblInfo.ID) + if err != nil { + return nil, err + } + if len(colList) == 0 { + b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("No predicate column has been collected yet for table %s.%s so all columns are analyzed.", tbl.Schema.L, tbl.Name.L)) + for _, colInfo := range tblInfo.Columns { + cols.data[colInfo.ID] = struct{}{} + } + } else { + for _, id := range colList { + cols.data[id] = struct{}{} + } + } + cols.calculated = true + return cols.data, nil +} + +func getAnalyzeColumnList(specifiedColumns []model.CIStr, tbl *ast.TableName) ([]*model.ColumnInfo, error) { + colList := make([]*model.ColumnInfo, 0, len(specifiedColumns)) + for _, colName := range specifiedColumns { + colInfo := model.FindColumnInfo(tbl.TableInfo.Columns, colName.L) + if colInfo == nil { + return nil, ErrAnalyzeMissColumn.GenWithStackByArgs(colName.O, tbl.TableInfo.Name.O) } + colList = append(colList, colInfo) } - if len(missingCols) > 0 { - missingNames := make([]string, 0, len(missingCols)) - for _, col := range tblInfo.Columns { - if _, ok := missingCols[col.ID]; ok { - missingNames = append(missingNames, col.Name.O) + return colList, nil +} + +// getFullAnalyzeColumnsInfo decides which columns need to be analyzed. 
+// The first return value is the columns which need to be analyzed and the second return value is the columns which need to +// be recorded in mysql.analyze_options(only for the case of analyze table t columns c1, .., cn). +func (b *PlanBuilder) getFullAnalyzeColumnsInfo( + tbl *ast.TableName, + columnChoice model.ColumnChoice, + specifiedCols []*model.ColumnInfo, + predicateCols, mustAnalyzedCols *calcOnceMap, + mustAllColumns bool, + warning bool, +) ([]*model.ColumnInfo, []*model.ColumnInfo, error) { + if mustAllColumns && warning && (columnChoice == model.PredicateColumns || columnChoice == model.ColumnList) { + b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("Table %s.%s has version 1 statistics so all the columns must be analyzed to overwrite the current statistics.", tbl.Schema.L, tbl.Name.L)) + } + colSet2colList := func(colSet map[int64]struct{}) []*model.ColumnInfo { + colList := make([]*model.ColumnInfo, 0, len(colSet)) + for _, colInfo := range tbl.TableInfo.Columns { + if _, ok := colSet[colInfo.ID]; ok { + colList = append(colList, colInfo) } } + return colList + } + switch columnChoice { + case model.DefaultChoice, model.AllColumns: + return tbl.TableInfo.Columns, nil, nil + case model.PredicateColumns: + if mustAllColumns { + return tbl.TableInfo.Columns, nil, nil + } + predicate, err := b.getPredicateColumns(tbl, predicateCols) + if err != nil { + return nil, nil, err + } + mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols) + if err != nil { + return nil, nil, err + } + colSet := make(map[int64]struct{}, len(predicate)+len(mustAnalyzed)) + for colID := range predicate { + colSet[colID] = struct{}{} + } + for colID := range mustAnalyzed { + colSet[colID] = struct{}{} + } + return colSet2colList(colSet), nil, nil + case model.ColumnList: + colSet := make(map[int64]struct{}, len(specifiedCols)) + for _, colInfo := range specifiedCols { + colSet[colInfo.ID] = struct{}{} + } + mustAnalyzed, err := 
b.getMustAnalyzedColumns(tbl, mustAnalyzedCols) + if err != nil { + return nil, nil, err + } if warning { - b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("Columns %s are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", strings.Join(missingNames, ","))) + missing := make(map[int64]struct{}, len(mustAnalyzed)) + for colID := range mustAnalyzed { + if _, ok := colSet[colID]; !ok { + missing[colID] = struct{}{} + } + } + if len(missing) > 0 { + missingNames := make([]string, 0, len(missing)) + for _, col := range tbl.TableInfo.Columns { + if _, ok := missing[col.ID]; ok { + missingNames = append(missingNames, col.Name.O) + } + } + b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("Columns %s are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", strings.Join(missingNames, ","))) + } } - } - columnsInfo := make([]*model.ColumnInfo, 0, len(columnIDs)) - for _, col := range tblInfo.Columns { - if _, ok := columnIDs[col.ID]; ok { - columnsInfo = append(columnsInfo, col) + for colID := range mustAnalyzed { + colSet[colID] = struct{}{} } + colList := colSet2colList(colSet) + if mustAllColumns { + return tbl.TableInfo.Columns, colList, nil + } + return colList, colList, nil } - return columnsInfo, nil + return nil, nil, nil } func getColOffsetForAnalyze(colsInfo []*model.ColumnInfo, colID int64) int { @@ -1988,12 +2062,17 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( if err != nil { return nil, err } - astColsInfo, err := b.getFullAnalyzeColumnsInfo(astColList, tbl, true) + var predicateCols, mustAnalyzedCols calcOnceMap + ver := version + statsHandle := domain.GetDomain(b.ctx).StatsHandle() + // If the statistics of the table are version 1, we must analyze all columns to overwrite all of the old statistics. 
+ mustAllColumns := !statsHandle.CheckAnalyzeVersion(tbl.TableInfo, physicalIDs, &ver) + astColsInfo, _, err := b.getFullAnalyzeColumnsInfo(tbl, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns, true) if err != nil { return nil, err } isAnalyzeTable := len(as.PartitionNames) == 0 - optionsMap, colsInfoMap, err := b.genV2AnalyzeOptions(persistOpts, tbl, isAnalyzeTable, physicalIDs, astOpts, as.ColumnChoice, astColList) + optionsMap, colsInfoMap, err := b.genV2AnalyzeOptions(persistOpts, tbl, isAnalyzeTable, physicalIDs, astOpts, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns) if err != nil { return nil, err } @@ -2049,6 +2128,8 @@ func (b *PlanBuilder) genV2AnalyzeOptions( astOpts map[ast.AnalyzeOptionType]uint64, astColChoice model.ColumnChoice, astColList []*model.ColumnInfo, + predicateCols, mustAnalyzedCols *calcOnceMap, + mustAllColumns bool, ) (map[int64]V2AnalyzeOptions, map[int64][]*model.ColumnInfo, error) { optionsMap := make(map[int64]V2AnalyzeOptions, len(physicalIDs)) colsInfoMap := make(map[int64][]*model.ColumnInfo, len(physicalIDs)) @@ -2067,7 +2148,7 @@ func (b *PlanBuilder) genV2AnalyzeOptions( tblColChoice, tblColList = mergeColumnList(astColChoice, astColList, tblSavedColChoice, tblSavedColList) } tblFilledOpts := fillAnalyzeOptionsV2(tblOpts) - tblColsInfo, tblColList, err := b.getFinalAnalyzeColList(tblColChoice, tblColList, tbl) + tblColsInfo, tblColList, err := b.getFullAnalyzeColumnsInfo(tbl, tblColChoice, tblColList, predicateCols, mustAnalyzedCols, mustAllColumns, false) if err != nil { return nil, nil, err } @@ -2093,7 +2174,7 @@ func (b *PlanBuilder) genV2AnalyzeOptions( mergedOpts := mergeAnalyzeOptions(astOpts, savedOpts) filledMergedOpts := fillAnalyzeOptionsV2(mergedOpts) finalColChoice, mergedColList := mergeColumnList(astColChoice, astColList, savedColChoice, savedColList) - finalColsInfo, finalColList, err := b.getFinalAnalyzeColList(finalColChoice, mergedColList, tbl) 
+ finalColsInfo, finalColList, err := b.getFullAnalyzeColumnsInfo(tbl, finalColChoice, mergedColList, predicateCols, mustAnalyzedCols, mustAllColumns, false) if err != nil { return nil, nil, err } @@ -2183,25 +2264,6 @@ func mergeColumnList(choice1 model.ColumnChoice, list1 []*model.ColumnInfo, choi return choice2, list2 } -func (b *PlanBuilder) getFinalAnalyzeColList(choice model.ColumnChoice, list []*model.ColumnInfo, tbl *ast.TableName) ([]*model.ColumnInfo, []*model.ColumnInfo, error) { - fullColumns := tbl.TableInfo.Cols() - emptyColumns := make([]*model.ColumnInfo, 0) - switch choice { - case model.AllColumns: - return fullColumns, emptyColumns, nil - case model.ColumnList: - list, err := b.getFullAnalyzeColumnsInfo(list, tbl, false) - if err != nil { - return nil, nil, err - } - return list, list, nil - case model.PredicateColumns: // TODO - return fullColumns, emptyColumns, nil - default: - return fullColumns, emptyColumns, nil - } -} - func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64, version int) (Plan, error) { p := &Analyze{Opts: opts} p.OptionsMap = make(map[int64]V2AnalyzeOptions) @@ -2237,7 +2299,10 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A } continue } - if len(as.ColumnNames) > 0 { + if as.ColumnChoice == model.PredicateColumns { + return nil, errors.Errorf("Only the analyze version 2 supports analyzing predicate columns") + } + if as.ColumnChoice == model.ColumnList { return nil, errors.Errorf("Only the analyze version 2 supports analyzing the specified columns") } for _, idx := range idxInfo { diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 162231f69c329..2abb2d706f880 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1335,6 +1335,20 @@ var defaultSysVars = []*SysVar{ return nil }, }, + {Scope: ScopeGlobal, Name: TiDBEnableColumnTracking, Value: BoolToOnOff(DefTiDBEnableColumnTracking), 
skipInit: true, Type: TypeBool, GetGlobal: func(s *SessionVars) (string, error) { + return BoolToOnOff(EnableColumnTracking.Load()), nil + }, SetGlobal: func(s *SessionVars, val string) error { + v := TiDBOptOn(val) + if !v { + // Set the location to UTC to avoid time zone interference. + disableTime := time.Now().UTC().Format(types.UTCTimeFormat) + if err := setTiDBTableValue(s, TiDBDisableColumnTrackingTime, disableTime, "Record the last time tidb_enable_column_tracking is set off"); err != nil { + return err + } + } + EnableColumnTracking.Store(v) + return nil + }}, } // FeedbackProbability points to the FeedbackProbability in statistics package. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index c4e7d72dd2b4d..1fc5d49a0392e 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -624,6 +624,12 @@ const ( TiDBEnableHistoricalStats = "tidb_enable_historical_stats" // TiDBPersistAnalyzeOptions persists analyze options for later analyze and auto-analyze TiDBPersistAnalyzeOptions = "tidb_persist_analyze_options" + // TiDBEnableColumnTracking enables collecting predicate columns. + TiDBEnableColumnTracking = "tidb_enable_column_tracking" + // TiDBDisableColumnTrackingTime records the last time TiDBEnableColumnTracking is set off. + // It is used to invalidate the collected predicate columns after turning off TiDBEnableColumnTracking, which avoids physical deletion. + // It doesn't have cache in memory and we directly get/set the variable value from/to mysql.tidb. + TiDBDisableColumnTrackingTime = "tidb_disable_column_tracking_time" ) // TiDB intentional limits @@ -778,6 +784,7 @@ const ( DefTimestamp = "0" DefTiDBEnableIndexMerge = true DefTiDBPersistAnalyzeOptions = true + DefTiDBEnableColumnTracking = true ) // Process global variables. 
@@ -807,4 +814,5 @@ var ( EnableTSOFollowerProxy = atomic.NewBool(DefTiDBEnableTSOFollowerProxy) RestrictedReadOnly = atomic.NewBool(DefTiDBRestrictedReadOnly) PersistAnalyzeOptions = atomic.NewBool(DefTiDBPersistAnalyzeOptions) + EnableColumnTracking = atomic.NewBool(DefTiDBEnableColumnTracking) ) diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index f36a435c81f22..aa9fa33edeeb9 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -1804,8 +1804,34 @@ type colStatsTimeInfo struct { LastAnalyzedAt *types.Time } +// getDisableColumnTrackingTime reads the value of tidb_disable_column_tracking_time from mysql.tidb if it exists. +func (h *Handle) getDisableColumnTrackingTime() (*time.Time, error) { + rows, fields, err := h.execRestrictedSQL(context.Background(), "SELECT variable_value FROM %n.%n WHERE variable_name = %?", mysql.SystemDB, mysql.TiDBTable, variable.TiDBDisableColumnTrackingTime) + if err != nil { + return nil, err + } + if len(rows) == 0 { + return nil, nil + } + d := rows[0].GetDatum(0, &fields[0].Column.FieldType) + // The string represents the UTC time when tidb_enable_column_tracking is set to 0. + value, err := d.ToString() + if err != nil { + return nil, err + } + t, err := time.Parse(types.UTCTimeFormat, value) + if err != nil { + return nil, err + } + return &t, nil +} + // LoadColumnStatsUsage loads column stats usage information from disk. func (h *Handle) LoadColumnStatsUsage(loc *time.Location) (map[model.TableColumnID]colStatsTimeInfo, error) { + disableTime, err := h.getDisableColumnTrackingTime() + if err != nil { + return nil, errors.Trace(err) + } // Since we use another session from session pool to read mysql.column_stats_usage, which may have different @@time_zone, so we do time zone conversion here. 
rows, _, err := h.execRestrictedSQL(context.Background(), "SELECT table_id, column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00'), CONVERT_TZ(last_analyzed_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage") if err != nil { @@ -1823,8 +1849,12 @@ func (h *Handle) LoadColumnStatsUsage(loc *time.Location) (map[model.TableColumn if err != nil { return nil, errors.Trace(err) } - t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp) - statsUsage.LastUsedAt = &t + // If `last_used_at` is before the time when `set global tidb_enable_column_tracking = 0` was executed, we should ignore it because + // `set global tidb_enable_column_tracking = 0` invalidates all the predicate columns collected before. + if disableTime == nil || gt.After(*disableTime) { + t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp) + statsUsage.LastUsedAt = &t + } } if !row.IsNull(3) { gt, err := row.GetTime(3).GoTime(time.UTC) @@ -1863,3 +1893,32 @@ func (h *Handle) CollectColumnsInExtendedStats(tableID int64) ([]int64, error) { } return columnIDs, nil } + +// GetPredicateColumns returns IDs of predicate columns, which are the columns whose stats are used(needed) when generating query plans. +func (h *Handle) GetPredicateColumns(tableID int64) ([]int64, error) { + disableTime, err := h.getDisableColumnTrackingTime() + if err != nil { + return nil, errors.Trace(err) + } + rows, _, err := h.execRestrictedSQL(context.Background(), "SELECT column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage WHERE table_id = %? 
AND last_used_at IS NOT NULL", tableID) + if err != nil { + return nil, errors.Trace(err) + } + columnIDs := make([]int64, 0, len(rows)) + for _, row := range rows { + if row.IsNull(0) || row.IsNull(1) { + continue + } + colID := row.GetInt64(0) + gt, err := row.GetTime(1).GoTime(time.UTC) + if err != nil { + return nil, errors.Trace(err) + } + // If `last_used_at` is before the time when `set global tidb_enable_column_tracking = 0` was executed, we don't regard the column as a predicate column because + // `set global tidb_enable_column_tracking = 0` invalidates all the predicate columns collected before. + if disableTime == nil || gt.After(*disableTime) { + columnIDs = append(columnIDs, colID) + } + } + return columnIDs, nil +} diff --git a/statistics/handle/handle_test.go b/statistics/handle/handle_test.go index 498e13ccb4e4d..4cb91b4928501 100644 --- a/statistics/handle/handle_test.go +++ b/statistics/handle/handle_test.go @@ -27,10 +27,8 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/domain" - "github.com/pingcap/tidb/errno" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser/model" - "github.com/pingcap/tidb/parser/terror" "github.com/pingcap/tidb/session" "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/statistics" @@ -3109,518 +3107,3 @@ func (s *testStatsSuite) TestIncrementalModifyCountUpdate(c *C) { c.Assert(failpoint.Disable("github.com/pingcap/tidb/executor/injectBaseCount"), IsNil) c.Assert(failpoint.Disable("github.com/pingcap/tidb/executor/injectBaseModifyCount"), IsNil) } - -func (s *testStatsSuite) TestAnalyzeColumnsWithPrimaryKey(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("create table t (a int, b int, c int primary key)") - tk.MustExec("insert into t values (1,1,1), (1,1,2), (2,2,3), (2,2,4), (3,3,5), (4,3,6), (5,4,7), (6,4,8), 
(null,null,9)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - - tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", - "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() - c.Assert(len(rows), Equals, 2) - c.Assert(rows[0][3], Equals, "a") - c.Assert(rows[1][3], Equals, "c") - - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 9")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t a 0 1 2", - "test t a 0 2 2", - "test t c 0 1 1", - "test t c 0 2 1")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 1 6 1 8 2 1", - "0 2 0 0 8 0 0", // column b is not analyzed - "0 3 9 0 9 2 1", - )) - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t a 0 0 3 1 3 5 0", - "test t a 0 1 4 1 6 6 0", - "test t c 0 0 4 1 3 6 0", - "test t c 0 1 7 1 7 9 0")) -} - -func (s *testStatsSuite) TestAnalyzeColumnsWithIndex(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - 
tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("create table t (a int, b int, c int, d int, index idx_b_d(b, d))") - tk.MustExec("insert into t values (1,1,null,1), (2,1,9,1), (1,1,8,1), (2,2,7,2), (1,3,7,3), (2,4,6,4), (1,4,6,5), (2,4,6,5), (1,5,6,5)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - - tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", - "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() - c.Assert(len(rows), Equals, 3) - c.Assert(rows[0][3], Equals, "b") - c.Assert(rows[1][3], Equals, "c") - c.Assert(rows[2][3], Equals, "d") - - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 9")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t b 0 1 3", - "test t b 0 4 3", - "test t c 0 6 4", - "test t c 0 7 2", - "test t d 0 1 3", - "test t d 0 5 3", - "test t idx_b_d 1 (1, 1) 3", - "test t idx_b_d 1 (4, 5) 2")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 1 0 0 9 0 0", // column a is not analyzed - "0 2 5 0 9 2 1", - "0 3 4 1 8 2 -0.07", - "0 4 5 0 9 2 1", - "1 1 6 0 18 2 0")) - 
tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t b 0 0 2 1 2 3 0", - "test t b 0 1 3 1 5 5 0", - "test t c 0 0 2 1 8 9 0", - "test t d 0 0 2 1 2 3 0", - "test t d 0 1 3 1 4 4 0", - "test t idx_b_d 1 0 3 1 (2, 2) (4, 4) 0", - "test t idx_b_d 1 1 4 1 (5, 5) (5, 5) 0")) -} - -func (s *testStatsSuite) TestAnalyzeColumnsWithClusteredIndex(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("create table t (a int, b int, c int, d int, primary key(b, d) clustered)") - tk.MustExec("insert into t values (1,1,null,1), (2,2,9,2), (1,3,8,3), (2,4,7,4), (1,5,7,5), (2,6,6,6), (1,7,6,7), (2,8,6,8), (1,9,6,9)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - - tk.MustExec("analyze table t columns c with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", - "Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() - c.Assert(len(rows), Equals, 3) - c.Assert(rows[0][3], Equals, "b") - c.Assert(rows[1][3], Equals, "c") - c.Assert(rows[2][3], Equals, "d") - - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 9")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 
't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t PRIMARY 1 (1, 1) 1", - "test t PRIMARY 1 (2, 2) 1", - "test t b 0 1 1", - "test t b 0 2 1", - "test t c 0 6 4", - "test t c 0 7 2", - "test t d 0 1 1", - "test t d 0 2 1")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 1 0 0 9 0 0", // column a is not analyzed - "0 2 9 0 9 2 1", - "0 3 4 1 8 2 -0.07", - "0 4 9 0 9 2 1", - "1 1 9 0 18 2 0")) - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t PRIMARY 1 0 4 1 (3, 3) (6, 6) 0", - "test t PRIMARY 1 1 7 1 (7, 7) (9, 9) 0", - "test t b 0 0 4 1 3 6 0", - "test t b 0 1 7 1 7 9 0", - "test t c 0 0 2 1 8 9 0", - "test t d 0 0 4 1 3 6 0", - "test t d 0 1 7 1 7 9 0")) -} - -func (s *testStatsSuite) TestAnalyzeColumnsError(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("create table t (a int, b int)") - - // analyze version 1 doesn't support `ANALYZE COLUMNS c1, ..., cn` currently - tk.MustExec("set @@tidb_analyze_version = 1") - err := tk.ExecToErr("analyze table t columns a") - c.Assert(err.Error(), Equals, "Only the analyze version 2 supports analyzing the specified columns") - - // invalid column - tk.MustExec("set @@tidb_analyze_version = 2") - err = tk.ExecToErr("analyze table t columns c") - terr := errors.Cause(err).(*terror.Error) - c.Assert(terr.Code(), Equals, errors.ErrCode(errno.ErrAnalyzeMissColumn)) -} - -func (s *testStatsSuite) TestAnalyzeColumnsWithDynamicPartitionTable(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop 
table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'") - tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)") - tk.MustExec("insert into t values (1,2,1), (2,4,1), (3,6,1), (4,8,2), (4,8,2), (5,10,3), (5,10,4), (5,10,5), (null,null,6), (11,22,7), (12,24,8), (13,26,9), (14,28,10), (15,30,11), (16,32,12), (16,32,13), (16,32,13), (16,32,14), (17,34,14), (17,34,14)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - defs := tbl.Meta().Partition.Definitions - p0ID := defs[0].ID - p1ID := defs[1].ID - - tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0.", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1.", - "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() - c.Assert(len(rows), Equals, 6) - c.Assert(rows[0][:4], DeepEquals, []interface{}{"test", "t", "global", "a"}) - c.Assert(rows[1][:4], DeepEquals, []interface{}{"test", "t", "global", "c"}) - c.Assert(rows[2][:4], DeepEquals, []interface{}{"test", "t", "p0", "a"}) - c.Assert(rows[3][:4], DeepEquals, []interface{}{"test", "t", "p0", "c"}) - c.Assert(rows[4][:4], DeepEquals, []interface{}{"test", "t", "p1", "a"}) - c.Assert(rows[5][:4], DeepEquals, []interface{}{"test", "t", "p1", "c"}) - - rows = tk.MustQuery("show stats_meta 
where db_name = 'test' and table_name = 't'").Sort().Rows() - c.Assert(len(rows), Equals, 3) - c.Assert(append(rows[0][:3], rows[0][4:]...), DeepEquals, []interface{}{"test", "t", "global", "0", "20"}) - c.Assert(append(rows[1][:3], rows[1][4:]...), DeepEquals, []interface{}{"test", "t", "p0", "0", "9"}) - c.Assert(append(rows[2][:3], rows[2][4:]...), DeepEquals, []interface{}{"test", "t", "p1", "0", "11"}) - - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t global a 0 16 4", - "test t global a 0 5 3", - "test t global c 0 1 3", - "test t global c 0 14 3", - "test t p0 a 0 4 2", - "test t p0 a 0 5 3", - "test t p0 c 0 1 3", - "test t p0 c 0 2 2", - "test t p1 a 0 16 4", - "test t p1 a 0 17 2", - "test t p1 c 0 13 2", - "test t p1 c 0 14 3")) - - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t global idx 1 1 3", - "test t global idx 1 14 3", - "test t p0 idx 1 1 3", - "test t p0 idx 1 2 2", - "test t p1 idx 1 13 2", - "test t p1 idx 1 14 3")) - - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t global a 0 0 5 2 1 4 0", - "test t global a 0 1 12 2 17 17 0", - "test t global c 0 0 6 1 2 6 0", - "test t global c 0 1 14 2 13 13 0", - "test t p0 a 0 0 2 1 1 2 0", - "test t p0 a 0 1 3 1 3 3 0", - "test t p0 c 0 0 3 1 3 5 0", - "test t p0 c 0 1 4 1 6 6 0", - "test t p1 a 0 0 3 1 11 13 0", - "test t p1 a 0 1 5 1 14 15 0", - "test t p1 c 0 0 4 1 7 10 0", - "test t p1 c 0 1 6 1 11 12 0")) - - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, 
lower, upper, ndv - testkit.Rows("test t global idx 1 0 6 1 2 6 0", - "test t global idx 1 1 14 2 13 13 0", - "test t p0 idx 1 0 3 1 3 5 0", - "test t p0 idx 1 1 4 1 6 6 0", - "test t p1 idx 1 0 4 1 7 10 0", - "test t p1 idx 1 1 6 1 11 12 0")) - - tk.MustQuery("select table_id, is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms order by table_id, is_index, hist_id asc").Check( - testkit.Rows(fmt.Sprintf("%d 0 1 12 1 19 2 0", tblID), // global, a - fmt.Sprintf("%d 0 3 14 0 20 2 0", tblID), // global, c - fmt.Sprintf("%d 1 1 14 0 0 2 0", tblID), // global, idx - fmt.Sprintf("%d 0 1 5 1 8 2 1", p0ID), // p0, a - fmt.Sprintf("%d 0 2 0 0 8 0 0", p0ID), // p0, b, not analyzed - fmt.Sprintf("%d 0 3 6 0 9 2 1", p0ID), // p0, c - fmt.Sprintf("%d 1 1 6 0 9 2 0", p0ID), // p0, idx - fmt.Sprintf("%d 0 1 7 0 11 2 1", p1ID), // p1, a - fmt.Sprintf("%d 0 2 0 0 11 0 0", p1ID), // p1, b, not analyzed - fmt.Sprintf("%d 0 3 8 0 11 2 1", p1ID), // p1, c - fmt.Sprintf("%d 1 1 8 0 11 2 0", p1ID), // p1, idx - )) -} - -func (s *testStatsSuite) TestAnalyzeColumnsWithStaticPartitionTable(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("set @@tidb_partition_prune_mode = 'static'") - tk.MustExec("create table t (a int, b int, c int, index idx(c)) partition by range (a) (partition p0 values less than (10), partition p1 values less than maxvalue)") - tk.MustExec("insert into t values (1,2,1), (2,4,1), (3,6,1), (4,8,2), (4,8,2), (5,10,3), (5,10,4), (5,10,5), (null,null,6), (11,22,7), (12,24,8), (13,26,9), (14,28,10), (15,30,11), (16,32,12), (16,32,13), (16,32,13), (16,32,14), (17,34,14), (17,34,14)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), 
model.NewCIStr("t")) - c.Assert(err, IsNil) - defs := tbl.Meta().Partition.Definitions - p0ID := defs[0].ID - p1ID := defs[1].ID - - tk.MustExec("analyze table t columns a with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0.", - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1.", - "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() - c.Assert(len(rows), Equals, 4) - c.Assert(rows[0][:4], DeepEquals, []interface{}{"test", "t", "p0", "a"}) - c.Assert(rows[1][:4], DeepEquals, []interface{}{"test", "t", "p0", "c"}) - c.Assert(rows[2][:4], DeepEquals, []interface{}{"test", "t", "p1", "a"}) - c.Assert(rows[3][:4], DeepEquals, []interface{}{"test", "t", "p1", "c"}) - - rows = tk.MustQuery("show stats_meta where db_name = 'test' and table_name = 't'").Sort().Rows() - c.Assert(len(rows), Equals, 2) - c.Assert(append(rows[0][:3], rows[0][4:]...), DeepEquals, []interface{}{"test", "t", "p0", "0", "9"}) - c.Assert(append(rows[1][:3], rows[1][4:]...), DeepEquals, []interface{}{"test", "t", "p1", "0", "11"}) - - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t p0 a 0 4 2", - "test t p0 a 0 5 3", - "test t p0 c 0 1 3", - "test t p0 c 0 2 2", - "test t p1 a 0 16 4", - "test t p1 a 0 17 2", - "test t p1 c 0 13 2", - "test t p1 c 0 14 3")) - - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t p0 idx 1 1 3", - "test t p0 idx 1 2 2", - "test t p1 
idx 1 13 2", - "test t p1 idx 1 14 3")) - - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 0").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t p0 a 0 0 2 1 1 2 0", - "test t p0 a 0 1 3 1 3 3 0", - "test t p0 c 0 0 3 1 3 5 0", - "test t p0 c 0 1 4 1 6 6 0", - "test t p1 a 0 0 3 1 11 13 0", - "test t p1 a 0 1 5 1 14 15 0", - "test t p1 c 0 0 4 1 7 10 0", - "test t p1 c 0 1 6 1 11 12 0")) - - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't' and is_index = 1").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t p0 idx 1 0 3 1 3 5 0", - "test t p0 idx 1 1 4 1 6 6 0", - "test t p1 idx 1 0 4 1 7 10 0", - "test t p1 idx 1 1 6 1 11 12 0")) - - tk.MustQuery("select table_id, is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms order by table_id, is_index, hist_id asc").Check( - testkit.Rows(fmt.Sprintf("%d 0 1 5 1 8 2 1", p0ID), // p0, a - fmt.Sprintf("%d 0 2 0 0 8 0 0", p0ID), // p0, b, not analyzed - fmt.Sprintf("%d 0 3 6 0 9 2 1", p0ID), // p0, c - fmt.Sprintf("%d 1 1 6 0 9 2 0", p0ID), // p0, idx - fmt.Sprintf("%d 0 1 7 0 11 2 1", p1ID), // p1, a - fmt.Sprintf("%d 0 2 0 0 11 0 0", p1ID), // p1, b, not analyzed - fmt.Sprintf("%d 0 3 8 0 11 2 1", p1ID), // p1, c - fmt.Sprintf("%d 1 1 8 0 11 2 0", p1ID), // p1, idx - )) -} - -func (s *testStatsSuite) TestAnalyzeColumnsWithExtendedStats(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("set @@tidb_enable_extended_stats = on") - tk.MustExec("create table t (a int, b int, c int)") - tk.MustExec("alter table t add stats_extended s1 correlation(b,c)") - tk.MustExec("insert into t values (5,1,1), 
(4,2,2), (3,3,3), (2,4,4), (1,5,5)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - - tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", - "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Sort().Rows() - c.Assert(len(rows), Equals, 2) - c.Assert(rows[0][3], Equals, "b") - c.Assert(rows[1][3], Equals, "c") - - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 5")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t b 0 1 1", - "test t b 0 2 1", - "test t c 0 1 1", - "test t c 0 2 1")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 1 0 0 5 0 0", // column a is not analyzed - "0 2 5 0 5 2 1", - "0 3 5 0 5 2 1", - )) - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t b 0 0 2 1 3 4 0", - "test t b 0 1 3 1 5 5 0", - "test t c 0 0 2 1 3 4 0", - "test t c 0 1 3 1 5 5 0")) - rows = tk.MustQuery("show stats_extended where db_name = 'test' and table_name = 't'").Rows() - c.Assert(len(rows), Equals, 1) - 
c.Assert(rows[0][:len(rows[0])-1], DeepEquals, []interface{}{"test", "t", "s1", "[b,c]", "correlation", "1.000000"}) -} - -func (s *testStatsSuite) TestAnalyzeColumnsWithVirtualColumnIndex(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("create table t (a int, b int, c int as (b+1), index idx(c))") - tk.MustExec("insert into t (a,b) values (1,1), (2,2), (3,3), (4,4), (5,4), (6,5), (7,5), (8,5), (null,null)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - - tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows( - "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t.", - "Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats.", - )) - // virtual column c is skipped when dumping stats into disk, so only the stats of column b are updated - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows() - c.Assert(len(rows), Equals, 1) - c.Assert(rows[0][3], Equals, "b") - - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 9")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t b 0 4 2", - "test t b 0 5 3", - "test t idx 1 5 2", - "test t idx 1 6 3")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id 
= %d", tblID)).Sort().Check( - testkit.Rows("0 1 0 0 0 0", // column a is not analyzed - "0 2 5 1 2 1", - "0 3 0 0 0 0", // column c is not analyzed - "1 1 5 1 2 0")) - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t b 0 0 2 1 1 2 0", - "test t b 0 1 3 1 3 3 0", - "test t idx 1 0 2 1 2 3 0", - "test t idx 1 1 3 1 4 4 0")) -} - -func (s *testStatsSuite) TestAnalyzeColumnsAfterAnalyzeAll(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("set @@tidb_analyze_version = 2") - tk.MustExec("create table t (a int, b int)") - tk.MustExec("insert into t (a,b) values (1,1), (1,1), (2,2), (2,2), (3,3), (4,4)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - is := s.do.InfoSchema() - tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblID := tbl.Meta().ID - - tk.MustExec("analyze table t with 2 topn, 2 buckets") - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 6")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t a 0 1 2", - "test t a 0 2 2", - "test t b 0 1 2", - "test t b 0 2 2")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 1 4 0 6 2 1", - "0 2 4 0 6 2 1")) - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t a 0 0 2 1 3 4 0", - 
"test t b 0 0 2 1 3 4 0")) - - tk.MustExec("insert into t (a,b) values (1,1), (6,6)") - c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil) - - tk.MustExec("analyze table t columns b with 2 topn, 2 buckets") - // Column a is not analyzed in second ANALYZE. We keep the outdated stats of column a rather than delete them. - tk.MustQuery(fmt.Sprintf("select modify_count, count from mysql.stats_meta where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 8")) - tk.MustQuery("show stats_topn where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_idx, value, count - testkit.Rows("test t a 0 1 2", - "test t a 0 2 2", - "test t b 0 1 3", - "test t b 0 2 2")) - tk.MustQuery(fmt.Sprintf("select is_index, hist_id, distinct_count, null_count, tot_col_size, stats_ver, truncate(correlation,2) from mysql.stats_histograms where table_id = %d", tblID)).Sort().Check( - testkit.Rows("0 1 4 0 8 2 1", // tot_col_size of column a is updated to 8 by DumpStatsDeltaToKV - "0 2 5 0 8 2 0.76")) - tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check( - // db, tbl, part, col, is_index, bucket_id, count, repeats, lower, upper, ndv - testkit.Rows("test t a 0 0 2 1 3 4 0", - "test t b 0 0 2 1 3 4 0", - "test t b 0 1 3 1 6 6 0")) - tk.MustQuery(fmt.Sprintf("select hist_id from mysql.stats_histograms where version = (select version from mysql.stats_meta where table_id = %d)", tblID)).Check(testkit.Rows("2")) -} diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 401b46df92b9a..d9921fe948b72 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -875,6 +875,9 @@ func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.Query // DumpColStatsUsageToKV sweeps the whole list, updates the column stats usage map and dumps it to KV. 
func (h *Handle) DumpColStatsUsageToKV() error { + if !variable.EnableColumnTracking.Load() { + return nil + } h.sweepList() h.colMap.Lock() colMap := h.colMap.data diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index ff220dac5c6e5..6552d547fb4f3 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -2341,6 +2341,13 @@ func (s *testSerialStatsSuite) TestAutoAnalyzeRatio(c *C) { func (s *testSerialStatsSuite) TestDumpColumnStatsUsage(c *C) { defer cleanEnv(c, s.store, s.do) tk := testkit.NewTestKit(c, s.store) + + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + h := s.do.StatsHandle() tk.MustExec("use test") tk.MustExec("create table t1(a int, b int)") @@ -2413,14 +2420,21 @@ func (s *testSerialStatsSuite) TestDumpColumnStatsUsage(c *C) { func (s *testSerialStatsSuite) TestCollectPredicateColumnsFromExecute(c *C) { for _, val := range []bool{false, true} { func(planCache bool) { - originalVal := plannercore.PreparedPlanCacheEnabled() + originalVal1 := plannercore.PreparedPlanCacheEnabled() defer func() { - plannercore.SetPreparedPlanCache(originalVal) + plannercore.SetPreparedPlanCache(originalVal1) }() plannercore.SetPreparedPlanCache(planCache) defer cleanEnv(c, s.store, s.do) tk := testkit.NewTestKit(c, s.store) + + originalVal2 := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal2)) + }() + tk.MustExec("set global tidb_enable_column_tracking = 1") + h := s.do.StatsHandle() tk.MustExec("use test") tk.MustExec("create table t1(a int, b int)") @@ -2457,3 +2471,47 @@ func (s *testSerialStatsSuite) TestCollectPredicateColumnsFromExecute(c *C) { }(val) } } + +func (s 
*testSerialStatsSuite) TestEnableAndDisableColumnTracking(c *C) { + defer cleanEnv(c, s.store, s.do) + tk := testkit.NewTestKit(c, s.store) + h := s.do.StatsHandle() + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (a int, b int, c int)") + + originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) + defer func() { + tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) + }() + + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where b > 1") + c.Assert(h.DumpColStatsUsageToKV(), IsNil) + rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() + c.Assert(len(rows), Equals, 1) + c.Assert(rows[0][3], Equals, "b") + + tk.MustExec("set global tidb_enable_column_tracking = 0") + // After tidb_enable_column_tracking is set to 0, the predicate columns collected before are invalidated. + tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Check(testkit.Rows()) + + // Sleep for 1.5s to let `last_used_at` be larger than `tidb_disable_tracking_time`. 
+ time.Sleep(1500 * time.Millisecond) + tk.MustExec("select * from t where a > 1") + c.Assert(h.DumpColStatsUsageToKV(), IsNil) + // We don't collect predicate columns when tidb_enable_column_tracking = 0 + tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Check(testkit.Rows()) + + tk.MustExec("set global tidb_enable_column_tracking = 1") + tk.MustExec("select * from t where b < 1 and c > 1") + c.Assert(h.DumpColStatsUsageToKV(), IsNil) + rows = tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Sort().Rows() + c.Assert(len(rows), Equals, 2) + c.Assert(rows[0][3], Equals, "b") + c.Assert(rows[1][3], Equals, "c") + + // Test invalidating predicate columns again in order to check that tidb_disable_tracking_time can be updated. + tk.MustExec("set global tidb_enable_column_tracking = 0") + tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Check(testkit.Rows()) +} diff --git a/types/time.go b/types/time.go index ada6b21f2135f..3218195a0c625 100644 --- a/types/time.go +++ b/types/time.go @@ -39,6 +39,8 @@ const ( TimeFormat = "2006-01-02 15:04:05" // TimeFSPFormat is time format with fractional seconds precision. TimeFSPFormat = "2006-01-02 15:04:05.000000" + // UTCTimeFormat is used to parse and format gotime. + UTCTimeFormat = "2006-01-02 15:04:05 UTC" ) const (