diff --git a/cmd/backup.go b/cmd/backup.go index 3fb8868b0..15298d944 100644 --- a/cmd/backup.go +++ b/cmd/backup.go @@ -6,6 +6,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/pingcap/tidb/ddl" + "github.com/pingcap/tidb/session" "github.com/spf13/cobra" "go.uber.org/zap" @@ -21,6 +22,11 @@ func runBackupCommand(command *cobra.Command, cmdName string) error { command.SilenceUsage = false return errors.Trace(err) } + if cfg.IgnoreStats { + // Do not run stat worker in BR. + session.DisableStats4Test() + } + if err := task.RunBackup(GetDefaultContext(), tidbGlue, cmdName, &cfg); err != nil { log.Error("failed to backup", zap.Error(err)) return errors.Trace(err) diff --git a/cmd/restore.go b/cmd/restore.go index 26aa20c59..6f17c2720 100644 --- a/cmd/restore.go +++ b/cmd/restore.go @@ -5,6 +5,7 @@ package cmd import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tidb/session" "github.com/spf13/cobra" "go.uber.org/zap" @@ -67,6 +68,7 @@ func NewRestoreCommand() *cobra.Command { } utils.LogBRInfo() task.LogArguments(c) + session.DisableStats4Test() summary.SetUnit(summary.RestoreUnit) return nil diff --git a/pkg/task/backup.go b/pkg/task/backup.go index 7c9ac7882..872f5dd7e 100644 --- a/pkg/task/backup.go +++ b/pkg/task/backup.go @@ -83,7 +83,11 @@ func DefineBackupFlags(flags *pflag.FlagSet) { // This flag can impact the online cluster, so hide it in case of abuse. _ = flags.MarkHidden(flagRemoveSchedulers) - flags.Bool(flagIgnoreStats, false, + // Disable stats by default, because: + // 1. DumpStatsToJson is not stable. + // 2. It increases memory usage, which may cause BR to OOM. + // TODO: we need a better way to backup/restore stats. + flags.Bool(flagIgnoreStats, true, "ignore backup stats, used for test") // This flag is used for test. we should backup stats all the time. 
_ = flags.MarkHidden(flagIgnoreStats) diff --git a/tests/br_full_ddl/run.sh b/tests/br_full_ddl/run.sh index d9d444478..b87676516 100755 --- a/tests/br_full_ddl/run.sh +++ b/tests/br_full_ddl/run.sh @@ -64,12 +64,12 @@ run_sql "analyze table $DB.$TABLE;" curl $TIDB_IP:10080/stats/dump/$DB/$TABLE | jq '.columns.field0' | jq 'del(.last_update_version)' > backup_stats # backup full -echo "backup start..." +echo "backup start with stats..." # Do not log to terminal unset BR_LOG_TO_TERM cluster_index_before_backup=$(run_sql "show variables like '%cluster%';" | awk '{print $2}') -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --ratelimit 5 --concurrency 4 --log-file $LOG || cat $LOG +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --ratelimit 5 --concurrency 4 --log-file $LOG --ignore-stats=false || cat $LOG checksum_count=$(cat $LOG | grep "checksum success" | wc -l | xargs) if [ "${checksum_count}" != "1" ];then @@ -78,6 +78,9 @@ if [ "${checksum_count}" != "1" ];then exit 1 fi +echo "backup start without stats..." +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/${DB}_disable_stats" --concurrency 4 + run_sql "DROP DATABASE $DB;" cluster_index_before_restore=$(run_sql "show variables like '%cluster%';" | awk '{print $2}') @@ -89,6 +92,20 @@ if [[ "${cluster_index_before_backup}" != "${cluster_index_before_restore}" ]]; exit 1 fi +echo "restore full without stats..." +run_br restore full -s "local://$TEST_DIR/${DB}_disable_stats" --pd $PD_ADDR +curl $TIDB_IP:10080/stats/dump/$DB/$TABLE | jq '.columns.field0' | jq 'del(.last_update_version)' > restore_stats + +# stats should not be equal because we disable stats by default. +if diff -q backup_stats restore_stats > /dev/null +then + echo "TEST: [$TEST_NAME] fail due to stats are equal" + exit 1 +fi + +# clear restore environment +run_sql "DROP DATABASE $DB;" + # restore full echo "restore start..." export GO_FAILPOINTS="github.com/pingcap/br/pkg/pdutil/PDEnabledPauseConfig=return(true)"