diff --git a/pkg/diff/diff.go b/pkg/diff/diff.go index d90be8cb2..0c275009b 100644 --- a/pkg/diff/diff.go +++ b/pkg/diff/diff.go @@ -75,6 +75,9 @@ type TableDiff struct { // set false if want to comapre the data directly UseChecksum bool + // set true to compare data by checksum only; row data is not selected when the checksums are not equal + OnlyUseChecksum bool + // collation config in mysql/tidb, should corresponding to charset. Collation string @@ -285,6 +288,11 @@ func (t *TableDiff) checkChunkDataEqual(ctx context.Context, checkJobs []*CheckJ log.Warn("checksum is not equal", zap.String("table", dbutil.TableName(job.Schema, job.Table)), zap.String("where", job.Where), zap.Reflect("args", job.Args), zap.Int64("source checksum", sourceChecksum), zap.Int64("target checksum", targetChecksum)) } + if t.UseChecksum && t.OnlyUseChecksum { + equal = false + continue + } + // if checksum is not equal or don't need compare checksum, compare the data log.Info("select data and then check data", zap.String("table", dbutil.TableName(job.Schema, job.Table)), zap.String("where", job.Where), zap.Reflect("args", job.Args)) sourceRows := make(map[string][]map[string]*dbutil.ColumnData) diff --git a/sync_diff_inspector/config.go b/sync_diff_inspector/config.go index 66a57d7b8..04f0103e7 100644 --- a/sync_diff_inspector/config.go +++ b/sync_diff_inspector/config.go @@ -181,6 +181,9 @@ type Config struct { // set false if want to comapre the data directly UseChecksum bool `toml:"use-checksum" json:"use-checksum"` + // set true to compare data by checksum only; row data is not selected when the checksums are not equal.
+ OnlyUseChecksum bool `toml:"only-use-checksum" json:"only-use-checksum"` + // the name of the file which saves sqls used to fix different data FixSQLFile string `toml:"fix-sql-file" json:"fix-sql-file"` @@ -313,5 +316,17 @@ func (c *Config) checkConfig() bool { } } + if c.OnlyUseChecksum { + if !c.UseChecksum { + log.Error("need set use-checksum = true") + return false + } + } else { + if len(c.FixSQLFile) == 0 { + log.Warn("fix-sql-file is invalid, will use default value 'fix.sql'") + c.FixSQLFile = "fix.sql" + } + } + return true } diff --git a/sync_diff_inspector/config.toml b/sync_diff_inspector/config.toml index 4cfb8a1d4..d175f2756 100644 --- a/sync_diff_inspector/config.toml +++ b/sync_diff_inspector/config.toml @@ -18,15 +18,19 @@ sample-percent = 100 use-rowid = false # calculate the data's checksum, and compare data by checksum. +# set false to compare the data directly use-checksum = true +# set true to compare data by checksum only; row data is not selected when the checksums are not equal. +only-use-checksum = false + # ignore check table's data ignore-data-check = false # ignore check table's struct ignore-struct-check = false -# the name of the file which saves sqls used to fix different data +# the name of the file which saves sqls used to fix different data.
fix-sql-file = "fix.sql" # use this tidb's statistics information to split chunk diff --git a/sync_diff_inspector/diff.go b/sync_diff_inspector/diff.go index fb531a4b6..e84890cf3 100644 --- a/sync_diff_inspector/diff.go +++ b/sync_diff_inspector/diff.go @@ -37,6 +37,7 @@ type Diff struct { checkThreadCount int useRowID bool useChecksum bool + onlyUseChecksum bool ignoreDataCheck bool ignoreStructCheck bool tables map[string]map[string]*TableConfig @@ -57,6 +58,7 @@ func NewDiff(ctx context.Context, cfg *Config) (diff *Diff, err error) { checkThreadCount: cfg.CheckThreadCount, useRowID: cfg.UseRowID, useChecksum: cfg.UseChecksum, + onlyUseChecksum: cfg.OnlyUseChecksum, ignoreDataCheck: cfg.IgnoreDataCheck, ignoreStructCheck: cfg.IgnoreStructCheck, tidbInstanceID: cfg.TiDBInstanceID, @@ -405,6 +407,7 @@ func (df *Diff) Equal() (err error) { CheckThreadCount: df.checkThreadCount, UseRowID: df.useRowID, UseChecksum: df.useChecksum, + OnlyUseChecksum: df.onlyUseChecksum, IgnoreStructCheck: df.ignoreStructCheck, IgnoreDataCheck: df.ignoreDataCheck, TiDBStatsSource: tidbStatsSource,